Diffstat (limited to 'contrib/arm-optimized-routines/pl')
-rw-r--r--contrib/arm-optimized-routines/pl/Dir.mk21
-rw-r--r--contrib/arm-optimized-routines/pl/README.contributors23
-rw-r--r--contrib/arm-optimized-routines/pl/math/Dir.mk216
-rw-r--r--contrib/arm-optimized-routines/pl/math/acos_2u.c100
-rw-r--r--contrib/arm-optimized-routines/pl/math/acosf_1u4.c99
-rw-r--r--contrib/arm-optimized-routines/pl/math/acosh_3u.c66
-rw-r--r--contrib/arm-optimized-routines/pl/math/acoshf_2u8.c63
-rw-r--r--contrib/arm-optimized-routines/pl/math/asin_3u.c106
-rw-r--r--contrib/arm-optimized-routines/pl/math/asin_data.c19
-rw-r--r--contrib/arm-optimized-routines/pl/math/asinf_2u5.c100
-rw-r--r--contrib/arm-optimized-routines/pl/math/asinf_data.c16
-rw-r--r--contrib/arm-optimized-routines/pl/math/asinh_2u5.c85
-rw-r--r--contrib/arm-optimized-routines/pl/math/asinh_data.c22
-rw-r--r--contrib/arm-optimized-routines/pl/math/asinhf_3u5.c76
-rw-r--r--contrib/arm-optimized-routines/pl/math/asinhf_data.c15
-rw-r--r--contrib/arm-optimized-routines/pl/math/atan2_2u5.c159
-rw-r--r--contrib/arm-optimized-routines/pl/math/atan2f_3u.c167
-rw-r--r--contrib/arm-optimized-routines/pl/math/atan_2u5.c73
-rw-r--r--contrib/arm-optimized-routines/pl/math/atan_common.h33
-rw-r--r--contrib/arm-optimized-routines/pl/math/atan_data.c20
-rw-r--r--contrib/arm-optimized-routines/pl/math/atanf_2u9.c72
-rw-r--r--contrib/arm-optimized-routines/pl/math/atanf_common.h38
-rw-r--r--contrib/arm-optimized-routines/pl/math/atanf_data.c15
-rw-r--r--contrib/arm-optimized-routines/pl/math/atanh_3u.c83
-rw-r--r--contrib/arm-optimized-routines/pl/math/atanhf_3u1.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/cbrt_2u.c69
-rw-r--r--contrib/arm-optimized-routines/pl/math/cbrt_data.c15
-rw-r--r--contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c66
-rw-r--r--contrib/arm-optimized-routines/pl/math/cbrtf_data.c15
-rw-r--r--contrib/arm-optimized-routines/pl/math/cosh_2u.c63
-rw-r--r--contrib/arm-optimized-routines/pl/math/coshf_1u9.c68
-rw-r--r--contrib/arm-optimized-routines/pl/math/cospi_3u1.c89
-rw-r--r--contrib/arm-optimized-routines/pl/math/cospif_2u6.c84
-rw-r--r--contrib/arm-optimized-routines/pl/math/erf_2u5.c102
-rw-r--r--contrib/arm-optimized-routines/pl/math/erf_data.c788
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfc_1u8.c153
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfc_data.c3507
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfcf_1u7.c103
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfcf_data.c664
-rw-r--r--contrib/arm-optimized-routines/pl/math/erff_2u.c82
-rw-r--r--contrib/arm-optimized-routines/pl/math/erff_data.c532
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfinv_24u5.c81
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c74
-rw-r--r--contrib/arm-optimized-routines/pl/math/erfinvl.c114
-rw-r--r--contrib/arm-optimized-routines/pl/math/exp.c163
-rw-r--r--contrib/arm-optimized-routines/pl/math/exp_data.c1120
-rw-r--r--contrib/arm-optimized-routines/pl/math/expf.c76
-rw-r--r--contrib/arm-optimized-routines/pl/math/expf_data.c31
-rw-r--r--contrib/arm-optimized-routines/pl/math/expm1_2u5.c85
-rw-r--r--contrib/arm-optimized-routines/pl/math/expm1_data.c21
-rw-r--r--contrib/arm-optimized-routines/pl/math/expm1f_1u6.c79
-rw-r--r--contrib/arm-optimized-routines/pl/math/expm1f_data.c12
-rw-r--r--contrib/arm-optimized-routines/pl/math/finite_pow.h365
-rw-r--r--contrib/arm-optimized-routines/pl/math/include/mathlib.h206
-rw-r--r--contrib/arm-optimized-routines/pl/math/include/pl_test.h24
-rw-r--r--contrib/arm-optimized-routines/pl/math/log.c161
-rw-r--r--contrib/arm-optimized-routines/pl/math/log10_2u.c150
-rw-r--r--contrib/arm-optimized-routines/pl/math/log10_data.c337
-rw-r--r--contrib/arm-optimized-routines/pl/math/log10f.c97
-rw-r--r--contrib/arm-optimized-routines/pl/math/log1p_2u.c131
-rw-r--r--contrib/arm-optimized-routines/pl/math/log1p_data.c19
-rw-r--r--contrib/arm-optimized-routines/pl/math/log1pf_2u1.c161
-rw-r--r--contrib/arm-optimized-routines/pl/math/log1pf_data.c14
-rw-r--r--contrib/arm-optimized-routines/pl/math/log_data.c511
-rw-r--r--contrib/arm-optimized-routines/pl/math/logf.c75
-rw-r--r--contrib/arm-optimized-routines/pl/math/logf_data.c36
-rw-r--r--contrib/arm-optimized-routines/pl/math/math_config.h624
-rw-r--r--contrib/arm-optimized-routines/pl/math/math_err.c78
-rw-r--r--contrib/arm-optimized-routines/pl/math/math_errf.c78
-rw-r--r--contrib/arm-optimized-routines/pl/math/pl_sig.h59
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h24
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h24
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_generic.h277
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h24
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h24
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_sve_f32.h26
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_sve_f64.h26
-rw-r--r--contrib/arm-optimized-routines/pl/math/poly_sve_generic.h301
-rw-r--r--contrib/arm-optimized-routines/pl/math/sinh_3u.c63
-rw-r--r--contrib/arm-optimized-routines/pl/math/sinhf_2u3.c73
-rw-r--r--contrib/arm-optimized-routines/pl/math/sinpi_3u.c90
-rw-r--r--contrib/arm-optimized-routines/pl/math/sinpif_2u5.c83
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_acos_2u.c91
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c84
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c50
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c47
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_asin_3u.c84
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c76
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c129
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c55
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c116
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c108
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c87
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c76
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c60
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c56
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c122
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c116
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c45
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c47
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c80
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c100
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c56
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c63
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c59
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c111
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_erf_data.c1558
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c164
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c111
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_erff_2u.c90
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_erff_data.c1046
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c122
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c87
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c107
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c80
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c137
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_expf_2u.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_expf_inline.h66
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c95
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c93
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h73
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c51
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c45
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c75
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c93
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c116
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h96
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c97
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h65
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log2_3u.c73
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_log_2u5.c76
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_math.h133
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c444
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c360
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_powi.c48
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_powif.c48
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c96
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c61
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sincos_common.h85
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c62
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h81
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c93
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c103
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c64
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c57
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c53
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c99
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c119
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c96
-rw-r--r--contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c59
-rw-r--r--contrib/arm-optimized-routines/pl/math/tanf_3u3.c193
-rw-r--r--contrib/arm-optimized-routines/pl/math/tanf_data.c45
-rw-r--r--contrib/arm-optimized-routines/pl/math/tanh_3u.c78
-rw-r--r--contrib/arm-optimized-routines/pl/math/tanhf_2u6.c88
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h87
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h206
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/pl_test.h39
-rwxr-xr-xcontrib/arm-optimized-routines/pl/math/test/runulp.sh78
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst17
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst21
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst19
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst19
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst24
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst24
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst18
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst18
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst22
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst110
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst121
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst22
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst22
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst23
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst29
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst15
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst15
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst23
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst14
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst17
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst21
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst57
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst16
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst69
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst22
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst130
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst21
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst27
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst21
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst21
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst25
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst18
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst20
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst6
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst8
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h70
-rw-r--r--contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h140
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/asin.sollya29
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/asinf.sollya36
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/asinh.sollya28
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya29
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/atan.sollya23
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/atanf.sollya20
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya20
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya20
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/erf.sollya25
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/erfc.sollya51
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya22
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/erff.sollya20
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/exp10.sollya55
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/expm1.sollya21
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya21
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/log10.sollya44
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/log10f.sollya37
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/log1p.sollya30
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya21
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/sincos.sollya33
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya33
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya33
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/tan.sollya20
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/tanf.sollya78
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya20
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya46
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya38
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya45
-rw-r--r--contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya38
-rw-r--r--contrib/arm-optimized-routines/pl/math/trigpi_references.c57
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_acos_2u.c122
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c113
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c66
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c78
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_asin_3u.c113
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c104
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c175
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c80
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_atan2_3u.c121
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c115
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_atan_2u5.c104
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_atanf_3u.c107
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c66
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c77
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c116
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c116
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c45
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c47
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cosh_2u.c104
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c80
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c83
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_erf_2u5.c158
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c198
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c166
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_erff_2u.c118
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c161
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c163
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp10_2u.c144
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c138
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp2_2u.c128
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp_data.c55
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp_tail.h21
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c98
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h102
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_expf_inline.h60
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c118
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c117
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h63
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c95
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c94
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log10_2u5.c120
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log10_data.c163
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c82
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c128
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log1p_inline.h91
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c126
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h67
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log2_3u.c109
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log2_data.c153
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c77
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log_data.c161
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_log_inline.h104
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_logf_inline.h59
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_math.h175
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_pow_1u5.c259
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c289
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_pow_log_data.c174
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_powf_data.c89
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c57
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sincos_common.h86
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c58
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sincosf_common.h84
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sinh_3u.c118
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c84
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c86
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c81
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_tan_3u5.c120
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c127
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_tanh_3u.c106
-rw-r--r--contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c73
299 files changed, 34142 insertions, 0 deletions
diff --git a/contrib/arm-optimized-routines/pl/Dir.mk b/contrib/arm-optimized-routines/pl/Dir.mk
new file mode 100644
index 000000000000..2d007790d241
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/Dir.mk
@@ -0,0 +1,21 @@
+# Makefile fragment - requires GNU make
+#
+# Copyright (c) 2022, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+# These targets are defined if we prescribe pl in SUBS.
+# It requires PLSUBS to be set.
+
+$(foreach sub,$(PLSUBS),$(eval include $(srcdir)/pl/$(sub)/Dir.mk))
+
+pl-files := $($(PLSUBS:%=pl/%-files))
+
+all-pl: $(PLSUBS:%=all-pl/%)
+
+check-pl: $(PLSUBS:%=check-pl/%)
+
+install-pl: $(PLSUBS:%=install-pl/%)
+
+clean-pl: $(PLSUBS:%=clean-pl/%)
+
+.PHONY: all-pl check-pl install-pl clean-pl
diff --git a/contrib/arm-optimized-routines/pl/README.contributors b/contrib/arm-optimized-routines/pl/README.contributors
new file mode 100644
index 000000000000..3af9b1fc7741
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/README.contributors
@@ -0,0 +1,23 @@
+Code in this sub-directory should follow the GNU Coding Standard, but it is
+not expected to be upstreamed into glibc without modification, so
+glibc-specific conventions need not be followed.
+
+The requirements for portable code apply to non-portable code with the
+following differences:
+
+
+1. Worst-case ULP error should be encoded in filenames (e.g. sin_u35.c). There
+ are no specific restrictions on acceptable ULP error, but if functions
+ provide significantly less accuracy than portable equivalents then a clear
+ justification for inclusion should be stated in comments at the top of the
+ source file. Error bounds of the approximation should be clearly documented
+ in comments.
+
+2. Functions are assumed to support round-to-nearest mode by default, unless
+ stated; other rounding modes are not required to be provided.
+
+3. Handling of special cases may be relaxed for vector functions. Checking
+ whether each vector lane contains special values such as NaN, Inf or
+ denormal numbers can prove too costly for vector functions. This is often
+ not required since vector functions are typically used along with aggressive
+ compiler optimization flags.
diff --git a/contrib/arm-optimized-routines/pl/math/Dir.mk b/contrib/arm-optimized-routines/pl/math/Dir.mk
new file mode 100644
index 000000000000..94b26cf3309c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/Dir.mk
@@ -0,0 +1,216 @@
+# Makefile fragment - requires GNU make
+#
+# Copyright (c) 2019-2024, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+PLM := $(srcdir)/pl/math
+AOR := $(srcdir)/math
+B := build/pl/math
+
+pl-lib-srcs := $(wildcard $(PLM)/*.[cS])
+
+ifeq ($(WANT_SVE_MATH), 0)
+pl-lib-srcs := $(filter-out $(PLM)/sv_%, $(pl-lib-srcs))
+endif
+
+math-test-srcs := \
+ $(AOR)/test/mathtest.c \
+ $(AOR)/test/mathbench.c \
+ $(AOR)/test/ulp.c \
+
+math-test-host-srcs := $(wildcard $(AOR)/test/rtest/*.[cS])
+
+pl-includes := $(patsubst $(PLM)/%,build/pl/%,$(wildcard $(PLM)/include/*.h))
+pl-test-includes := $(patsubst $(PLM)/%,build/pl/include/%,$(wildcard $(PLM)/test/*.h))
+
+pl-libs := \
+ build/pl/lib/libmathlib.so \
+ build/pl/lib/libmathlib.a \
+
+math-tools := \
+ build/pl/bin/mathtest \
+ build/pl/bin/mathbench \
+ build/pl/bin/mathbench_libc \
+ build/pl/bin/runulp.sh \
+ build/pl/bin/ulp \
+
+math-host-tools := \
+ build/pl/bin/rtest \
+
+pl-lib-objs := $(patsubst $(PLM)/%,$(B)/%.o,$(basename $(pl-lib-srcs)))
+math-test-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-srcs)))
+math-host-objs := $(patsubst $(AOR)/%,$(B)/%.o,$(basename $(math-test-host-srcs)))
+pl-target-objs := $(pl-lib-objs) $(math-test-objs)
+pl-objs := $(pl-target-objs) $(pl-target-objs:%.o=%.os) $(math-host-objs)
+
+pl/math-files := \
+ $(pl-objs) \
+ $(pl-libs) \
+ $(math-tools) \
+ $(math-host-tools) \
+ $(pl-includes) \
+ $(pl-test-includes) \
+
+all-pl/math: $(pl-libs) $(math-tools) $(pl-includes) $(pl-test-includes)
+
+$(pl-objs): $(pl-includes) $(pl-test-includes)
+$(pl-objs): CFLAGS_PL += $(math-cflags)
+$(B)/test/mathtest.o: CFLAGS_PL += -fmath-errno
+$(math-host-objs): CC = $(HOST_CC)
+$(math-host-objs): CFLAGS_PL = $(HOST_CFLAGS)
+
+$(B)/sv_%: CFLAGS_PL += $(math-sve-cflags)
+
+build/pl/include/test/ulp_funcs_gen.h: $(pl-lib-srcs)
+ # Replace PL_SIG macros with ulp func entries
+ cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f)" -P > $@
+
+build/pl/include/test/mathbench_funcs_gen.h: $(pl-lib-srcs)
+ # Replace PL_SIG macros with mathbench func entries
+ cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f, ##__VA_ARGS__)" -P > $@
+
+build/pl/include/test/ulp_wrappers_gen.h: $(pl-lib-srcs)
+ # Replace PL_SIG macros with ULP wrapper declarations
+ cat $^ | grep PL_SIG | $(CC) -xc - -o - -E "-DPL_SIG(v, t, a, f, ...)=Z##v##N##t##a##_WRAP(f)" -P > $@
+
+$(B)/test/ulp.o: $(AOR)/test/ulp.h build/pl/include/test/ulp_funcs_gen.h build/pl/include/test/ulp_wrappers_gen.h
+$(B)/test/ulp.o: CFLAGS_PL += -I build/pl/include/test
+
+$(B)/test/mathbench.o: build/pl/include/test/mathbench_funcs_gen.h
+$(B)/test/mathbench.o: CFLAGS_PL += -I build/pl/include/test
+
+build/pl/lib/libmathlib.so: $(pl-lib-objs:%.o=%.os)
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -shared -o $@ $^
+
+build/pl/lib/libmathlib.a: $(pl-lib-objs)
+ rm -f $@
+ $(AR) rc $@ $^
+ $(RANLIB) $@
+
+$(math-host-tools): HOST_LDLIBS += -lm -lmpfr -lmpc
+$(math-tools): LDLIBS += $(math-ldlibs) -lm
+# math-sve-cflags should be empty if WANT_SVE_MATH is not enabled
+$(math-tools): CFLAGS_PL += $(math-sve-cflags)
+
+# Some targets to build pl/math/test from math/test sources
+build/pl/math/test/%.o: $(srcdir)/math/test/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.o: $(srcdir)/math/test/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.os: $(srcdir)/math/test/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/math/test/%.os: $(srcdir)/math/test/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+# Some targets to build pl/ sources using appropriate flags
+build/pl/%.o: $(srcdir)/pl/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.o: $(srcdir)/pl/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.os: $(srcdir)/pl/%.S
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/%.os: $(srcdir)/pl/%.c
+ $(CC) $(CFLAGS_PL) -c -o $@ $<
+
+build/pl/bin/rtest: $(math-host-objs)
+ $(HOST_CC) $(HOST_CFLAGS) $(HOST_LDFLAGS) -o $@ $^ $(HOST_LDLIBS)
+
+build/pl/bin/mathtest: $(B)/test/mathtest.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/pl/bin/mathbench: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+# This is not ideal, but allows custom symbols in mathbench to get resolved.
+build/pl/bin/mathbench_libc: $(B)/test/mathbench.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $< $(LDLIBS) -lc build/pl/lib/libmathlib.a -lm
+
+build/pl/bin/ulp: $(B)/test/ulp.o build/pl/lib/libmathlib.a
+ $(CC) $(CFLAGS_PL) $(LDFLAGS) -static -o $@ $^ $(LDLIBS)
+
+build/pl/include/%.h: $(PLM)/include/%.h
+ cp $< $@
+
+build/pl/include/test/%.h: $(PLM)/test/%.h
+ cp $< $@
+
+build/pl/bin/%.sh: $(PLM)/test/%.sh
+ cp $< $@
+
+pl-math-tests := $(wildcard $(PLM)/test/testcases/directed/*.tst)
+pl-math-rtests := $(wildcard $(PLM)/test/testcases/random/*.tst)
+
+check-pl/math-test: $(math-tools)
+ cat $(pl-math-tests) | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
+
+check-pl/math-rtest: $(math-host-tools) $(math-tools)
+ cat $(pl-math-rtests) | build/pl/bin/rtest | $(EMULATOR) build/pl/bin/mathtest $(math-testflags)
+
+ulp-input-dir=$(B)/test/inputs
+
+math-lib-lims = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.ulp,$(basename $(pl-lib-srcs)))
+math-lib-fenvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.fenv,$(basename $(pl-lib-srcs)))
+math-lib-itvs = $(patsubst $(PLM)/%,$(ulp-input-dir)/%.itv,$(basename $(pl-lib-srcs)))
+
+ulp-inputs = $(math-lib-lims) $(math-lib-fenvs) $(math-lib-itvs)
+
+$(ulp-inputs): CFLAGS_PL += -I$(PLM) -I$(PLM)/include $(math-cflags)
+
+$(ulp-input-dir)/%.ulp: $(PLM)/%.c
+ mkdir -p $(@D)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_ULP [^ ]* [^ ]*" || true; } > $@
+
+$(ulp-input-dir)/%.fenv: $(PLM)/%.c
+ mkdir -p $(@D)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep -o "PL_TEST_EXPECT_FENV_ENABLED [^ ]*" || true; } > $@
+
+$(ulp-input-dir)/%.itv: $(PLM)/%.c
+ mkdir -p $(dir $@)
+ $(CC) -I$(PLM)/test $(CFLAGS_PL) $< -o - -E | { grep "PL_TEST_INTERVAL " || true; } | sed "s/ PL_TEST_INTERVAL/\nPL_TEST_INTERVAL/g" > $@
+
+ulp-lims := $(ulp-input-dir)/limits
+$(ulp-lims): $(math-lib-lims)
+ cat $^ | sed "s/PL_TEST_ULP //g;s/^ *//g" > $@
+
+fenv-exps := $(ulp-input-dir)/fenv
+$(fenv-exps): $(math-lib-fenvs)
+ cat $^ | sed "s/PL_TEST_EXPECT_FENV_ENABLED //g;s/^ *//g" > $@
+
+ulp-itvs := $(ulp-input-dir)/intervals
+$(ulp-itvs): $(math-lib-itvs)
+ cat $^ | sort -u | sed "s/PL_TEST_INTERVAL //g" > $@
+
+check-pl/math-ulp: $(math-tools) $(ulp-lims) $(fenv-exps) $(ulp-itvs)
+ WANT_SVE_MATH=$(WANT_SVE_MATH) \
+ ULPFLAGS="$(math-ulpflags)" \
+ LIMITS=../../../$(ulp-lims) \
+ INTERVALS=../../../$(ulp-itvs) \
+ FENV=../../../$(fenv-exps) \
+ FUNC=$(func) \
+ build/pl/bin/runulp.sh $(EMULATOR)
+
+check-pl/math: check-pl/math-test check-pl/math-rtest check-pl/math-ulp
+
+$(DESTDIR)$(libdir)/pl/%.so: build/pl/lib/%.so
+ $(INSTALL) -D $< $@
+
+$(DESTDIR)$(libdir)/pl/%: build/pl/lib/%
+ $(INSTALL) -m 644 -D $< $@
+
+$(DESTDIR)$(includedir)/pl/%: build/pl/include/%
+ $(INSTALL) -m 644 -D $< $@
+
+install-pl/math: \
+ $(pl-libs:build/pl/lib/%=$(DESTDIR)$(libdir)/pl/%) \
+ $(pl-includes:build/pl/include/%=$(DESTDIR)$(includedir)/pl/%)
+
+clean-pl/math:
+ rm -f $(pl/math-files)
+
+.PHONY: all-pl/math check-pl/math-test check-pl/math-rtest check-pl/math-ulp check-pl/math install-pl/math clean-pl/math
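
The three *_gen.h rules above drive the test harness off the PL_SIG annotations in the sources: each rule greps the PL_SIG lines out of every file and runs just those lines back through the C preprocessor with an alternative, command-line definition of PL_SIG. The sketch below shows what that token pasting produces; the file name pl_sig_demo.c and the fallback definition are illustrative, and the concrete meaning of the generated _Z* names is supplied by the ulp/mathbench test headers listed in the diffstat rather than shown here.

/* pl_sig_demo.c - illustrative only.  Preprocess with the same definition the
   ulp_funcs_gen.h rule passes to $(CC):

     cc -E -P "-DPL_SIG(v, t, a, f, ...)=_Z##v##t##a(f)" pl_sig_demo.c

   and the invocation below is rewritten, by token pasting, to

     _ZSD1 (acos)

   (presumably: scalar, double-precision, one argument, acos).  */

#ifndef PL_SIG
/* Outside the generator the annotation expands to nothing.  */
#define PL_SIG(v, t, a, f, ...)
#endif

PL_SIG (S, D, 1, acos, -1.0, 1.0)

int pl_sig_demo_unused; /* keep the translation unit non-empty */
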
diff --git a/contrib/arm-optimized-routines/pl/math/acos_2u.c b/contrib/arm-optimized-routines/pl/math/acos_2u.c
new file mode 100644
index 000000000000..9ec6894f1d81
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/acos_2u.c
@@ -0,0 +1,100 @@
+/*
+ * Double-precision acos(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "poly_scalar_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffffffffffff)
+#define Half (0x3fe0000000000000)
+#define One (0x3ff0000000000000)
+#define PiOver2 (0x1.921fb54442d18p+0)
+#define Pi (0x1.921fb54442d18p+1)
+#define Small (0x3c90000000000000) /* 2^-53. */
+#define Small16 (0x3c90)
+#define QNaN (0x7ff8)
+
+/* Fast implementation of double-precision acos(x) based on polynomial
+ approximation of double-precision asin(x).
+
+ For x < Small, approximate acos(x) by pi/2 - x. Small = 2^-53 for correct
+ rounding.
+
+ For |x| in [Small, 0.5], use the trigonometric identity
+
+ acos(x) = pi/2 - asin(x)
+
+ and use an order 11 polynomial P such that the final approximation of asin is
+ an odd polynomial: asin(x) ~ x + x^3 * P(x^2).
+
+ The largest observed error in this region is 1.18 ulps,
+ acos(0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0
+ want 0x1.0d54d1985c069p+0.
+
+ For |x| in [0.5, 1.0], use the following development of acos(x) near x = 1
+
+ acos(x) ~ 2 * sqrt(z) (1 + z * P(z))
+
+ where z = (1-x)/2, z is near 0 when x approaches 1, and P contributes to the
+ approximation of asin near 0.
+
+ The largest observed error in this region is 1.52 ulps,
+ acos(0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1
+ want 0x1.edbbedf8a7d6cp-1.
+
+ For x in [-1.0, -0.5], use this other identity to deduce the negative inputs
+ from their absolute value: acos(x) = pi - acos(-x). */
+double
+acos (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask;
+ uint64_t ia16 = ia >> 48;
+ double ax = asdouble (ia);
+ uint64_t sign = ix & ~AbsMask;
+
+ /* Special values and invalid range. */
+ if (unlikely (ia16 == QNaN))
+ return x;
+ if (ia > One)
+ return __math_invalid (x);
+ if (ia16 < Small16)
+ return PiOver2 - x;
+
+ /* Evaluate polynomial Q(|x|) = z + z * z2 * P(z2) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ double z2 = ax < 0.5 ? x * x : fma (-0.5, ax, 0.5);
+ double z = ax < 0.5 ? ax : sqrt (z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ double z4 = z2 * z2;
+ double z8 = z4 * z4;
+ double z16 = z8 * z8;
+ double p = estrin_11_f64 (z2, z4, z8, z16, __asin_poly);
+
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = fma (z * z2, p, z);
+
+ /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5
+ = pi - 2 Q(|x|), for -1.0 < x <= -0.5
+ = 2 Q(|x|) , for 0.5 <= x <= 1.0. */
+ if (ax < 0.5)
+ return PiOver2 - asdouble (asuint64 (p) | sign);
+
+ return (x <= -0.5) ? fma (-2.0, p, Pi) : 2.0 * p;
+}
+
+PL_SIG (S, D, 1, acos, -1.0, 1.0)
+PL_TEST_ULP (acos, 1.02)
+PL_TEST_INTERVAL (acos, 0, Small, 5000)
+PL_TEST_INTERVAL (acos, Small, 0.5, 50000)
+PL_TEST_INTERVAL (acos, 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (acos, 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (acos, 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (acos, -0, -inf, 20000)
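
As a cross-check on the interval reduction described in the header comment above, here is a minimal standalone sketch that performs the same case split but leans on the libm asin instead of the shared polynomial; the function name and structure are illustrative and are not the routine from this patch (domain checks for |x| > 1 and NaN are omitted).

#include <math.h>

static const double pi = 0x1.921fb54442d18p+1;
static const double pi_over_2 = 0x1.921fb54442d18p+0;

/* Same reduction as acos_2u.c, with asin standing in for x + x^3 P(x^2).  */
static double
acos_by_reduction (double x)
{
  double ax = fabs (x);
  if (ax < 0x1p-53)                   /* Tiny |x|: acos(x) ~ pi/2 - x.  */
    return pi_over_2 - x;
  if (ax < 0.5)                       /* acos(x) = pi/2 - asin(x).  */
    return pi_over_2 - asin (x);
  double z = (1.0 - ax) / 2.0;        /* z -> 0 as |x| -> 1.  */
  double q = 2.0 * asin (sqrt (z));   /* acos(|x|) on [0.5, 1].  */
  return x < 0.0 ? pi - q : q;        /* acos(x) = pi - acos(-x).  */
}

The real routine replaces both asin calls with a single odd polynomial so that one coefficient table serves both intervals.
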
diff --git a/contrib/arm-optimized-routines/pl/math/acosf_1u4.c b/contrib/arm-optimized-routines/pl/math/acosf_1u4.c
new file mode 100644
index 000000000000..6dde422ef85a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/acosf_1u4.c
@@ -0,0 +1,99 @@
+/*
+ * Single-precision acos(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffff)
+#define Half (0x3f000000)
+#define One (0x3f800000)
+#define PiOver2f (0x1.921fb6p+0f)
+#define Pif (0x1.921fb6p+1f)
+#define Small (0x32800000) /* 2^-26. */
+#define Small12 (0x328)
+#define QNaN (0x7fc)
+
+/* Fast implementation of single-precision acos(x) based on polynomial
+ approximation of single-precision asin(x).
+
+ For x < Small, approximate acos(x) by pi/2 - x. Small = 2^-26 for correct
+ rounding.
+
+ For |x| in [Small, 0.5], use the trigonometric identity
+
+ acos(x) = pi/2 - asin(x)
+
+ and use an order 4 polynomial P such that the final approximation of asin is
+ an odd polynomial: asin(x) ~ x + x^3 * P(x^2).
+
+ The largest observed error in this region is 1.16 ulps,
+ acosf(0x1.ffbeccp-2) got 0x1.0c27f8p+0 want 0x1.0c27f6p+0.
+
+ For |x| in [0.5, 1.0], use the following development of acos(x) near x = 1
+
+ acos(x) ~ 2 * sqrt(z) (1 + z * P(z))
+
+ where z = (1-x)/2, z is near 0 when x approaches 1, and P contributes to the
+ approximation of asin near 0.
+
+ The largest observed error in this region is 1.32 ulps,
+ acosf(0x1.15ba56p-1) got 0x1.feb33p-1 want 0x1.feb32ep-1.
+
+ For x in [-1.0, -0.5], use this other identity to deduce the negative inputs
+ from their absolute value.
+
+ acos(x) = pi - acos(-x)
+
+ The largest observed error in this region is 1.28 ulps,
+ acosf(-0x1.002072p-1) got 0x1.0c1e84p+1 want 0x1.0c1e82p+1. */
+float
+acosf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 20;
+ float ax = asfloat (ia);
+ uint32_t sign = ix & ~AbsMask;
+
+ /* Special values and invalid range. */
+ if (unlikely (ia12 == QNaN))
+ return x;
+ if (ia > One)
+ return __math_invalidf (x);
+ if (ia12 < Small12)
+ return PiOver2f - x;
+
+ /* Evaluate polynomial Q(|x|) = z + z * z2 * P(z2) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ float z2 = ax < 0.5 ? x * x : fmaf (-0.5f, ax, 0.5f);
+ float z = ax < 0.5 ? ax : sqrtf (z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ float p = horner_4_f32 (z2, __asinf_poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = fmaf (z * z2, p, z);
+
+ /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5
+ = pi - 2 Q(|x|), for -1.0 < x <= -0.5
+ = 2 Q(|x|) , for 0.5 <= x <= 1.0. */
+ if (ax < 0.5)
+ return PiOver2f - asfloat (asuint (p) | sign);
+
+ return (x <= -0.5) ? fmaf (-2.0f, p, Pif) : 2.0f * p;
+}
+
+PL_SIG (S, F, 1, acos, -1.0, 1.0)
+PL_TEST_ULP (acosf, 0.82)
+PL_TEST_INTERVAL (acosf, 0, Small, 5000)
+PL_TEST_INTERVAL (acosf, Small, 0.5, 50000)
+PL_TEST_INTERVAL (acosf, 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (acosf, 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (acosf, 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (acosf, -0, -inf, 20000)
diff --git a/contrib/arm-optimized-routines/pl/math/acosh_3u.c b/contrib/arm-optimized-routines/pl/math/acosh_3u.c
new file mode 100644
index 000000000000..4e2cb6737ba8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/acosh_3u.c
@@ -0,0 +1,66 @@
+/*
+ * Double-precision acosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Ln2 (0x1.62e42fefa39efp-1)
+#define MinusZero (0x8000000000000000)
+#define SquareLim (0x5fe0000000000000) /* asuint64(0x1.0p511). */
+#define Two (0x4000000000000000) /* asuint64(2.0). */
+
+double
+optr_aor_log_f64 (double);
+
+double
+log1p (double);
+
+/* acosh approximation using a variety of approaches on different intervals:
+
+ acosh(x) = ln(x + sqrt(x * x - 1)).
+
+ x >= 2^511: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
+ close enough to x that we can calculate the result by ln(2x) == ln(x) +
+ ln(2). The greatest observed error in this region is 0.98 ULP:
+ acosh(0x1.1b9bf42923d1dp+853) got 0x1.28066a11a7c7fp+9
+ want 0x1.28066a11a7c8p+9.
+
+ x > 2: Calculate the result directly using definition of acosh(x). Greatest
+ observed error in this region is 1.33 ULP:
+ acosh(0x1.1e45d14bfcfa2p+1) got 0x1.71a06f50c34b5p+0
+ want 0x1.71a06f50c34b6p+0.
+
+ 0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is
+ undefined. For 1 <= x <= 2, the largest observed error is 2.69 ULP:
+ acosh(0x1.073528248093p+0) got 0x1.e4d9bd20684f3p-3
+ want 0x1.e4d9bd20684f6p-3. */
+double
+acosh (double x)
+{
+ uint64_t ix = asuint64 (x);
+
+ if (unlikely (ix >= MinusZero))
+ return __math_invalid (x);
+
+ if (unlikely (ix >= SquareLim))
+ return optr_aor_log_f64 (x) + Ln2;
+
+ if (ix >= Two)
+ return optr_aor_log_f64 (x + sqrt (x * x - 1));
+
+ double xm1 = x - 1;
+ return log1p (xm1 + sqrt (2 * xm1 + xm1 * xm1));
+}
+
+PL_SIG (S, D, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (acosh, 2.19)
+PL_TEST_INTERVAL (acosh, 0, 1, 10000)
+PL_TEST_INTERVAL (acosh, 1, 2, 100000)
+PL_TEST_INTERVAL (acosh, 2, 0x1p511, 100000)
+PL_TEST_INTERVAL (acosh, 0x1p511, inf, 100000)
+PL_TEST_INTERVAL (acosh, -0, -inf, 10000)
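
A compact restatement of the branch structure above, written against the standard libm so the identities are easy to verify in isolation; the name is illustrative, and domain errors (x < 1, NaN) are left to the underlying calls rather than reported via __math_invalid as the patch does.

#include <math.h>

static double
acosh_by_branches (double x)
{
  if (x >= 0x1p511)       /* x*x would overflow: ln(2x) = ln(x) + ln(2).  */
    return log (x) + 0x1.62e42fefa39efp-1;
  if (x >= 2.0)           /* Direct definition is accurate enough here.  */
    return log (x + sqrt (x * x - 1));
  /* 1 <= x < 2: write x = 1 + t, so x + sqrt(x*x - 1) = 1 + (t + sqrt(2t + t*t))
     and the cancellation-prone log is replaced by log1p.  */
  double t = x - 1;
  return log1p (t + sqrt (2 * t + t * t));
}
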
diff --git a/contrib/arm-optimized-routines/pl/math/acoshf_2u8.c b/contrib/arm-optimized-routines/pl/math/acoshf_2u8.c
new file mode 100644
index 000000000000..c9cded7fd2ff
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/acoshf_2u8.c
@@ -0,0 +1,63 @@
+/*
+ * Single-precision acosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Ln2 (0x1.62e4p-1f)
+#define MinusZero 0x80000000
+#define SquareLim 0x5f800000 /* asuint(0x1p64). */
+#define Two 0x40000000
+
+/* Single-precision log from math/. */
+float
+optr_aor_log_f32 (float);
+
+/* Single-precision log(1+x) from pl/math. */
+float
+log1pf (float);
+
+/* acoshf approximation using a variety of approaches on different intervals:
+
+ x >= 2^64: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
+ close enough to x that we can calculate the result by ln(2x) == ln(x) +
+ ln(2). The greatest error in the region is 0.94 ULP:
+ acoshf(0x1.15f706p+92) got 0x1.022e14p+6 want 0x1.022e16p+6.
+
+ x > 2: Calculate the result directly using definition of acosh(x) = ln(x +
+ sqrt(x*x - 1)). Greatest error in this region is 1.30 ULP:
+ acoshf(0x1.249d8p+1) got 0x1.77e1aep+0 want 0x1.77e1bp+0.
+
+ 0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is
+ undefined. For 1 <= x <= 2, the greatest error is 2.78 ULP:
+ acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 want 0x1.ef9ea2p-3. */
+float
+acoshf (float x)
+{
+ uint32_t ix = asuint (x);
+
+ if (unlikely (ix >= MinusZero))
+ return __math_invalidf (x);
+
+ if (unlikely (ix >= SquareLim))
+ return optr_aor_log_f32 (x) + Ln2;
+
+ if (ix > Two)
+ return optr_aor_log_f32 (x + sqrtf (x * x - 1));
+
+ float xm1 = x - 1;
+ return log1pf (xm1 + sqrtf (2 * xm1 + xm1 * xm1));
+}
+
+PL_SIG (S, F, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (acoshf, 2.30)
+PL_TEST_INTERVAL (acoshf, 0, 1, 100)
+PL_TEST_INTERVAL (acoshf, 1, 2, 10000)
+PL_TEST_INTERVAL (acoshf, 2, 0x1p64, 100000)
+PL_TEST_INTERVAL (acoshf, 0x1p64, inf, 100000)
+PL_TEST_INTERVAL (acoshf, -0, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/asin_3u.c b/contrib/arm-optimized-routines/pl/math/asin_3u.c
new file mode 100644
index 000000000000..0b50995449ce
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asin_3u.c
@@ -0,0 +1,106 @@
+/*
+ * Double-precision asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f64.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffffffffffff)
+#define Half (0x3fe0000000000000)
+#define One (0x3ff0000000000000)
+#define PiOver2 (0x1.921fb54442d18p+0)
+#define Small (0x3e50000000000000) /* 2^-26. */
+#define Small16 (0x3e50)
+#define QNaN (0x7ff8)
+
+/* Fast implementation of double-precision asin(x) based on polynomial
+ approximation.
+
+ For x < Small, approximate asin(x) by x. Small = 2^-26 for correct rounding.
+
+ For x in [Small, 0.5], use an order 11 polynomial P such that the final
+ approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+ The largest observed error in this region is 1.01 ulps,
+ asin(0x1.da9735b5a9277p-2) got 0x1.ed78525a927efp-2
+ want 0x1.ed78525a927eep-2.
+
+ No cheap approximation can be obtained near x = 1, since the function is not
+ continuously differentiable on 1.
+
+ For x in [0.5, 1.0], we use a method based on a trigonometric identity
+
+ asin(x) = pi/2 - acos(x)
+
+ and a generalized power series expansion of acos(y) near y=1, that reads as
+
+ acos(y)/sqrt(2y) ~ 1 + 1/12 * y + 3/160 * y^2 + ... (1)
+
+ The Taylor series of asin(z) near z = 0, reads as
+
+ asin(z) ~ z + z^3 P(z^2) = z + z^3 * (1/6 + 3/40 z^2 + ...).
+
+ Therefore, (1) can be written in terms of P(y/2) or even asin(y/2)
+
+ acos(y) ~ sqrt(2y) (1 + y/2 * P(y/2)) = 2 * sqrt(y/2) (1 + y/2 * P(y/2))
+
+ Hence, if we write z = (1-x)/2, z is near 0 when x approaches 1 and
+
+ asin(x) ~ pi/2 - acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)).
+
+ The largest observed error in this region is 2.69 ulps,
+ asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
+ want 0x1.110d7e85fdd53p-1. */
+double
+asin (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask;
+ uint64_t ia16 = ia >> 48;
+ double ax = asdouble (ia);
+ uint64_t sign = ix & ~AbsMask;
+
+ /* Special values and invalid range. */
+ if (unlikely (ia16 == QNaN))
+ return x;
+ if (ia > One)
+ return __math_invalid (x);
+ if (ia16 < Small16)
+ return x;
+
+ /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ double z2 = ax < 0.5 ? x * x : fma (-0.5, ax, 0.5);
+ double z = ax < 0.5 ? ax : sqrt (z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ double z4 = z2 * z2;
+ double z8 = z4 * z4;
+ double z16 = z8 * z8;
+ double p = estrin_11_f64 (z2, z4, z8, z16, __asin_poly);
+
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = fma (z * z2, p, z);
+
+ /* asin(|x|) = Q(|x|) , for |x| < 0.5
+ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */
+ double y = ax < 0.5 ? p : fma (-2.0, p, PiOver2);
+
+ /* Copy sign. */
+ return asdouble (asuint64 (y) | sign);
+}
+
+PL_SIG (S, D, 1, asin, -1.0, 1.0)
+PL_TEST_ULP (asin, 2.19)
+PL_TEST_INTERVAL (asin, 0, Small, 5000)
+PL_TEST_INTERVAL (asin, Small, 0.5, 50000)
+PL_TEST_INTERVAL (asin, 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (asin, 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (asin, 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (asin, -0, -inf, 20000)
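
The polynomial step above hinges on estrin_11_f64 from poly_scalar_f64.h, whose body is not part of this hunk. The sketch below shows the kind of Estrin-scheme evaluation such a helper performs for an order-11 polynomial, given the precomputed powers that asin passes in (z2, z4, z8, z16); the exact pairing and FMA order in the real header may differ.

#include <math.h>

/* Evaluate c[0] + c[1]*x + ... + c[11]*x^11 with Estrin's scheme, given
   x2 = x^2, x4 = x^4 and x8 = x^8 already computed by the caller.  */
static double
estrin_order_11 (double x, double x2, double x4, double x8, const double *c)
{
  /* Pairs of adjacent coefficients: independent FMAs that can issue in
     parallel instead of forming one long Horner dependency chain.  */
  double p01 = fma (c[1], x, c[0]);
  double p23 = fma (c[3], x, c[2]);
  double p45 = fma (c[5], x, c[4]);
  double p67 = fma (c[7], x, c[6]);
  double p89 = fma (c[9], x, c[8]);
  double p1011 = fma (c[11], x, c[10]);

  /* Combine pairs into quads, then quads into the degree-11 result.  */
  double p03 = fma (p23, x2, p01);
  double p47 = fma (p67, x2, p45);
  double p811 = fma (p1011, x2, p89);

  double p07 = fma (p47, x4, p03);
  return fma (p811, x8, p07);
}

In asin above the polynomial variable is z2, so P is evaluated in x^2, which is what keeps the final x + x^3 P(x^2) form odd.
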
diff --git a/contrib/arm-optimized-routines/pl/math/asin_data.c b/contrib/arm-optimized-routines/pl/math/asin_data.c
new file mode 100644
index 000000000000..b5517731c7f4
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asin_data.c
@@ -0,0 +1,19 @@
+/*
+ * Coefficients for single-precision asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asin(x) directly in [0x1p-106, 0.25]. See tools/asin.sollya
+ for how these coefficients were generated. */
+const double __asin_poly[] = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+ on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */
+ 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5,
+ 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
+ 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8,
+ 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6,
+};
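
To see how this table is meant to be consumed: with x = z*z, the quoted approximation (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) ~ P(x) rearranges to asin(z) ~ z + z^3 * P(z^2), the form evaluated by asin_3u.c and acos_2u.c. Below is a small check program intended to be compiled together with asin_data.c and linked against libm; everything in it is illustrative.

#include <math.h>
#include <stdio.h>

extern const double __asin_poly[12]; /* defined in asin_data.c */

int
main (void)
{
  double z = 0.25;
  double z2 = z * z;
  double p = 0.0;
  for (int i = 11; i >= 0; i--)   /* plain Horner is fine for a one-off check */
    p = p * z2 + __asin_poly[i];
  printf ("z + z^3 P(z^2) = %a\n", z + z * z2 * p);
  printf ("asin(z)        = %a\n", asin (z));
  return 0;
}
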
diff --git a/contrib/arm-optimized-routines/pl/math/asinf_2u5.c b/contrib/arm-optimized-routines/pl/math/asinf_2u5.c
new file mode 100644
index 000000000000..ec608146ff66
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asinf_2u5.c
@@ -0,0 +1,100 @@
+/*
+ * Single-precision asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffff)
+#define Half (0x3f000000)
+#define One (0x3f800000)
+#define PiOver2f (0x1.921fb6p+0f)
+#define Small (0x39800000) /* 2^-12. */
+#define Small12 (0x398)
+#define QNaN (0x7fc)
+
+/* Fast implementation of single-precision asin(x) based on polynomial
+ approximation.
+
+ For x < Small, approximate asin(x) by x. Small = 2^-12 for correct rounding.
+
+ For x in [Small, 0.5], use order 4 polynomial P such that the final
+ approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+ The largest observed error in this region is 0.83 ulps,
+ asinf(0x1.ea00f4p-2) got 0x1.fef15ep-2 want 0x1.fef15cp-2.
+
+ No cheap approximation can be obtained near x = 1, since the function is not
+ continuously differentiable on 1.
+
+ For x in [0.5, 1.0], we use a method based on a trigonometric identity
+
+ asin(x) = pi/2 - acos(x)
+
+ and a generalized power series expansion of acos(y) near y=1, that reads as
+
+ acos(y)/sqrt(2y) ~ 1 + 1/12 * y + 3/160 * y^2 + ... (1)
+
+ The Taylor series of asin(z) near z = 0, reads as
+
+ asin(z) ~ z + z^3 P(z^2) = z + z^3 * (1/6 + 3/40 z^2 + ...).
+
+ Therefore, (1) can be written in terms of P(y/2) or even asin(y/2)
+
+ acos(y) ~ sqrt(2y) (1 + y/2 * P(y/2)) = 2 * sqrt(y/2) (1 + y/2 * P(y/2))
+
+ Hence, if we write z = (1-x)/2, z is near 0 when x approaches 1 and
+
+ asin(x) ~ pi/2 - acos(x) ~ pi/2 - 2 * sqrt(z) (1 + z * P(z)).
+
+ The largest observed error in this region is 2.41 ulps,
+ asinf(0x1.00203ep-1) got 0x1.0c3a64p-1 want 0x1.0c3a6p-1. */
+float
+asinf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 20;
+ float ax = asfloat (ia);
+ uint32_t sign = ix & ~AbsMask;
+
+ /* Special values and invalid range. */
+ if (unlikely (ia12 == QNaN))
+ return x;
+ if (ia > One)
+ return __math_invalidf (x);
+ if (ia12 < Small12)
+ return x;
+
+ /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ float z2 = ax < 0.5 ? x * x : fmaf (-0.5f, ax, 0.5f);
+ float z = ax < 0.5 ? ax : sqrtf (z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ float p = horner_4_f32 (z2, __asinf_poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = fmaf (z * z2, p, z);
+
+ /* asin(|x|) = Q(|x|) , for |x| < 0.5
+ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */
+ float y = ax < 0.5 ? p : fmaf (-2.0f, p, PiOver2f);
+
+ /* Copy sign. */
+ return asfloat (asuint (y) | sign);
+}
+
+PL_SIG (S, F, 1, asin, -1.0, 1.0)
+PL_TEST_ULP (asinf, 1.91)
+PL_TEST_INTERVAL (asinf, 0, Small, 5000)
+PL_TEST_INTERVAL (asinf, Small, 0.5, 50000)
+PL_TEST_INTERVAL (asinf, 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (asinf, 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (asinf, 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (asinf, -0, -inf, 20000)
diff --git a/contrib/arm-optimized-routines/pl/math/asinf_data.c b/contrib/arm-optimized-routines/pl/math/asinf_data.c
new file mode 100644
index 000000000000..1652025e2920
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asinf_data.c
@@ -0,0 +1,16 @@
+/*
+ * Coefficients for single-precision asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asinf(x) directly in [0x1p-24, 0.25]. See tools/asinf.sollya
+ for how these coefficients were generated. */
+const float __asinf_poly[] = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on
+ [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */
+ 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6, 0x1.3af7d8p-5,
+};
diff --git a/contrib/arm-optimized-routines/pl/math/asinh_2u5.c b/contrib/arm-optimized-routines/pl/math/asinh_2u5.c
new file mode 100644
index 000000000000..b7fc81a2b94f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asinh_2u5.c
@@ -0,0 +1,85 @@
+/*
+ * Double-precision asinh(x) function
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "poly_scalar_f64.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define ExpM26 0x3e50000000000000 /* asuint64(0x1.0p-26). */
+#define One 0x3ff0000000000000 /* asuint64(1.0). */
+#define Exp511 0x5fe0000000000000 /* asuint64(0x1.0p511). */
+#define Ln2 0x1.62e42fefa39efp-1
+
+double
+optr_aor_log_f64 (double);
+
+/* Scalar double-precision asinh implementation. This routine uses different
+ approaches on different intervals:
+
+ |x| < 2^-26: Return x. Function is exact in this region.
+
+ |x| < 1: Use custom order-17 polynomial. This is least accurate close to 1.
+ The largest observed error in this region is 1.47 ULPs:
+ asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+ want 0x1.c1d6bf874019cp-1.
+
+ |x| < 2^511: Upper bound of this region is close to sqrt(DBL_MAX). Calculate
+ the result directly using the definition asinh(x) = ln(x + sqrt(x*x + 1)).
+ The largest observed error in this region is 2.03 ULPs:
+ asinh(-0x1.00094e0f39574p+0) got -0x1.c3508eb6a681ep-1
+ want -0x1.c3508eb6a682p-1.
+
+ |x| >= 2^511: We cannot square x without overflow at a low
+ cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
+ even double x without overflow, so calculate this as ln(x) +
+ ln(2). The largest observed error in this region is 0.98 ULPs at many
+ values, for instance:
+ asinh(0x1.5255a4cf10319p+975) got 0x1.52652f4cb26cbp+9
+ want 0x1.52652f4cb26ccp+9. */
+double
+asinh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask;
+ double ax = asdouble (ia);
+ uint64_t sign = ix & ~AbsMask;
+
+ if (ia < ExpM26)
+ {
+ return x;
+ }
+
+ if (ia < One)
+ {
+ double x2 = x * x;
+ double z2 = x2 * x2;
+ double z4 = z2 * z2;
+ double z8 = z4 * z4;
+ double p = estrin_17_f64 (x2, z2, z4, z8, z8 * z8, __asinh_data.poly);
+ double y = fma (p, x2 * ax, ax);
+ return asdouble (asuint64 (y) | sign);
+ }
+
+ if (unlikely (ia >= Exp511))
+ {
+ return asdouble (asuint64 (optr_aor_log_f64 (ax) + Ln2) | sign);
+ }
+
+ return asdouble (asuint64 (optr_aor_log_f64 (ax + sqrt (ax * ax + 1)))
+ | sign);
+}
+
+PL_SIG (S, D, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (asinh, 1.54)
+PL_TEST_INTERVAL (asinh, -0x1p-26, 0x1p-26, 50000)
+PL_TEST_INTERVAL (asinh, 0x1p-26, 1.0, 40000)
+PL_TEST_INTERVAL (asinh, -0x1p-26, -1.0, 10000)
+PL_TEST_INTERVAL (asinh, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (asinh, -1.0, -100.0, 10000)
+PL_TEST_INTERVAL (asinh, 100.0, inf, 50000)
+PL_TEST_INTERVAL (asinh, -100.0, -inf, 10000)
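
asinh, like asin and atan elsewhere in this directory, is odd, so the routine above works on |x| and re-attaches the sign at the end by OR-ing the saved sign bit back in rather than multiplying by +-1. A minimal sketch of that idiom, using memcpy bit-casts in place of the asuint64/asdouble helpers from math_config.h; it assumes, as the routines do, that the magnitude result is non-negative.

#include <stdint.h>
#include <string.h>

static inline uint64_t as_u64 (double d) { uint64_t u; memcpy (&u, &d, sizeof u); return u; }
static inline double as_f64 (uint64_t u) { double d; memcpy (&d, &u, sizeof d); return d; }

#define ABS_MASK 0x7fffffffffffffffULL

/* Return y with the sign bit of x, assuming y >= 0 (equivalent to
   copysign (y, x) in that case).  OR-ing the bit in is branch-free and
   preserves -0.0 and the sign of infinities.  */
static double
apply_sign_of (double y, double x)
{
  uint64_t sign = as_u64 (x) & ~ABS_MASK;
  return as_f64 (as_u64 (y) | sign);
}
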
diff --git a/contrib/arm-optimized-routines/pl/math/asinh_data.c b/contrib/arm-optimized-routines/pl/math/asinh_data.c
new file mode 100644
index 000000000000..073b19799bda
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asinh_data.c
@@ -0,0 +1,22 @@
+/*
+ * Double-precision polynomial coefficients for scalar asinh(x)
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* asinh(x) is odd, and the first term of the Taylor expansion is x, so we can
+ approximate the function by x + x^3 * P(x^2), where P(z) has the form:
+ C0 + C1 * z + C2 * z^2 + C3 * z^3 + ...
+ Note P is evaluated on even powers of x only. See tools/asinh.sollya for the
+ algorithm used to generate these coefficients. */
+const struct asinh_data __asinh_data
+ = {.poly
+ = {-0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
+ 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
+ -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
+ 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
+ -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
+ 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 0x1.93d4ba83d34dap-18}};
diff --git a/contrib/arm-optimized-routines/pl/math/asinhf_3u5.c b/contrib/arm-optimized-routines/pl/math/asinhf_3u5.c
new file mode 100644
index 000000000000..ec26b80ec2ec
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asinhf_3u5.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask (0x7fffffff)
+#define SqrtFltMax (0x1.749e96p+10f)
+#define Ln2 (0x1.62e4p-1f)
+#define One (0x3f8)
+#define ExpM12 (0x398)
+
+float
+optr_aor_log_f32 (float);
+
+/* asinhf approximation using a variety of approaches on different intervals:
+
+ |x| < 2^-12: Return x. Function is exactly rounded in this region.
+
+ |x| < 1.0: Use custom order-8 polynomial. The largest observed
+ error in this region is 1.3ulps:
+ asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1.
+
+ |x| <= SqrtFltMax: Calculate the result directly using the
+ definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest
+ observed error in this region is 1.99ulps.
+ asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1.
+
+ |x| > SqrtFltMax: We cannot square x without overflow at a low
+ cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
+ even double x without overflow, so calculate this as ln(x) +
+ ln(2). The largest observed error in this region is 3.39ulps.
+ asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2. */
+float
+asinhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 20;
+ float ax = asfloat (ia);
+ uint32_t sign = ix & ~AbsMask;
+
+ if (unlikely (ia12 < ExpM12 || ia == 0x7f800000))
+ return x;
+
+ if (unlikely (ia12 >= 0x7f8))
+ return __math_invalidf (x);
+
+ if (ia12 < One)
+ {
+ float x2 = ax * ax;
+ float p = estrin_7_f32 (ax, x2, x2 * x2, __asinhf_data.coeffs);
+ float y = fmaf (x2, p, ax);
+ return asfloat (asuint (y) | sign);
+ }
+
+ if (unlikely (ax > SqrtFltMax))
+ {
+ return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign);
+ }
+
+ return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign);
+}
+
+PL_SIG (S, F, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (asinhf, 2.9)
+PL_TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000)
+PL_TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000)
+PL_TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000)
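
A standalone sketch of the large-argument path described above: for |x| > SqrtFltMax the routine uses asinh(x) ~= ln(2x) = ln(x) + ln(2) to avoid squaring x. The sketch uses libm logf and a double-precision asinh reference in place of optr_aor_log_f32; the input value is illustrative.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  float x = 0x1p80f;	       /* x * x would overflow in single precision.  */
  float ln2 = 0x1.62e43p-1f;
  float via_identity = logf (x) + ln2;	/* asinh(x) ~= ln(x) + ln(2).  */
  float reference = (float) asinh ((double) x); /* double-precision check.  */
  printf ("ln(x) + ln2: %a\nasinh(x):    %a\n", via_identity, reference);
  return 0;
}
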
diff --git a/contrib/arm-optimized-routines/pl/math/asinhf_data.c b/contrib/arm-optimized-routines/pl/math/asinhf_data.c
new file mode 100644
index 000000000000..cd1ef16b3b6a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/asinhf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients for single-precision asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Approximate asinhf(x) directly in [2^-12, 1]. See tools/asinhf.sollya for
+   details of how these coefficients were generated. */
+const struct asinhf_data __asinhf_data
+ = {.coeffs
+ = {-0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 0x1.3a81dcp-4f,
+ 0x1.65bbaap-10f, -0x1.057f1p-4f, 0x1.6c1d46p-5f, -0x1.4cafe8p-7f}};
diff --git a/contrib/arm-optimized-routines/pl/math/atan2_2u5.c b/contrib/arm-optimized-routines/pl/math/atan2_2u5.c
new file mode 100644
index 000000000000..c909ac99fa22
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atan2_2u5.c
@@ -0,0 +1,159 @@
+/*
+ * Double-precision scalar atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdbool.h>
+
+#include "atan_common.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Pi (0x1.921fb54442d18p+1)
+#define PiOver2 (0x1.921fb54442d18p+0)
+#define PiOver4 (0x1.921fb54442d18p-1)
+#define SignMask (0x8000000000000000)
+#define ExpMask (0x7ff0000000000000)
+
+/* We calculate atan2 by P(n/d), where n and d are similar to the input
+ arguments, and P is a polynomial. Evaluating P(x) requires calculating x^8,
+ which may underflow if n and d have very different magnitude.
+ POW8_EXP_UFLOW_BOUND is the lower bound of the difference in exponents of n
+ and d for which P underflows, and is used to special-case such inputs. */
+#define POW8_EXP_UFLOW_BOUND 62
+
+static inline int64_t
+biased_exponent (double f)
+{
+ uint64_t fi = asuint64 (f);
+ return (fi & ExpMask) >> 52;
+}
+
+/* Fast implementation of scalar atan2. Largest errors are when y and x are
+ close together. The greatest observed error is 2.28 ULP:
+ atan2(-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
+ got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
+double
+atan2 (double y, double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iy = asuint64 (y);
+
+ uint64_t sign_x = ix & SignMask;
+ uint64_t sign_y = iy & SignMask;
+
+ uint64_t iax = ix & ~SignMask;
+ uint64_t iay = iy & ~SignMask;
+
+ bool xisnan = isnan (x);
+ if (unlikely (isnan (y) && !xisnan))
+ return __math_invalid (y);
+ if (unlikely (xisnan))
+ return __math_invalid (x);
+
+ /* m = 2 * sign(x) + sign(y). */
+ uint32_t m = ((iy >> 63) & 1) | ((ix >> 62) & 2);
+
+ int64_t exp_diff = biased_exponent (x) - biased_exponent (y);
+
+ /* y = 0. */
+ if (iay == 0)
+ {
+ switch (m)
+ {
+ case 0:
+ case 1:
+ return y; /* atan(+-0,+anything)=+-0. */
+ case 2:
+ return Pi; /* atan(+0,-anything) = pi. */
+ case 3:
+ return -Pi; /* atan(-0,-anything) =-pi. */
+ }
+ }
+ /* Special case for (x, y) either on or very close to the y axis. Either x =
+ 0, or y is much larger than x (difference in exponents >=
+ POW8_EXP_UFLOW_BOUND). */
+ if (unlikely (iax == 0 || exp_diff <= -POW8_EXP_UFLOW_BOUND))
+ return sign_y ? -PiOver2 : PiOver2;
+
+  /* Special case for when x is INF, or when (x, y) is very close to the
+     x axis and x is negative. */
+ if (unlikely (iax == 0x7ff0000000000000
+ || (exp_diff >= POW8_EXP_UFLOW_BOUND && m >= 2)))
+ {
+ if (iay == 0x7ff0000000000000)
+ {
+ switch (m)
+ {
+ case 0:
+ return PiOver4; /* atan(+INF,+INF). */
+ case 1:
+ return -PiOver4; /* atan(-INF,+INF). */
+ case 2:
+ return 3.0 * PiOver4; /* atan(+INF,-INF). */
+ case 3:
+ return -3.0 * PiOver4; /* atan(-INF,-INF). */
+ }
+ }
+ else
+ {
+ switch (m)
+ {
+ case 0:
+ return 0.0; /* atan(+...,+INF). */
+ case 1:
+ return -0.0; /* atan(-...,+INF). */
+ case 2:
+ return Pi; /* atan(+...,-INF). */
+ case 3:
+ return -Pi; /* atan(-...,-INF). */
+ }
+ }
+ }
+ /* y is INF. */
+ if (iay == 0x7ff0000000000000)
+ return sign_y ? -PiOver2 : PiOver2;
+
+ uint64_t sign_xy = sign_x ^ sign_y;
+
+ double ax = asdouble (iax);
+ double ay = asdouble (iay);
+ uint64_t pred_aygtax = (ay > ax);
+
+ /* Set up z for call to atan. */
+ double n = pred_aygtax ? -ax : ay;
+ double d = pred_aygtax ? ay : ax;
+ double z = n / d;
+
+ double ret;
+ if (unlikely (m < 2 && exp_diff >= POW8_EXP_UFLOW_BOUND))
+ {
+ /* If (x, y) is very close to x axis and x is positive, the polynomial
+ will underflow and evaluate to z. */
+ ret = z;
+ }
+ else
+ {
+ /* Work out the correct shift. */
+ double shift = sign_x ? -2.0 : 0.0;
+ shift = pred_aygtax ? shift + 1.0 : shift;
+ shift *= PiOver2;
+
+ ret = eval_poly (z, z, shift);
+ }
+
+ /* Account for the sign of x and y. */
+ return asdouble (asuint64 (ret) ^ sign_xy);
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (S, D, 2, atan2)
+PL_TEST_ULP (atan2, 1.78)
+PL_TEST_INTERVAL (atan2, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (atan2, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (atan2, 1e6, 1e32, 40000)
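
A standalone sketch of the reduction used above: fold |y| and |x| into a ratio in [-1, 1], add a multiple of pi/2 chosen from the quadrant, then restore the sign. libm atan stands in for eval_poly, and the special cases (zeros, infinities, NaNs) are omitted.

#include <math.h>
#include <stdio.h>

/* Mirrors the reduction above with libm atan in place of eval_poly; zeros,
   infinities and NaNs are not handled.  */
static double
atan2_sketch (double y, double x)
{
  double ax = fabs (x), ay = fabs (y);
  int aygtax = ay > ax;
  double n = aygtax ? -ax : ay;
  double d = aygtax ? ay : ax;
  double shift = signbit (x) ? -2.0 : 0.0;
  shift = aygtax ? shift + 1.0 : shift;
  shift *= 0x1.921fb54442d18p+0; /* pi/2.  */
  double ret = shift + atan (n / d);
  /* Flip the sign iff exactly one of x and y is negative.  */
  return (!!signbit (x) ^ !!signbit (y)) ? -ret : ret;
}

int
main (void)
{
  printf ("sketch: %a\nlibm:   %a\n", atan2_sketch (0.5, -1.0),
	  atan2 (0.5, -1.0));
  return 0;
}
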
diff --git a/contrib/arm-optimized-routines/pl/math/atan2f_3u.c b/contrib/arm-optimized-routines/pl/math/atan2f_3u.c
new file mode 100644
index 000000000000..38e1df59c102
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atan2f_3u.c
@@ -0,0 +1,167 @@
+/*
+ * Single-precision scalar atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <stdbool.h>
+
+#include "atanf_common.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Pi (0x1.921fb6p+1f)
+#define PiOver2 (0x1.921fb6p+0f)
+#define PiOver4 (0x1.921fb6p-1f)
+#define SignMask (0x80000000)
+
+/* We calculate atan2f by P(n/d), where n and d are similar to the input
+ arguments, and P is a polynomial. The polynomial may underflow.
+ POLY_UFLOW_BOUND is the lower bound of the difference in exponents of n and d
+ for which P underflows, and is used to special-case such inputs. */
+#define POLY_UFLOW_BOUND 24
+
+static inline int32_t
+biased_exponent (float f)
+{
+ uint32_t fi = asuint (f);
+ int32_t ex = (int32_t) ((fi & 0x7f800000) >> 23);
+ if (unlikely (ex == 0))
+ {
+ /* Subnormal case - we still need to get the exponent right for subnormal
+ numbers as division may take us back inside the normal range. */
+ return ex - __builtin_clz (fi << 9);
+ }
+ return ex;
+}
+
+/* Fast implementation of scalar atan2f. Largest observed error is
+ 2.88ulps in [99.0, 101.0] x [99.0, 101.0]:
+ atan2f(0x1.9332d8p+6, 0x1.8cb6c4p+6) got 0x1.964646p-1
+ want 0x1.964640p-1. */
+float
+atan2f (float y, float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iy = asuint (y);
+
+ uint32_t sign_x = ix & SignMask;
+ uint32_t sign_y = iy & SignMask;
+
+ uint32_t iax = ix & ~SignMask;
+ uint32_t iay = iy & ~SignMask;
+
+ /* x or y is NaN. */
+ if ((iax > 0x7f800000) || (iay > 0x7f800000))
+ return x + y;
+
+ /* m = 2 * sign(x) + sign(y). */
+ uint32_t m = ((iy >> 31) & 1) | ((ix >> 30) & 2);
+
+  /* This follows the glibc ieee754 implementation, except
+ that we do not use +-tiny shifts (non-nearest rounding mode). */
+
+ int32_t exp_diff = biased_exponent (x) - biased_exponent (y);
+
+ /* Special case for (x, y) either on or very close to the x axis. Either y =
+ 0, or y is tiny and x is huge (difference in exponents >=
+ POLY_UFLOW_BOUND). In the second case, we only want to use this special
+ case when x is negative (i.e. quadrants 2 or 3). */
+ if (unlikely (iay == 0 || (exp_diff >= POLY_UFLOW_BOUND && m >= 2)))
+ {
+ switch (m)
+ {
+ case 0:
+ case 1:
+ return y; /* atan(+-0,+anything)=+-0. */
+ case 2:
+ return Pi; /* atan(+0,-anything) = pi. */
+ case 3:
+ return -Pi; /* atan(-0,-anything) =-pi. */
+ }
+ }
+ /* Special case for (x, y) either on or very close to the y axis. Either x =
+ 0, or x is tiny and y is huge (difference in exponents >=
+ POLY_UFLOW_BOUND). */
+ if (unlikely (iax == 0 || exp_diff <= -POLY_UFLOW_BOUND))
+ return sign_y ? -PiOver2 : PiOver2;
+
+ /* x is INF. */
+ if (iax == 0x7f800000)
+ {
+ if (iay == 0x7f800000)
+ {
+ switch (m)
+ {
+ case 0:
+ return PiOver4; /* atan(+INF,+INF). */
+ case 1:
+ return -PiOver4; /* atan(-INF,+INF). */
+ case 2:
+ return 3.0f * PiOver4; /* atan(+INF,-INF). */
+ case 3:
+ return -3.0f * PiOver4; /* atan(-INF,-INF). */
+ }
+ }
+ else
+ {
+ switch (m)
+ {
+ case 0:
+ return 0.0f; /* atan(+...,+INF). */
+ case 1:
+ return -0.0f; /* atan(-...,+INF). */
+ case 2:
+ return Pi; /* atan(+...,-INF). */
+ case 3:
+ return -Pi; /* atan(-...,-INF). */
+ }
+ }
+ }
+ /* y is INF. */
+ if (iay == 0x7f800000)
+ return sign_y ? -PiOver2 : PiOver2;
+
+ uint32_t sign_xy = sign_x ^ sign_y;
+
+ float ax = asfloat (iax);
+ float ay = asfloat (iay);
+
+ bool pred_aygtax = (ay > ax);
+
+ /* Set up z for call to atanf. */
+ float n = pred_aygtax ? -ax : ay;
+ float d = pred_aygtax ? ay : ax;
+ float z = n / d;
+
+ float ret;
+ if (unlikely (m < 2 && exp_diff >= POLY_UFLOW_BOUND))
+ {
+ /* If (x, y) is very close to x axis and x is positive, the polynomial
+ will underflow and evaluate to z. */
+ ret = z;
+ }
+ else
+ {
+ /* Work out the correct shift. */
+ float shift = sign_x ? -2.0f : 0.0f;
+ shift = pred_aygtax ? shift + 1.0f : shift;
+ shift *= PiOver2;
+
+ ret = eval_poly (z, z, shift);
+ }
+
+ /* Account for the sign of x and y. */
+ return asfloat (asuint (ret) ^ sign_xy);
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (S, F, 2, atan2)
+PL_TEST_ULP (atan2f, 2.4)
+PL_TEST_INTERVAL (atan2f, -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (atan2f, -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2f, 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (atan2f, 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (atan2f, 1e6, 1e32, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/atan_2u5.c b/contrib/arm-optimized-routines/pl/math/atan_2u5.c
new file mode 100644
index 000000000000..ee4770101758
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atan_2u5.c
@@ -0,0 +1,73 @@
+/*
+ * Double-precision atan(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "atan_common.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define PiOver2 0x1.921fb54442d18p+0
+#define TinyBound 0x3e1 /* top12(asuint64(0x1p-30)). */
+#define BigBound 0x434 /* top12(asuint64(0x1p53)). */
+#define OneTop 0x3ff
+
+/* Fast implementation of double-precision atan.
+ Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=1/x and shift = pi/2. Maximum observed error is 2.27 ulps:
+ atan(0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+ want 0x1.9225645bdd7c3p-1. */
+double
+atan (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t sign = ix & ~AbsMask;
+ uint64_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 52;
+
+ if (unlikely (ia12 >= BigBound || ia12 < TinyBound))
+ {
+ if (ia12 < TinyBound)
+ /* Avoid underflow by returning x. */
+ return x;
+ if (ia > 0x7ff0000000000000)
+ /* Propagate NaN. */
+ return __math_invalid (x);
+ /* atan(x) rounds to PiOver2 for large x. */
+ return asdouble (asuint64 (PiOver2) ^ sign);
+ }
+
+ double z, az, shift;
+ if (ia12 >= OneTop)
+ {
+ /* For x > 1, use atan(x) = pi / 2 + atan(-1 / x). */
+ z = -1.0 / x;
+ shift = PiOver2;
+ /* Use absolute value only when needed (odd powers of z). */
+ az = -fabs (z);
+ }
+ else
+ {
+ /* For x < 1, approximate atan(x) directly. */
+ z = x;
+ shift = 0;
+ az = asdouble (ia);
+ }
+
+ /* Calculate polynomial, shift + z + z^3 * P(z^2). */
+ double y = eval_poly (z, az, shift);
+ /* Copy sign. */
+ return asdouble (asuint64 (y) ^ sign);
+}
+
+PL_SIG (S, D, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (atan, 1.78)
+PL_TEST_INTERVAL (atan, 0, 0x1p-30, 10000)
+PL_TEST_INTERVAL (atan, -0, -0x1p-30, 1000)
+PL_TEST_INTERVAL (atan, 0x1p-30, 0x1p53, 900000)
+PL_TEST_INTERVAL (atan, -0x1p-30, -0x1p53, 90000)
+PL_TEST_INTERVAL (atan, 0x1p53, inf, 10000)
+PL_TEST_INTERVAL (atan, -0x1p53, -inf, 1000)
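
A one-line check of the reduction identity used above, atan(x) = pi/2 + atan(-1/x) for x > 1, with libm atan as the reference and an illustrative input.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 3.0;
  double pi_over_2 = 0x1.921fb54442d18p+0;
  double reduced = pi_over_2 + atan (-1.0 / x); /* atan argument in [-1, 0].  */
  printf ("reduced: %a\ndirect:  %a\n", reduced, atan (x));
  return 0;
}
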
diff --git a/contrib/arm-optimized-routines/pl/math/atan_common.h b/contrib/arm-optimized-routines/pl/math/atan_common.h
new file mode 100644
index 000000000000..798cc22cc40a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atan_common.h
@@ -0,0 +1,33 @@
+/*
+ * Double-precision polynomial evaluation function for scalar
+ * atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "poly_scalar_f64.h"
+
+/* Polynomial used in fast atan(x) and atan2(y,x) implementations.
+ The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
+static inline double
+eval_poly (double z, double az, double shift)
+{
+ /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
+ full scheme to avoid underflow in x^16. */
+ double z2 = z * z;
+ double x2 = z2 * z2;
+ double x4 = x2 * x2;
+ double x8 = x4 * x4;
+ double y = fma (estrin_11_f64 (z2, x2, x4, x8, __atan_poly_data.poly + 8),
+ x8, estrin_7_f64 (z2, x2, x4, __atan_poly_data.poly));
+
+ /* Finalize. y = shift + z + z^3 * P(z^2). */
+ y = fma (y, z2 * az, az);
+ y = y + shift;
+
+ return y;
+}
+
+#undef P
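
For readers unfamiliar with Estrin's scheme, a standalone order-7 example of the pairing pattern that the estrin_7_f64/estrin_11_f64 helpers implement; the split evaluation above applies the same idea twice so that x^16 is never formed. This sketch does not use the poly_scalar_f64.h helpers, and the test coefficients are illustrative.

#include <math.h>
#include <stdio.h>

/* Order-7 Estrin: pair adjacent coefficients with fma, then combine the
   pairs with x^2 and x^4.  */
static double
estrin_7 (double x, const double *c)
{
  double x2 = x * x, x4 = x2 * x2;
  double p01 = fma (c[1], x, c[0]);
  double p23 = fma (c[3], x, c[2]);
  double p45 = fma (c[5], x, c[4]);
  double p67 = fma (c[7], x, c[6]);
  double p03 = fma (p23, x2, p01);
  double p47 = fma (p67, x2, p45);
  return fma (p47, x4, p03);
}

int
main (void)
{
  /* All-ones coefficients: P(x) = (1 - x^8) / (1 - x).  */
  const double c[8] = { 1, 1, 1, 1, 1, 1, 1, 1 };
  double x = 0.25;
  printf ("estrin: %a\nclosed: %a\n", estrin_7 (x, c),
	  (1 - pow (x, 8)) / (1 - x));
  return 0;
}
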
diff --git a/contrib/arm-optimized-routines/pl/math/atan_data.c b/contrib/arm-optimized-routines/pl/math/atan_data.c
new file mode 100644
index 000000000000..91d0f61d2eaf
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atan_data.c
@@ -0,0 +1,20 @@
+/*
+ * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct atan_poly_data __atan_poly_data = {
+  .poly = {/* Coefficients of polynomial P such that atan(x)~x+x^3*P(x^2) on
+ [2**-1022, 1.0]. See atan.sollya for details of how these were
+ generated. */
+ -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
+ 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
+ -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
+ 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
+ -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
+ 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
+ -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16}};
diff --git a/contrib/arm-optimized-routines/pl/math/atanf_2u9.c b/contrib/arm-optimized-routines/pl/math/atanf_2u9.c
new file mode 100644
index 000000000000..ba6f68089de1
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atanf_2u9.c
@@ -0,0 +1,72 @@
+/*
+ * Single-precision atan(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "atanf_common.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define PiOver2 0x1.921fb6p+0f
+#define AbsMask 0x7fffffff
+#define TinyBound 0x30800000 /* asuint(0x1p-30). */
+#define BigBound 0x4e800000 /* asuint(0x1p30). */
+#define One 0x3f800000
+
+/* Approximation of single-precision atan(x) based on
+ atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1]
+ using z=-1/x and shift = pi/2.
+ Maximum error is 2.88 ulps:
+ atanf(0x1.0565ccp+0) got 0x1.97771p-1
+ want 0x1.97770ap-1. */
+float
+atanf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t sign = ix & ~AbsMask;
+ uint32_t ia = ix & AbsMask;
+
+ if (unlikely (ia < TinyBound))
+ /* Avoid underflow by returning x. */
+ return x;
+
+ if (unlikely (ia > BigBound))
+ {
+ if (ia > 0x7f800000)
+ /* Propagate NaN. */
+ return __math_invalidf (x);
+ /* atan(x) rounds to PiOver2 for large x. */
+ return asfloat (asuint (PiOver2) ^ sign);
+ }
+
+ float z, az, shift;
+ if (ia > One)
+ {
+ /* For x > 1, use atan(x) = pi / 2 + atan(-1 / x). */
+ z = -1.0f / x;
+ shift = PiOver2;
+ /* Use absolute value only when needed (odd powers of z). */
+ az = -fabsf (z);
+ }
+ else
+ {
+ /* For x < 1, approximate atan(x) directly. */
+ z = x;
+ az = asfloat (ia);
+ shift = 0;
+ }
+
+ /* Calculate polynomial, shift + z + z^3 * P(z^2). */
+ float y = eval_poly (z, az, shift);
+ /* Copy sign. */
+ return asfloat (asuint (y) ^ sign);
+}
+
+PL_SIG (S, F, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (atanf, 2.38)
+PL_TEST_SYM_INTERVAL (atanf, 0, 0x1p-30, 5000)
+PL_TEST_SYM_INTERVAL (atanf, 0x1p-30, 1, 40000)
+PL_TEST_SYM_INTERVAL (atanf, 1, 0x1p30, 40000)
+PL_TEST_SYM_INTERVAL (atanf, 0x1p30, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/atanf_common.h b/contrib/arm-optimized-routines/pl/math/atanf_common.h
new file mode 100644
index 000000000000..8952e7e0078b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atanf_common.h
@@ -0,0 +1,38 @@
+/*
+ * Single-precision polynomial evaluation function for scalar
+ * atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_ATANF_COMMON_H
+#define PL_MATH_ATANF_COMMON_H
+
+#include "math_config.h"
+#include "poly_scalar_f32.h"
+
+/* Polynomial used in fast atanf(x) and atan2f(y,x) implementations.
+ The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */
+static inline float
+eval_poly (float z, float az, float shift)
+{
+ /* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
+     a standard implementation using z^8 creates spurious underflow
+     in the very last fma (when z^8 is small enough).
+     Therefore, we split the last fma into a mul and an fma.
+     Horner and single-level Estrin have higher errors that exceed the
+     threshold. */
+ float z2 = z * z;
+ float z4 = z2 * z2;
+
+ /* Then assemble polynomial. */
+ float y = fmaf (
+ z4, z4 * pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly + 4),
+ pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly));
+ /* Finalize:
+     y = shift + z + z^3 * P(z^2). */
+ return fmaf (y, z2 * az, az) + shift;
+}
+
+#endif // PL_MATH_ATANF_COMMON_H
diff --git a/contrib/arm-optimized-routines/pl/math/atanf_data.c b/contrib/arm-optimized-routines/pl/math/atanf_data.c
new file mode 100644
index 000000000000..c4cba2378cea
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atanf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x).
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Coefficients of polynomial P such that atan(x)~x+x^3*P(x^2) on [2**-128, 1.0].
+ */
+const struct atanf_poly_data __atanf_poly_data = {
+ .poly = {/* See atanf.sollya for details of how these were generated. */
+ -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
+ -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f}};
diff --git a/contrib/arm-optimized-routines/pl/math/atanh_3u.c b/contrib/arm-optimized-routines/pl/math/atanh_3u.c
new file mode 100644
index 000000000000..dcfbe8192a22
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atanh_3u.c
@@ -0,0 +1,83 @@
+/*
+ * Double-precision atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "poly_scalar_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define One 0x3ff0000000000000
+#define Ln2Hi 0x1.62e42fefa3800p-1
+#define Ln2Lo 0x1.ef35793c76730p-45
+#define OneMHfRt2Top \
+ 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */
+#define OneTop12 0x3ff
+#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */
+#define BottomMask 0xffffffff
+
+static inline double
+log1p_inline (double x)
+{
+ /* Helper for calculating log(1 + x) using order-18 polynomial on a reduced
+ interval. Copied from log1p_2u.c, with no special-case handling. See that
+ file for details of the algorithm. */
+ double m = x + 1;
+ uint64_t mi = asuint64 (m);
+
+ /* Decompose x + 1 into (f + 1) * 2^k, with k chosen such that f is in
+ [sqrt(2)/2, sqrt(2)]. */
+ uint32_t u = (mi >> 32) + OneMHfRt2Top;
+ int32_t k = (int32_t) (u >> 20) - OneTop12;
+ uint32_t utop = (u & 0x000fffff) + HfRt2Top;
+ uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
+ double f = asdouble (u_red) - 1;
+
+ /* Correction term for round-off in f. */
+ double cm = (x - (m - 1)) / m;
+
+ /* Approximate log1p(f) with polynomial. */
+ double f2 = f * f;
+ double f4 = f2 * f2;
+ double f8 = f4 * f4;
+ double p = fma (
+ f, estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs) * f, f);
+
+ /* Recombine log1p(x) = k*log2 + log1p(f) + c/m. */
+ double kd = k;
+ double y = fma (Ln2Lo, kd, cm);
+ return y + fma (Ln2Hi, kd, p);
+}
+
+/* Approximation for double-precision inverse tanh(x), using a simplified
+ version of log1p. Greatest observed error is 3.00 ULP:
+ atanh(0x1.e58f3c108d714p-4) got 0x1.e7da77672a647p-4
+ want 0x1.e7da77672a64ap-4. */
+double
+atanh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t sign = ix & ~AbsMask;
+ uint64_t ia = ix & AbsMask;
+
+ if (unlikely (ia == One))
+ return __math_divzero (sign >> 32);
+
+ if (unlikely (ia > One))
+ return __math_invalid (x);
+
+ double halfsign = asdouble (Half | sign);
+ double ax = asdouble (ia);
+ return halfsign * log1p_inline ((2 * ax) / (1 - ax));
+}
+
+PL_SIG (S, D, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (atanh, 3.00)
+PL_TEST_SYM_INTERVAL (atanh, 0, 0x1p-23, 10000)
+PL_TEST_SYM_INTERVAL (atanh, 0x1p-23, 1, 90000)
+PL_TEST_SYM_INTERVAL (atanh, 1, inf, 100)
diff --git a/contrib/arm-optimized-routines/pl/math/atanhf_3u1.c b/contrib/arm-optimized-routines/pl/math/atanhf_3u1.c
new file mode 100644
index 000000000000..e99d5a9900a9
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/atanhf_3u1.c
@@ -0,0 +1,86 @@
+/*
+ * Single-precision atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define Four 0x40800000
+#define Ln2 0x1.62e43p-1f
+/* asuint(0x1p-12), below which atanhf(x) rounds to x. */
+#define TinyBound 0x39800000
+
+#define C(i) __log1pf_data.coeffs[i]
+
+static inline float
+eval_poly (float m)
+{
+ /* Approximate log(1+m) on [-0.25, 0.5] using Estrin scheme. */
+ float p_12 = fmaf (m, C (1), C (0));
+ float p_34 = fmaf (m, C (3), C (2));
+ float p_56 = fmaf (m, C (5), C (4));
+ float p_78 = fmaf (m, C (7), C (6));
+
+ float m2 = m * m;
+ float p_02 = fmaf (m2, p_12, m);
+ float p_36 = fmaf (m2, p_56, p_34);
+ float p_79 = fmaf (m2, C (8), p_78);
+
+ float m4 = m2 * m2;
+ float p_06 = fmaf (m4, p_36, p_02);
+
+ return fmaf (m4 * p_79, m4, p_06);
+}
+
+static inline float
+log1pf_inline (float x)
+{
+ /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
+ special-case handling. See that file for details of the algorithm. */
+ float m = x + 1.0f;
+ int k = (asuint (m) - 0x3f400000) & 0xff800000;
+ float s = asfloat (Four - k);
+ float m_scale = asfloat (asuint (x) - k) + fmaf (0.25f, s, -1.0f);
+ float p = eval_poly (m_scale);
+ float scale_back = (float) k * 0x1.0p-23f;
+ return fmaf (scale_back, Ln2, p);
+}
+
+/* Approximation for single-precision inverse tanh(x), using a simplified
+ version of log1p. Maximum error is 3.08 ULP:
+ atanhf(0x1.ff0d5p-5) got 0x1.ffb768p-5
+ want 0x1.ffb76ep-5. */
+float
+atanhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ uint32_t sign = ix & ~AbsMask;
+
+ if (unlikely (iax < TinyBound))
+ return x;
+
+ if (iax == One)
+ return __math_divzero (sign);
+
+ if (unlikely (iax > One))
+ return __math_invalidf (x);
+
+ float halfsign = asfloat (Half | sign);
+ float ax = asfloat (iax);
+ return halfsign * log1pf_inline ((2 * ax) / (1 - ax));
+}
+
+PL_SIG (S, F, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (atanhf, 2.59)
+PL_TEST_SYM_INTERVAL (atanhf, 0, 0x1p-12, 500)
+PL_TEST_SYM_INTERVAL (atanhf, 0x1p-12, 1, 200000)
+PL_TEST_SYM_INTERVAL (atanhf, 1, inf, 1000)
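
Both atanh routines above rest on the identity atanh(x) = 0.5 * log1p(2x / (1 - x)), with the sign folded into the 0.5 factor. A standalone check using libm log1p and atanh, with an illustrative input:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 0.75;
  /* atanh(x) = 0.5 * ln((1+x)/(1-x)) = 0.5 * log1p(2x / (1-x)).  */
  double via_log1p = 0.5 * log1p (2 * x / (1 - x));
  printf ("via log1p: %a\nlibm:      %a\n", via_log1p, atanh (x));
  return 0;
}
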
diff --git a/contrib/arm-optimized-routines/pl/math/cbrt_2u.c b/contrib/arm-optimized-routines/pl/math/cbrt_2u.c
new file mode 100644
index 000000000000..80be83c4470c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cbrt_2u.c
@@ -0,0 +1,69 @@
+/*
+ * Double-precision cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+PL_SIG (S, D, 1, cbrt, -10.0, 10.0)
+
+#define AbsMask 0x7fffffffffffffff
+#define TwoThirds 0x1.5555555555555p-1
+
+#define C(i) __cbrt_data.poly[i]
+#define T(i) __cbrt_data.table[i]
+
+/* Approximation for double-precision cbrt(x), using low-order polynomial and
+ two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat
+ according to the exponent, for instance an error observed for double value
+ m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
+ integer.
+ cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
+ want 0x1.965fe72821e99p+0. */
+double
+cbrt (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iax = ix & AbsMask;
+ uint64_t sign = ix & ~AbsMask;
+
+ if (unlikely (iax == 0 || iax == 0x7ff0000000000000))
+ return x;
+
+ /* |x| = m * 2^e, where m is in [0.5, 1.0].
+ We can easily decompose x into m and e using frexp. */
+ int e;
+ double m = frexp (asdouble (iax), &e);
+
+ /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point for
+ Newton iterations. */
+ double p_01 = fma (C (1), m, C (0));
+ double p_23 = fma (C (3), m, C (2));
+ double p = fma (p_23, m * m, p_01);
+
+ /* Two iterations of Newton's method for iteratively approximating cbrt. */
+ double m_by_3 = m / 3;
+ double a = fma (TwoThirds, p, m_by_3 / (p * p));
+ a = fma (TwoThirds, a, m_by_3 / (a * a));
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ Let t = (2 ^ (e / 3)) / (2 ^ round(e / 3)).
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3.
+ i is an integer in [-2, 2], so t can be looked up in the table T.
+ Hence the result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign.
+ Which can be done easily using ldexp. */
+ return asdouble (asuint64 (ldexp (a * T (2 + e % 3), e / 3)) | sign);
+}
+
+PL_TEST_ULP (cbrt, 1.30)
+PL_TEST_SYM_INTERVAL (cbrt, 0, inf, 1000000)
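
A standalone sketch of the Newton step used above: for f(a) = a^3 - m the update is a' = 2a/3 + m/(3a^2), which is the fma form in the code. The starting guess and mantissa below are illustrative stand-ins for the polynomial seed and frexp output.

#include <math.h>
#include <stdio.h>

/* Newton step for f(a) = a^3 - m: a' = 2a/3 + m / (3 a^2).  */
static double
newton_step (double a, double m)
{
  return fma (0x1.5555555555555p-1, a, (m / 3) / (a * a));
}

int
main (void)
{
  double m = 0.7; /* frexp reduces |x| to a mantissa in [0.5, 1).  */
  double a = 0.9; /* stand-in for the rough polynomial guess.  */
  for (int i = 0; i < 3; i++)
    a = newton_step (a, m);
  printf ("newton: %a\nlibm:   %a\n", a, cbrt (m));
  return 0;
}
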
diff --git a/contrib/arm-optimized-routines/pl/math/cbrt_data.c b/contrib/arm-optimized-routines/pl/math/cbrt_data.c
new file mode 100644
index 000000000000..3d484c2779e2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cbrt_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients and table entries for double-precision cbrt(x).
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct cbrt_data __cbrt_data
+ = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1].
+ See cbrt.sollya for details of generation. */
+ 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 0x1.2c74eaa3ba428p-3},
+ .table = { /* table[i] = 2^((i - 2) / 3). */
+ 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}};
diff --git a/contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c b/contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c
new file mode 100644
index 000000000000..88fcb7162ef6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cbrtf_1u5.c
@@ -0,0 +1,66 @@
+/*
+ * Single-precision cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define SignMask 0x80000000
+#define TwoThirds 0x1.555556p-1f
+
+#define T(i) __cbrtf_data.table[i]
+
+/* Approximation for single-precision cbrt(x), using low-order polynomial and
+ one Newton iteration on a reduced interval. Greatest error is 1.5 ULP. This
+ is observed for every value where the mantissa is 0x1.81410e and the exponent
+ is a multiple of 3, for example:
+ cbrtf(0x1.81410ep+30) got 0x1.255d96p+10
+ want 0x1.255d92p+10. */
+float
+cbrtf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ uint32_t sign = ix & SignMask;
+
+ if (unlikely (iax == 0 || iax == 0x7f800000))
+ return x;
+
+ /* |x| = m * 2^e, where m is in [0.5, 1.0].
+ We can easily decompose x into m and e using frexpf. */
+ int e;
+ float m = frexpf (asfloat (iax), &e);
+
+ /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is,
+ the less accurate the next stage of the algorithm needs to be. An order-4
+ polynomial is enough for one Newton iteration. */
+ float p = pairwise_poly_3_f32 (m, m * m, __cbrtf_data.poly);
+
+ /* One iteration of Newton's method for iteratively approximating cbrt. */
+ float m_by_3 = m / 3;
+ float a = fmaf (TwoThirds, p, m_by_3 / (p * p));
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ Let t = (2 ^ (e / 3)) / (2 ^ round(e / 3)).
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3.
+ i is an integer in [-2, 2], so t can be looked up in the table T.
+ Hence the result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign.
+ Which can be done easily using ldexpf. */
+ return asfloat (asuint (ldexpf (a * T (2 + e % 3), e / 3)) | sign);
+}
+
+PL_SIG (S, F, 1, cbrt, -10.0, 10.0)
+PL_TEST_ULP (cbrtf, 1.03)
+PL_TEST_SYM_INTERVAL (cbrtf, 0, inf, 1000000)
diff --git a/contrib/arm-optimized-routines/pl/math/cbrtf_data.c b/contrib/arm-optimized-routines/pl/math/cbrtf_data.c
new file mode 100644
index 000000000000..c6cdb4de0d65
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cbrtf_data.c
@@ -0,0 +1,15 @@
+/*
+ * Coefficients and table entries for single-precision cbrt(x).
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct cbrtf_data __cbrtf_data
+ = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1].
+ See cbrtf.sollya for details of generation. */
+ 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, 0x1.2c74c2p-3},
+ .table = { /* table[i] = 2^((i - 2) / 3). */
+ 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0}};
diff --git a/contrib/arm-optimized-routines/pl/math/cosh_2u.c b/contrib/arm-optimized-routines/pl/math/cosh_2u.c
new file mode 100644
index 000000000000..2240a9c56f15
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cosh_2u.c
@@ -0,0 +1,63 @@
+/*
+ * Double-precision cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define SpecialBound \
+ 0x40861da04cbafe44 /* 0x1.61da04cbafe44p+9, above which exp overflows. */
+
+double
+__exp_dd (double, double);
+
+static double
+specialcase (double x, uint64_t iax)
+{
+ if (iax == 0x7ff0000000000000)
+ return INFINITY;
+ if (iax > 0x7ff0000000000000)
+ return __math_invalid (x);
+ /* exp overflows above SpecialBound. At this magnitude cosh(x) is dominated by
+ exp(x), so we can approximate cosh(x) by (exp(|x|/2)) ^ 2 / 2. */
+ double t = __exp_dd (asdouble (iax) / 2, 0);
+ return (0.5 * t) * t;
+}
+
+/* Approximation for double-precision cosh(x).
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The greatest observed error is in the special region, 1.93 ULP:
+ cosh(0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021
+ want 0x1.fdf28623ef923p+1021.
+
+ The greatest observed error in the non-special region is 1.03 ULP:
+ cosh(0x1.502cd8e56ab3bp+0) got 0x1.fe54962842d0ep+0
+ want 0x1.fe54962842d0fp+0. */
+double
+cosh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iax = ix & AbsMask;
+
+ /* exp overflows a little bit before cosh, so use special-case handler for the
+ gap, as well as special values. */
+ if (unlikely (iax >= SpecialBound))
+ return specialcase (x, iax);
+
+ double ax = asdouble (iax);
+ /* Use double-precision exp helper to calculate exp(x), then:
+     cosh(x) = exp(|x|) / 2 + 1 / (exp(|x|) * 2). */
+ double t = __exp_dd (ax, 0);
+ return 0.5 * t + 0.5 / t;
+}
+
+PL_SIG (S, D, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (cosh, 1.43)
+PL_TEST_SYM_INTERVAL (cosh, 0, 0x1.61da04cbafe44p+9, 100000)
+PL_TEST_SYM_INTERVAL (cosh, 0x1.61da04cbafe44p+9, 0x1p10, 1000)
+PL_TEST_SYM_INTERVAL (cosh, 0x1p10, inf, 100)
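
A standalone illustration of the special-case rewrite above: for x just past the exp overflow threshold (~709.78), cosh(x) is still finite, and (exp(x/2))^2 / 2 computes it without the intermediate overflow. libm exp and cosh are used as references.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 710.0; /* exp(x) overflows, cosh(x) does not.  */
  double t = exp (x / 2);
  double via_half = (0.5 * t) * t;
  printf ("exp(x):         %a\n(exp(x/2))^2/2: %a\nlibm cosh(x):   %a\n",
	  exp (x), via_half, cosh (x));
  return 0;
}
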
diff --git a/contrib/arm-optimized-routines/pl/math/coshf_1u9.c b/contrib/arm-optimized-routines/pl/math/coshf_1u9.c
new file mode 100644
index 000000000000..cf737840e0d6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/coshf_1u9.c
@@ -0,0 +1,68 @@
+/*
+ * Single-precision cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define TinyBound 0x20000000 /* 0x1p-63: Round to 1 below this. */
+#define SpecialBound \
+ 0x42ad496c /* 0x1.5a92d8p+6: expf overflows above this, so have to use \
+ special case. */
+
+float
+optr_aor_exp_f32 (float);
+
+static NOINLINE float
+specialcase (float x, uint32_t iax)
+{
+ if (iax == 0x7f800000)
+ return INFINITY;
+ if (iax > 0x7f800000)
+ return __math_invalidf (x);
+ if (iax <= TinyBound)
+ /* For tiny x, avoid underflow by just returning 1. */
+ return 1;
+ /* Otherwise SpecialBound <= |x| < Inf. x is too large to calculate exp(x)
+ without overflow, so use exp(|x|/2) instead. For large x cosh(x) is
+ dominated by exp(x), so return:
+ cosh(x) ~= (exp(|x|/2))^2 / 2. */
+ float t = optr_aor_exp_f32 (asfloat (iax) / 2);
+ return (0.5 * t) * t;
+}
+
+/* Approximation for single-precision cosh(x) using exp.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The maximum error is 1.89 ULP, observed for |x| > SpecialBound:
+ coshf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127.
+ The maximum error observed for TinyBound < |x| < SpecialBound is 1.02 ULP:
+ coshf(0x1.50a3cp+0) got 0x1.ff21dcp+0 want 0x1.ff21dap+0. */
+float
+coshf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ float ax = asfloat (iax);
+
+ if (unlikely (iax <= TinyBound || iax >= SpecialBound))
+ {
+ /* x is tiny, large or special. */
+ return specialcase (x, iax);
+ }
+
+ /* Compute cosh using the definition:
+ coshf(x) = exp(x) / 2 + exp(-x) / 2. */
+ float t = optr_aor_exp_f32 (ax);
+ return 0.5f * t + 0.5f / t;
+}
+
+PL_SIG (S, F, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (coshf, 1.89)
+PL_TEST_SYM_INTERVAL (coshf, 0, 0x1p-63, 100)
+PL_TEST_SYM_INTERVAL (coshf, 0, 0x1.5a92d8p+6, 80000)
+PL_TEST_SYM_INTERVAL (coshf, 0x1.5a92d8p+6, inf, 2000)
diff --git a/contrib/arm-optimized-routines/pl/math/cospi_3u1.c b/contrib/arm-optimized-routines/pl/math/cospi_3u1.c
new file mode 100644
index 000000000000..4a688a076829
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cospi_3u1.c
@@ -0,0 +1,89 @@
+/*
+ * Double-precision scalar cospi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_scalar_f64.h"
+
+/* Taylor series coefficients for sin(pi * x).
+   The C2 coefficient (originally ~=5.16771278) has been split into two parts:
+   C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278).
+   This change in magnitude reduces floating point rounding errors.
+   C2_hi is then reintroduced after the polynomial approximation. */
+static const double poly[]
+ = { 0x1.921fb54442d184p1, -0x1.2aef39896f94bp0, 0x1.466bc6775ab16p1,
+ -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+ 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21,
+ -0x1.012a9870eeb7dp-25 };
+
+#define Shift 0x1.8p+52
+
+/* Approximation for scalar double-precision cospi(x).
+ Maximum error: 3.13 ULP:
+ cospi(0x1.160b129300112p-21) got 0x1.fffffffffd16bp-1
+ want 0x1.fffffffffd16ep-1. */
+double
+cospi (double x)
+{
+ if (isinf (x))
+ return __math_invalid (x);
+
+ double ax = asdouble (asuint64 (x) & ~0x8000000000000000);
+
+  /* Edge cases for when cospi should be exactly 1 (integers). Above 0x1p53
+     every double is an even integer, so the result is exactly 1. */
+ if (ax >= 0x1p53)
+ return 1;
+
+  /* If x is an integer, return +- 1, depending on whether x is odd. */
+ uint64_t m = (uint64_t) ax;
+ if (m == ax)
+ return (m & 1) ? -1 : 1;
+
+ /* For very small inputs, squaring r causes underflow.
+ Values below this threshold can be approximated via
+ cospi(x) ~= 1. */
+ if (ax < 0x1p-63)
+ return 1;
+
+  /* Any non-integer values >= 0x1p51 will be an integer + 0.5.
+ These values should return exactly 0. */
+ if (ax >= 0x1p51)
+ return 0;
+
+ /* n = rint(|x|). */
+ double n = ax + Shift;
+ uint64_t sign = asuint64 (n) << 63;
+ n = n - Shift;
+
+  /* We know that cospi(x) = sinpi(0.5 - x). Range-reduce and offset into the
+     sinpi range -1/2 .. 1/2:
+     r = 0.5 - |x - rint(x)|. */
+ double r = 0.5 - fabs (ax - n);
+
+  /* y = sin(pi * r). */
+ double r2 = r * r;
+ double y = horner_9_f64 (r2, poly);
+ y = y * r;
+
+ /* Reintroduce C2_hi. */
+ y = fma (-4 * r2, r, y);
+
+  /* As all values are reduced to -1/2 .. 1/2, the result of the reduced
+     cospi computation is always non-negative, so the sign must be introduced
+     based upon whether x rounds to an odd or an even integer. */
+ return asdouble (asuint64 (y) ^ sign);
+}
+
+PL_SIG (S, D, 1, cospi, -0.9, 0.9)
+PL_TEST_ULP (cospi, 2.63)
+PL_TEST_SYM_INTERVAL (cospi, 0, 0x1p-63, 5000)
+PL_TEST_SYM_INTERVAL (cospi, 0x1p-63, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (cospi, 0.5, 0x1p51f, 10000)
+PL_TEST_SYM_INTERVAL (cospi, 0x1p51f, inf, 10000)
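
A standalone sketch of the Shift trick used above: adding 0x1.8p+52 to a non-negative double below 2^51 leaves round-to-nearest(x) in the low mantissa bits, so subtracting Shift back recovers rint(x) and the lowest mantissa bit gives its parity. memcpy stands in for asuint64, and the input is illustrative.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  double shift = 0x1.8p+52;
  double ax = 2.6;
  double n = ax + shift;	   /* low mantissa bits now hold round(ax).  */
  uint64_t bits;
  memcpy (&bits, &n, sizeof bits); /* stand-in for asuint64.  */
  uint64_t sign = bits << 63;	   /* lowest bit = parity of round(ax).  */
  n = n - shift;		   /* n == rint(ax) == 3 here.  */
  printf ("rint(ax) = %g, parity bit as sign = 0x%016llx\n", n,
	  (unsigned long long) sign);
  return 0;
}
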
diff --git a/contrib/arm-optimized-routines/pl/math/cospif_2u6.c b/contrib/arm-optimized-routines/pl/math/cospif_2u6.c
new file mode 100644
index 000000000000..d78a98ed8b2d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/cospif_2u6.c
@@ -0,0 +1,84 @@
+/*
+ * Single-precision scalar cospi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Taylor series coefficients for sin(pi * x). */
+#define C0 0x1.921fb6p1f
+#define C1 -0x1.4abbcep2f
+#define C2 0x1.466bc6p1f
+#define C3 -0x1.32d2ccp-1f
+#define C4 0x1.50783p-4f
+#define C5 -0x1.e30750p-8f
+
+#define Shift 0x1.0p+23f
+
+/* Approximation for scalar single-precision cospi(x) - cospif.
+ Maximum error: 2.64 ULP:
+ cospif(0x1.37e844p-4) got 0x1.f16b3p-1
+ want 0x1.f16b2ap-1. */
+float
+cospif (float x)
+{
+ if (isinf (x))
+ return __math_invalidf (x);
+
+ float ax = asfloat (asuint (x) & ~0x80000000);
+
+  /* Edge cases for when cospif should be exactly +/- 1 (integers). 0x1p23 is
+     the limit above which single precision has no fractional part; above
+     0x1p24 every float is an even integer, so 1 is returned directly. */
+ if (ax >= 0x1p24f)
+ return 1;
+
+ uint32_t m = roundf (ax);
+ if (m == ax)
+ return (m & 1) ? -1 : 1;
+
+  /* Any non-integer values >= 0x1p22f will be an integer + 0.5.
+ These values should return exactly 0. */
+ if (ax >= 0x1p22f)
+ return 0;
+
+ /* For very small inputs, squaring r causes underflow.
+ Values below this threshold can be approximated via cospi(x) ~= 1 -
+ (pi*x). */
+ if (ax < 0x1p-31f)
+ return 1 - (C0 * x);
+
+ /* n = rint(|x|). */
+ float n = ax + Shift;
+ uint32_t sign = asuint (n) << 31;
+ n = n - Shift;
+
+  /* We know that cospi(x) = sinpi(0.5 - x). Range-reduce and offset into the
+     sinpi range -1/2 .. 1/2:
+     r = 0.5 - |x - rint(x)|. */
+ float r = 0.5f - fabs (ax - n);
+
+ /* y = sin(pi * r). */
+ float r2 = r * r;
+ float y = fmaf (C5, r2, C4);
+ y = fmaf (y, r2, C3);
+ y = fmaf (y, r2, C2);
+ y = fmaf (y, r2, C1);
+ y = fmaf (y, r2, C0);
+
+  /* As all values are reduced to -1/2 .. 1/2, the result of the reduced
+     cospif computation is always non-negative, so the sign must be introduced
+     based upon whether x rounds to an odd or an even integer. */
+ return asfloat (asuint (y * r) ^ sign);
+}
+
+PL_SIG (S, F, 1, cospi, -0.9, 0.9)
+PL_TEST_ULP (cospif, 2.15)
+PL_TEST_SYM_INTERVAL (cospif, 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (cospif, 0x1p-31, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (cospif, 0.5, 0x1p22f, 10000)
+PL_TEST_SYM_INTERVAL (cospif, 0x1p22f, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/erf_2u5.c b/contrib/arm-optimized-routines/pl/math/erf_2u5.c
new file mode 100644
index 000000000000..3ca2a1332c1f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erf_2u5.c
@@ -0,0 +1,102 @@
+/*
+ * Double-precision erf(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define TwoOverSqrtPiMinusOne 0x1.06eba8214db69p-3
+#define Shift 0x1p45
+
+/* Polynomial coefficients. */
+#define OneThird 0x1.5555555555555p-2
+#define TwoThird 0x1.5555555555555p-1
+
+#define TwoOverFifteen 0x1.1111111111111p-3
+#define TwoOverFive 0x1.999999999999ap-2
+#define Tenth 0x1.999999999999ap-4
+
+#define TwoOverNine 0x1.c71c71c71c71cp-3
+#define TwoOverFortyFive 0x1.6c16c16c16c17p-5
+#define Sixth 0x1.555555555555p-3
+
+/* Fast erf approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erf(x) ~ erf(r)
+ + scale * d * [
+ + 1
+ - r d
+ + 1/3 (2 r^2 - 1) d^2
+ - 1/6 (r (2 r^2 - 3)) d^3
+ + 1/30 (4 r^4 - 12 r^2 + 3) d^4
+       - 1/90 r (4 r^4 - 20 r^2 + 15) d^5
+ ]
+
+   Maximum measured error: 2.29 ULP
+ erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8
+ want -0x1.20dd59132ebafp-8. */
+double
+erf (double x)
+{
+ /* Get absolute value and sign. */
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & 0x7fffffffffffffff;
+ uint64_t sign = ix & ~0x7fffffffffffffff;
+
+ /* |x| < 0x1p-508. Triggers exceptions. */
+ if (unlikely (ia < 0x2030000000000000))
+ return fma (TwoOverSqrtPiMinusOne, x, x);
+
+ if (ia < 0x4017f80000000000) /* |x| < 6 - 1 / 128 = 5.9921875. */
+ {
+ /* Set r to multiple of 1/128 nearest to |x|. */
+ double a = asdouble (ia);
+ double z = a + Shift;
+ uint64_t i = asuint64 (z) - asuint64 (Shift);
+ double r = z - Shift;
+ /* Lookup erf(r) and scale(r) in table.
+ Set erf(r) to 0 and scale to 2/sqrt(pi) for |x| <= 0x1.cp-9. */
+ double erfr = __erf_data.tab[i].erf;
+ double scale = __erf_data.tab[i].scale;
+
+ /* erf(x) ~ erf(r) + scale * d * poly (d, r). */
+ double d = a - r;
+ double r2 = r * r;
+ double d2 = d * d;
+
+ /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. */
+ double p1 = -r;
+ double p2 = fma (TwoThird, r2, -OneThird);
+ double p3 = -r * fma (OneThird, r2, -0.5);
+ double p4 = fma (fma (TwoOverFifteen, r2, -TwoOverFive), r2, Tenth);
+ double p5
+ = -r * fma (fma (TwoOverFortyFive, r2, -TwoOverNine), r2, Sixth);
+
+ double p34 = fma (p4, d, p3);
+ double p12 = fma (p2, d, p1);
+ double y = fma (p5, d2, p34);
+ y = fma (y, d2, p12);
+
+ y = fma (fma (y, d2, d), scale, erfr);
+ return asdouble (asuint64 (y) | sign);
+ }
+
+ /* Special cases : erf(nan)=nan, erf(+inf)=+1 and erf(-inf)=-1. */
+ if (unlikely (ia >= 0x7ff0000000000000))
+ return (1.0 - (double) (sign >> 62)) + 1.0 / x;
+
+ /* Boring domain (|x| >= 6.0). */
+ return asdouble (sign | asuint64 (1.0));
+}
+
+PL_SIG (S, D, 1, erf, -6.0, 6.0)
+PL_TEST_ULP (erf, 1.79)
+PL_TEST_SYM_INTERVAL (erf, 0, 5.9921875, 40000)
+PL_TEST_SYM_INTERVAL (erf, 5.9921875, inf, 40000)
+PL_TEST_SYM_INTERVAL (erf, 0, inf, 40000)
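
A standalone numerical check of the series in the comment above at r = 1, d = 1/256, using libm erf and exp in place of the table; 0x1.20dd750429b6dp+0 is 2/sqrt(pi), the first scale entry of the table that follows. The sample point is illustrative.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double r = 1.0, d = 0x1p-8;
  /* scale(r) = 2/sqrt(pi) * exp(-r^2).  */
  double scale = 0x1.20dd750429b6dp+0 * exp (-r * r);
  double r2 = r * r;
  double poly = 1 - r * d + (2 * r2 - 1) / 3 * d * d
		- r * (2 * r2 - 3) / 6 * d * d * d
		+ (4 * r2 * r2 - 12 * r2 + 3) / 30 * d * d * d * d
		- r * (4 * r2 * r2 - 20 * r2 + 15) / 90 * d * d * d * d * d;
  double approx = erf (r) + scale * d * poly;
  printf ("series: %a\nlibm:   %a\n", approx, erf (r + d));
  return 0;
}
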
diff --git a/contrib/arm-optimized-routines/pl/math/erf_data.c b/contrib/arm-optimized-routines/pl/math/erf_data.c
new file mode 100644
index 000000000000..138e03578e77
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erf_data.c
@@ -0,0 +1,788 @@
+/*
+ * Data for approximation of erf.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Lookup table used in erf.
+ For each possible rounded input r (multiples of 1/128), between
+ r = 0.0 and r = 6.0 (769 values):
+   - the first entry __erf_data.tab.erf contains the values of erf(r),
+   - the second entry __erf_data.tab.scale contains the values of
+ 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the
+ algorithm, since lookup is performed only for x >= 1/64-1/512. */
+const struct erf_data __erf_data = {
+ .tab = { { 0x0.0000000000000p+0, 0x1.20dd750429b6dp+0 },
+ { 0x1.20dbf3deb1340p-7, 0x1.20d8f1975c85dp+0 },
+ { 0x1.20d77083f17a0p-6, 0x1.20cb67bd452c7p+0 },
+ { 0x1.b137e0cf584dcp-6, 0x1.20b4d8bac36c1p+0 },
+ { 0x1.20c5645dd2538p-5, 0x1.209546ad13ccfp+0 },
+ { 0x1.68e5d3bbc9526p-5, 0x1.206cb4897b148p+0 },
+ { 0x1.b0fafef135745p-5, 0x1.203b261cd0052p+0 },
+ { 0x1.f902a77bd3821p-5, 0x1.2000a00ae3804p+0 },
+ { 0x1.207d480e90658p-4, 0x1.1fbd27cdc72d3p+0 },
+ { 0x1.44703e87e8593p-4, 0x1.1f70c3b4f2cc7p+0 },
+ { 0x1.68591a1e83b5dp-4, 0x1.1f1b7ae44867fp+0 },
+ { 0x1.8c36beb8a8d23p-4, 0x1.1ebd5552f795bp+0 },
+ { 0x1.b0081148a873ap-4, 0x1.1e565bca400d4p+0 },
+ { 0x1.d3cbf7e70a4b3p-4, 0x1.1de697e413d28p+0 },
+ { 0x1.f78159ec8bb50p-4, 0x1.1d6e14099944ap+0 },
+ { 0x1.0d939005f65e5p-3, 0x1.1cecdb718d61cp+0 },
+ { 0x1.1f5e1a35c3b89p-3, 0x1.1c62fa1e869b6p+0 },
+ { 0x1.311fc15f56d14p-3, 0x1.1bd07cdd189acp+0 },
+ { 0x1.42d7fc2f64959p-3, 0x1.1b357141d95d5p+0 },
+ { 0x1.548642321d7c6p-3, 0x1.1a91e5a748165p+0 },
+ { 0x1.662a0bdf7a89fp-3, 0x1.19e5e92b964abp+0 },
+ { 0x1.77c2d2a765f9ep-3, 0x1.19318bae53a04p+0 },
+ { 0x1.895010fdbdbfdp-3, 0x1.1874ddcdfce24p+0 },
+ { 0x1.9ad142662e14dp-3, 0x1.17aff0e56ec10p+0 },
+ { 0x1.ac45e37fe2526p-3, 0x1.16e2d7093cd8cp+0 },
+ { 0x1.bdad72110a648p-3, 0x1.160da304ed92fp+0 },
+ { 0x1.cf076d1233237p-3, 0x1.153068581b781p+0 },
+ { 0x1.e05354b96ff36p-3, 0x1.144b3b337c90cp+0 },
+ { 0x1.f190aa85540e2p-3, 0x1.135e3075d076bp+0 },
+ { 0x1.015f78a3dcf3dp-2, 0x1.12695da8b5bdep+0 },
+ { 0x1.09eed6982b948p-2, 0x1.116cd8fd67618p+0 },
+ { 0x1.127631eb8de32p-2, 0x1.1068b94962e5ep+0 },
+ { 0x1.1af54e232d609p-2, 0x1.0f5d1602f7e41p+0 },
+ { 0x1.236bef825d9a2p-2, 0x1.0e4a073dc1b91p+0 },
+ { 0x1.2bd9db0f7827fp-2, 0x1.0d2fa5a70c168p+0 },
+ { 0x1.343ed6989b7d9p-2, 0x1.0c0e0a8223359p+0 },
+ { 0x1.3c9aa8b84bedap-2, 0x1.0ae54fa490722p+0 },
+ { 0x1.44ed18d9f6462p-2, 0x1.09b58f724416bp+0 },
+ { 0x1.4d35ef3e5372ep-2, 0x1.087ee4d9ad247p+0 },
+ { 0x1.5574f4ffac98ep-2, 0x1.07416b4fbfe7cp+0 },
+ { 0x1.5da9f415ff23fp-2, 0x1.05fd3ecbec297p+0 },
+ { 0x1.65d4b75b00471p-2, 0x1.04b27bc403d30p+0 },
+ { 0x1.6df50a8dff772p-2, 0x1.03613f2812dafp+0 },
+ { 0x1.760aba57a76bfp-2, 0x1.0209a65e29545p+0 },
+ { 0x1.7e15944d9d3e4p-2, 0x1.00abcf3e187a9p+0 },
+ { 0x1.861566f5fd3c0p-2, 0x1.fe8fb01a47307p-1 },
+ { 0x1.8e0a01cab516bp-2, 0x1.fbbbbef34b4b2p-1 },
+ { 0x1.95f3353cbb146p-2, 0x1.f8dc092d58ff8p-1 },
+ { 0x1.9dd0d2b721f39p-2, 0x1.f5f0cdaf15313p-1 },
+ { 0x1.a5a2aca209394p-2, 0x1.f2fa4c16c0019p-1 },
+ { 0x1.ad68966569a87p-2, 0x1.eff8c4b1375dbp-1 },
+ { 0x1.b522646bbda68p-2, 0x1.ecec7870ebca7p-1 },
+ { 0x1.bccfec24855b8p-2, 0x1.e9d5a8e4c934ep-1 },
+ { 0x1.c4710406a65fcp-2, 0x1.e6b4982f158b9p-1 },
+ { 0x1.cc058392a6d2dp-2, 0x1.e38988fc46e72p-1 },
+ { 0x1.d38d4354c3bd0p-2, 0x1.e054be79d3042p-1 },
+ { 0x1.db081ce6e2a48p-2, 0x1.dd167c4cf9d2ap-1 },
+ { 0x1.e275eaf25e458p-2, 0x1.d9cf06898cdafp-1 },
+ { 0x1.e9d68931ae650p-2, 0x1.d67ea1a8b5368p-1 },
+ { 0x1.f129d471eabb1p-2, 0x1.d325927fb9d89p-1 },
+ { 0x1.f86faa9428f9dp-2, 0x1.cfc41e36c7df9p-1 },
+ { 0x1.ffa7ea8eb5fd0p-2, 0x1.cc5a8a3fbea40p-1 },
+ { 0x1.03693a371519cp-1, 0x1.c8e91c4d01368p-1 },
+ { 0x1.06f794ab2cae7p-1, 0x1.c5701a484ef9dp-1 },
+ { 0x1.0a7ef5c18edd2p-1, 0x1.c1efca49a5011p-1 },
+ { 0x1.0dff4f247f6c6p-1, 0x1.be68728e29d5dp-1 },
+ { 0x1.1178930ada115p-1, 0x1.bada596f25436p-1 },
+ { 0x1.14eab43841b55p-1, 0x1.b745c55905bf8p-1 },
+ { 0x1.1855a5fd3dd50p-1, 0x1.b3aafcc27502ep-1 },
+ { 0x1.1bb95c3746199p-1, 0x1.b00a46237d5bep-1 },
+ { 0x1.1f15cb50bc4dep-1, 0x1.ac63e7ecc1411p-1 },
+ { 0x1.226ae840d4d70p-1, 0x1.a8b8287ec6a09p-1 },
+ { 0x1.25b8a88b6dd7fp-1, 0x1.a5074e2157620p-1 },
+ { 0x1.28ff0240d52cdp-1, 0x1.a1519efaf889ep-1 },
+ { 0x1.2c3debfd7d6c1p-1, 0x1.9d97610879642p-1 },
+ { 0x1.2f755ce9a21f4p-1, 0x1.99d8da149c13fp-1 },
+ { 0x1.32a54cb8db67bp-1, 0x1.96164fafd8de3p-1 },
+ { 0x1.35cdb3a9a144dp-1, 0x1.925007283d7aap-1 },
+ { 0x1.38ee8a84beb71p-1, 0x1.8e86458169af8p-1 },
+ { 0x1.3c07ca9cb4f9ep-1, 0x1.8ab94f6caa71dp-1 },
+ { 0x1.3f196dcd0f135p-1, 0x1.86e9694134b9ep-1 },
+ { 0x1.42236e79a5fa6p-1, 0x1.8316d6f48133dp-1 },
+ { 0x1.4525c78dd5966p-1, 0x1.7f41dc12c9e89p-1 },
+ { 0x1.4820747ba2dc2p-1, 0x1.7b6abbb7aaf19p-1 },
+ { 0x1.4b13713ad3513p-1, 0x1.7791b886e7403p-1 },
+ { 0x1.4dfeba47f63ccp-1, 0x1.73b714a552763p-1 },
+ { 0x1.50e24ca35fd2cp-1, 0x1.6fdb11b1e0c34p-1 },
+ { 0x1.53be25d016a4fp-1, 0x1.6bfdf0beddaf5p-1 },
+ { 0x1.569243d2b3a9bp-1, 0x1.681ff24b4ab04p-1 },
+ { 0x1.595ea53035283p-1, 0x1.6441563c665d4p-1 },
+ { 0x1.5c2348ecc4dc3p-1, 0x1.60625bd75d07bp-1 },
+ { 0x1.5ee02e8a71a53p-1, 0x1.5c8341bb23767p-1 },
+ { 0x1.61955607dd15dp-1, 0x1.58a445da7c74cp-1 },
+ { 0x1.6442bfdedd397p-1, 0x1.54c5a57629db0p-1 },
+ { 0x1.66e86d0312e82p-1, 0x1.50e79d1749ac9p-1 },
+ { 0x1.69865ee075011p-1, 0x1.4d0a6889dfd9fp-1 },
+ { 0x1.6c1c9759d0e5fp-1, 0x1.492e42d78d2c5p-1 },
+ { 0x1.6eab18c74091bp-1, 0x1.4553664273d24p-1 },
+ { 0x1.7131e5f496a5ap-1, 0x1.417a0c4049fd0p-1 },
+ { 0x1.73b1021fc0cb8p-1, 0x1.3da26d759aef5p-1 },
+ { 0x1.762870f720c6fp-1, 0x1.39ccc1b136d5ap-1 },
+ { 0x1.78983697dc96fp-1, 0x1.35f93fe7d1b3dp-1 },
+ { 0x1.7b00578c26037p-1, 0x1.32281e2fd1a92p-1 },
+ { 0x1.7d60d8c979f7bp-1, 0x1.2e5991bd4cbfcp-1 },
+ { 0x1.7fb9bfaed8078p-1, 0x1.2a8dcede3673bp-1 },
+ { 0x1.820b1202f27fbp-1, 0x1.26c508f6bd0ffp-1 },
+ { 0x1.8454d5f25760dp-1, 0x1.22ff727dd6f7bp-1 },
+ { 0x1.8697120d92a4ap-1, 0x1.1f3d3cf9ffe5ap-1 },
+ { 0x1.88d1cd474a2e0p-1, 0x1.1b7e98fe26217p-1 },
+ { 0x1.8b050ef253c37p-1, 0x1.17c3b626c7a11p-1 },
+ { 0x1.8d30debfc572ep-1, 0x1.140cc3173f007p-1 },
+ { 0x1.8f5544bd00c04p-1, 0x1.1059ed7740313p-1 },
+ { 0x1.91724951b8fc6p-1, 0x1.0cab61f084b93p-1 },
+ { 0x1.9387f53df5238p-1, 0x1.09014c2ca74dap-1 },
+ { 0x1.959651980da31p-1, 0x1.055bd6d32e8d7p-1 },
+ { 0x1.979d67caa6631p-1, 0x1.01bb2b87c6968p-1 },
+ { 0x1.999d4192a5715p-1, 0x1.fc3ee5d1524b0p-2 },
+ { 0x1.9b95e8fd26abap-1, 0x1.f511a91a67d2ap-2 },
+ { 0x1.9d8768656cc42p-1, 0x1.edeeee0959518p-2 },
+ { 0x1.9f71ca72cffb6p-1, 0x1.e6d6ffaa65a25p-2 },
+ { 0x1.a1551a16aaeafp-1, 0x1.dfca26f5bbf88p-2 },
+ { 0x1.a331628a45b92p-1, 0x1.d8c8aace11e63p-2 },
+ { 0x1.a506af4cc00f4p-1, 0x1.d1d2cfff91594p-2 },
+ { 0x1.a6d50c20fa293p-1, 0x1.cae8d93f1d7b6p-2 },
+ { 0x1.a89c850b7d54dp-1, 0x1.c40b0729ed547p-2 },
+ { 0x1.aa5d265064366p-1, 0x1.bd3998457afdap-2 },
+ { 0x1.ac16fc7143263p-1, 0x1.b674c8ffc6283p-2 },
+ { 0x1.adca142b10f98p-1, 0x1.afbcd3afe8ab6p-2 },
+ { 0x1.af767a741088bp-1, 0x1.a911f096fbc26p-2 },
+ { 0x1.b11c3c79bb424p-1, 0x1.a27455e14c93cp-2 },
+ { 0x1.b2bb679ead19cp-1, 0x1.9be437a7de946p-2 },
+ { 0x1.b4540978921eep-1, 0x1.9561c7f23a47bp-2 },
+ { 0x1.b5e62fce16095p-1, 0x1.8eed36b886d93p-2 },
+ { 0x1.b771e894d602ep-1, 0x1.8886b1e5ecfd1p-2 },
+ { 0x1.b8f741ef54f83p-1, 0x1.822e655b417e6p-2 },
+ { 0x1.ba764a2af2b78p-1, 0x1.7be47af1f5d89p-2 },
+ { 0x1.bbef0fbde6221p-1, 0x1.75a91a7f4d2edp-2 },
+ { 0x1.bd61a1453ab44p-1, 0x1.6f7c69d7d3ef8p-2 },
+ { 0x1.bece0d82d1a5cp-1, 0x1.695e8cd31867ep-2 },
+ { 0x1.c034635b66e23p-1, 0x1.634fa54fa285fp-2 },
+ { 0x1.c194b1d49a184p-1, 0x1.5d4fd33729015p-2 },
+ { 0x1.c2ef0812fc1bdp-1, 0x1.575f3483021c3p-2 },
+ { 0x1.c443755820d64p-1, 0x1.517de540ce2a3p-2 },
+ { 0x1.c5920900b5fd1p-1, 0x1.4babff975a04cp-2 },
+ { 0x1.c6dad2829ec62p-1, 0x1.45e99bcbb7915p-2 },
+ { 0x1.c81de16b14cefp-1, 0x1.4036d0468a7a2p-2 },
+ { 0x1.c95b455cce69dp-1, 0x1.3a93b1998736cp-2 },
+ { 0x1.ca930e0e2a825p-1, 0x1.35005285227f1p-2 },
+ { 0x1.cbc54b476248dp-1, 0x1.2f7cc3fe6f423p-2 },
+ { 0x1.ccf20ce0c0d27p-1, 0x1.2a09153529381p-2 },
+ { 0x1.ce1962c0e0d8bp-1, 0x1.24a55399ea239p-2 },
+ { 0x1.cf3b5cdaf0c39p-1, 0x1.1f518ae487dc8p-2 },
+ { 0x1.d0580b2cfd249p-1, 0x1.1a0dc51a9934dp-2 },
+ { 0x1.d16f7dbe41ca0p-1, 0x1.14da0a961fd14p-2 },
+ { 0x1.d281c49d818d0p-1, 0x1.0fb6620c550afp-2 },
+ { 0x1.d38eefdf64fddp-1, 0x1.0aa2d09497f2bp-2 },
+ { 0x1.d4970f9ce00d9p-1, 0x1.059f59af7a906p-2 },
+ { 0x1.d59a33f19ed42p-1, 0x1.00abff4dec7a3p-2 },
+ { 0x1.d6986cfa798e7p-1, 0x1.f79183b101c5bp-3 },
+ { 0x1.d791cad3eff01p-1, 0x1.edeb406d9c824p-3 },
+ { 0x1.d8865d98abe01p-1, 0x1.e4652fadcb6b2p-3 },
+ { 0x1.d97635600bb89p-1, 0x1.daff4969c0b04p-3 },
+ { 0x1.da61623cb41e0p-1, 0x1.d1b982c501370p-3 },
+ { 0x1.db47f43b2980dp-1, 0x1.c893ce1dcbef7p-3 },
+ { 0x1.dc29fb60715afp-1, 0x1.bf8e1b1ca2279p-3 },
+ { 0x1.dd0787a8bb39dp-1, 0x1.b6a856c3ed54fp-3 },
+ { 0x1.dde0a90611a0dp-1, 0x1.ade26b7fbed95p-3 },
+ { 0x1.deb56f5f12d28p-1, 0x1.a53c4135a6526p-3 },
+ { 0x1.df85ea8db188ep-1, 0x1.9cb5bd549b111p-3 },
+ { 0x1.e0522a5dfda73p-1, 0x1.944ec2e4f5630p-3 },
+ { 0x1.e11a3e8cf4eb8p-1, 0x1.8c07329874652p-3 },
+ { 0x1.e1de36c75ba58p-1, 0x1.83deeada4d25ap-3 },
+ { 0x1.e29e22a89d766p-1, 0x1.7bd5c7df3fe9cp-3 },
+ { 0x1.e35a11b9b61cep-1, 0x1.73eba3b5b07b7p-3 },
+ { 0x1.e4121370224ccp-1, 0x1.6c205655be71fp-3 },
+ { 0x1.e4c6372cd8927p-1, 0x1.6473b5b15a7a1p-3 },
+ { 0x1.e5768c3b4a3fcp-1, 0x1.5ce595c455b0ap-3 },
+ { 0x1.e62321d06c5e0p-1, 0x1.5575c8a468361p-3 },
+ { 0x1.e6cc0709c8a0dp-1, 0x1.4e241e912c305p-3 },
+ { 0x1.e7714aec96534p-1, 0x1.46f066040a832p-3 },
+ { 0x1.e812fc64db369p-1, 0x1.3fda6bc016994p-3 },
+ { 0x1.e8b12a44944a8p-1, 0x1.38e1fae1d6a9dp-3 },
+ { 0x1.e94be342e6743p-1, 0x1.3206dceef5f87p-3 },
+ { 0x1.e9e335fb56f87p-1, 0x1.2b48d9e5dea1cp-3 },
+ { 0x1.ea7730ed0bbb9p-1, 0x1.24a7b84d38971p-3 },
+ { 0x1.eb07e27a133aap-1, 0x1.1e233d434b813p-3 },
+ { 0x1.eb9558e6b42cep-1, 0x1.17bb2c8d41535p-3 },
+ { 0x1.ec1fa258c4beap-1, 0x1.116f48a6476ccp-3 },
+ { 0x1.eca6ccd709544p-1, 0x1.0b3f52ce8c383p-3 },
+ { 0x1.ed2ae6489ac1ep-1, 0x1.052b0b1a174eap-3 },
+ { 0x1.edabfc7453e63p-1, 0x1.fe6460fef4680p-4 },
+ { 0x1.ee2a1d004692cp-1, 0x1.f2a901ccafb37p-4 },
+ { 0x1.eea5557137ae0p-1, 0x1.e723726b824a9p-4 },
+ { 0x1.ef1db32a2277cp-1, 0x1.dbd32ac4c99b0p-4 },
+ { 0x1.ef93436bc2daap-1, 0x1.d0b7a0f921e7cp-4 },
+ { 0x1.f006135426b26p-1, 0x1.c5d0497c09e74p-4 },
+ { 0x1.f0762fde45ee6p-1, 0x1.bb1c972f23e50p-4 },
+ { 0x1.f0e3a5e1a1788p-1, 0x1.b09bfb7d11a83p-4 },
+ { 0x1.f14e8211e8c55p-1, 0x1.a64de673e8837p-4 },
+ { 0x1.f1b6d0fea5f4dp-1, 0x1.9c31c6df3b1b8p-4 },
+ { 0x1.f21c9f12f0677p-1, 0x1.92470a61b6965p-4 },
+ { 0x1.f27ff89525acfp-1, 0x1.888d1d8e510a3p-4 },
+ { 0x1.f2e0e9a6a8b09p-1, 0x1.7f036c0107294p-4 },
+ { 0x1.f33f7e43a706bp-1, 0x1.75a96077274bap-4 },
+ { 0x1.f39bc242e43e6p-1, 0x1.6c7e64e7281cbp-4 },
+ { 0x1.f3f5c1558b19ep-1, 0x1.6381e2980956bp-4 },
+ { 0x1.f44d870704911p-1, 0x1.5ab342383d177p-4 },
+ { 0x1.f4a31ebcd47dfp-1, 0x1.5211ebf41880bp-4 },
+ { 0x1.f4f693b67bd77p-1, 0x1.499d478bca735p-4 },
+ { 0x1.f547f10d60597p-1, 0x1.4154bc68d75c3p-4 },
+ { 0x1.f59741b4b97cfp-1, 0x1.3937b1b319259p-4 },
+ { 0x1.f5e4907982a07p-1, 0x1.31458e6542847p-4 },
+ { 0x1.f62fe80272419p-1, 0x1.297db960e4f63p-4 },
+ { 0x1.f67952cff6282p-1, 0x1.21df9981f8e53p-4 },
+ { 0x1.f6c0db3c34641p-1, 0x1.1a6a95b1e786fp-4 },
+ { 0x1.f7068b7b10fd9p-1, 0x1.131e14fa1625dp-4 },
+ { 0x1.f74a6d9a38383p-1, 0x1.0bf97e95f2a64p-4 },
+ { 0x1.f78c8b812d498p-1, 0x1.04fc3a0481321p-4 },
+ { 0x1.f7cceef15d631p-1, 0x1.fc4b5e32d6259p-5 },
+ { 0x1.f80ba18636f07p-1, 0x1.eeea8c1b1db93p-5 },
+ { 0x1.f848acb544e95p-1, 0x1.e1d4cf1e2450ap-5 },
+ { 0x1.f88419ce4e184p-1, 0x1.d508f9a1ea64ep-5 },
+ { 0x1.f8bdf1fb78370p-1, 0x1.c885df3451a07p-5 },
+ { 0x1.f8f63e416ebffp-1, 0x1.bc4a54a84e834p-5 },
+ { 0x1.f92d077f8d56dp-1, 0x1.b055303221015p-5 },
+ { 0x1.f96256700da8ep-1, 0x1.a4a549829587ep-5 },
+ { 0x1.f99633a838a57p-1, 0x1.993979e14fffdp-5 },
+ { 0x1.f9c8a7989af0dp-1, 0x1.8e109c4622913p-5 },
+ { 0x1.f9f9ba8d3c733p-1, 0x1.83298d717210ep-5 },
+ { 0x1.fa2974addae45p-1, 0x1.78832c03aa2b1p-5 },
+ { 0x1.fa57ddfe27376p-1, 0x1.6e1c5893c380bp-5 },
+ { 0x1.fa84fe5e05c8dp-1, 0x1.63f3f5c4de13bp-5 },
+ { 0x1.fab0dd89d1309p-1, 0x1.5a08e85af27e0p-5 },
+ { 0x1.fadb831a9f9c3p-1, 0x1.505a174e9c929p-5 },
+ { 0x1.fb04f6868a944p-1, 0x1.46e66be002240p-5 },
+ { 0x1.fb2d3f20f9101p-1, 0x1.3dacd1a8d8ccdp-5 },
+ { 0x1.fb54641aebbc9p-1, 0x1.34ac36ad8dafep-5 },
+ { 0x1.fb7a6c834b5a2p-1, 0x1.2be38b6d92415p-5 },
+ { 0x1.fb9f5f4739170p-1, 0x1.2351c2f2d1449p-5 },
+ { 0x1.fbc3433260ca5p-1, 0x1.1af5d2e04f3f6p-5 },
+ { 0x1.fbe61eef4cf6ap-1, 0x1.12ceb37ff9bc3p-5 },
+ { 0x1.fc07f907bc794p-1, 0x1.0adb5fcfa8c75p-5 },
+ { 0x1.fc28d7e4f9cd0p-1, 0x1.031ad58d56279p-5 },
+ { 0x1.fc48c1d033c7ap-1, 0x1.f7182a851bca2p-6 },
+ { 0x1.fc67bcf2d7b8fp-1, 0x1.e85c449e377f2p-6 },
+ { 0x1.fc85cf56ecd38p-1, 0x1.da0005e5f28dfp-6 },
+ { 0x1.fca2fee770c79p-1, 0x1.cc0180af00a8bp-6 },
+ { 0x1.fcbf5170b578bp-1, 0x1.be5ecd2fcb5f9p-6 },
+ { 0x1.fcdacca0bfb73p-1, 0x1.b1160991ff737p-6 },
+ { 0x1.fcf57607a6e7cp-1, 0x1.a4255a00b9f03p-6 },
+ { 0x1.fd0f5317f582fp-1, 0x1.978ae8b55ce1bp-6 },
+ { 0x1.fd2869270a56fp-1, 0x1.8b44e6031383ep-6 },
+ { 0x1.fd40bd6d7a785p-1, 0x1.7f5188610ddc8p-6 },
+ { 0x1.fd58550773cb5p-1, 0x1.73af0c737bb45p-6 },
+ { 0x1.fd6f34f52013ap-1, 0x1.685bb5134ef13p-6 },
+ { 0x1.fd85621b0876dp-1, 0x1.5d55cb54cd53ap-6 },
+ { 0x1.fd9ae142795e3p-1, 0x1.529b9e8cf9a1ep-6 },
+ { 0x1.fdafb719e6a69p-1, 0x1.482b8455dc491p-6 },
+ { 0x1.fdc3e835500b3p-1, 0x1.3e03d891b37dep-6 },
+ { 0x1.fdd7790ea5bc0p-1, 0x1.3422fd6d12e2bp-6 },
+ { 0x1.fdea6e062d0c9p-1, 0x1.2a875b5ffab56p-6 },
+ { 0x1.fdfccb62e52d3p-1, 0x1.212f612dee7fbp-6 },
+ { 0x1.fe0e9552ebdd6p-1, 0x1.181983e5133ddp-6 },
+ { 0x1.fe1fcfebe2083p-1, 0x1.0f443edc5ce49p-6 },
+ { 0x1.fe307f2b503d0p-1, 0x1.06ae13b0d3255p-6 },
+ { 0x1.fe40a6f70af4bp-1, 0x1.fcab1483ea7fcp-7 },
+ { 0x1.fe504b1d9696cp-1, 0x1.ec72615a894c4p-7 },
+ { 0x1.fe5f6f568b301p-1, 0x1.dcaf3691fc448p-7 },
+ { 0x1.fe6e1742f7cf6p-1, 0x1.cd5ec93c12431p-7 },
+ { 0x1.fe7c466dc57a1p-1, 0x1.be7e5ac24963bp-7 },
+ { 0x1.fe8a004c19ae6p-1, 0x1.b00b38d6b3575p-7 },
+ { 0x1.fe97483db8670p-1, 0x1.a202bd6372dcep-7 },
+ { 0x1.fea4218d6594ap-1, 0x1.94624e78e0fafp-7 },
+ { 0x1.feb08f7146046p-1, 0x1.87275e3a6869dp-7 },
+ { 0x1.febc950b3fa75p-1, 0x1.7a4f6aca256cbp-7 },
+ { 0x1.fec835695932ep-1, 0x1.6dd7fe3358230p-7 },
+ { 0x1.fed37386190fbp-1, 0x1.61beae53b72b7p-7 },
+ { 0x1.fede5248e38f4p-1, 0x1.56011cc3b036dp-7 },
+ { 0x1.fee8d486585eep-1, 0x1.4a9cf6bda3f4cp-7 },
+ { 0x1.fef2fd00af31ap-1, 0x1.3f8ff5042a88ep-7 },
+ { 0x1.fefcce6813974p-1, 0x1.34d7dbc76d7e5p-7 },
+ { 0x1.ff064b5afffbep-1, 0x1.2a727a89a3f14p-7 },
+ { 0x1.ff0f766697c76p-1, 0x1.205dac02bd6b9p-7 },
+ { 0x1.ff18520700971p-1, 0x1.1697560347b25p-7 },
+ { 0x1.ff20e0a7ba8c2p-1, 0x1.0d1d69569b82dp-7 },
+ { 0x1.ff2924a3f7a83p-1, 0x1.03ede1a45bfeep-7 },
+ { 0x1.ff312046f2339p-1, 0x1.f60d8aa2a88f2p-8 },
+ { 0x1.ff38d5cc4227fp-1, 0x1.e4cc4abf7d065p-8 },
+ { 0x1.ff404760319b4p-1, 0x1.d4143a9dfe965p-8 },
+ { 0x1.ff47772010262p-1, 0x1.c3e1a5f5c077cp-8 },
+ { 0x1.ff4e671a85425p-1, 0x1.b430ecf4a83a8p-8 },
+ { 0x1.ff55194fe19dfp-1, 0x1.a4fe83fb9db25p-8 },
+ { 0x1.ff5b8fb26f5f6p-1, 0x1.9646f35a76623p-8 },
+ { 0x1.ff61cc26c1578p-1, 0x1.8806d70b2fc36p-8 },
+ { 0x1.ff67d08401202p-1, 0x1.7a3ade6c8b3e4p-8 },
+ { 0x1.ff6d9e943c231p-1, 0x1.6cdfcbfc1e263p-8 },
+ { 0x1.ff733814af88cp-1, 0x1.5ff2750fe7820p-8 },
+ { 0x1.ff789eb6130c9p-1, 0x1.536fc18f7ce5cp-8 },
+ { 0x1.ff7dd41ce2b4dp-1, 0x1.4754abacdf1dcp-8 },
+ { 0x1.ff82d9e1a76d8p-1, 0x1.3b9e3f9d06e3fp-8 },
+ { 0x1.ff87b1913e853p-1, 0x1.30499b503957fp-8 },
+ { 0x1.ff8c5cad200a5p-1, 0x1.2553ee2a336bfp-8 },
+ { 0x1.ff90dcaba4096p-1, 0x1.1aba78ba3af89p-8 },
+ { 0x1.ff9532f846ab0p-1, 0x1.107a8c7323a6ep-8 },
+ { 0x1.ff9960f3eb327p-1, 0x1.06918b6355624p-8 },
+ { 0x1.ff9d67f51ddbap-1, 0x1.f9f9cfd9c3035p-9 },
+ { 0x1.ffa14948549a7p-1, 0x1.e77448fb66bb9p-9 },
+ { 0x1.ffa506302ebaep-1, 0x1.d58da68fd1170p-9 },
+ { 0x1.ffa89fe5b3625p-1, 0x1.c4412bf4b8f0bp-9 },
+ { 0x1.ffac17988ef4bp-1, 0x1.b38a3af2e55b4p-9 },
+ { 0x1.ffaf6e6f4f5c0p-1, 0x1.a3645330550ffp-9 },
+ { 0x1.ffb2a5879f35ep-1, 0x1.93cb11a30d765p-9 },
+ { 0x1.ffb5bdf67fe6fp-1, 0x1.84ba3004a50d0p-9 },
+ { 0x1.ffb8b8c88295fp-1, 0x1.762d84469c18fp-9 },
+ { 0x1.ffbb970200110p-1, 0x1.6821000795a03p-9 },
+ { 0x1.ffbe599f4f9d9p-1, 0x1.5a90b00981d93p-9 },
+ { 0x1.ffc10194fcb64p-1, 0x1.4d78bba8ca5fdp-9 },
+ { 0x1.ffc38fcffbb7cp-1, 0x1.40d564548fad7p-9 },
+ { 0x1.ffc60535dd7f5p-1, 0x1.34a305080681fp-9 },
+ { 0x1.ffc862a501fd7p-1, 0x1.28de11c5031ebp-9 },
+ { 0x1.ffcaa8f4c9beap-1, 0x1.1d83170fbf6fbp-9 },
+ { 0x1.ffccd8f5c66d1p-1, 0x1.128eb96be8798p-9 },
+ { 0x1.ffcef371ea4d7p-1, 0x1.07fdb4dafea5fp-9 },
+ { 0x1.ffd0f92cb6ba7p-1, 0x1.fb99b8b8279e1p-10 },
+ { 0x1.ffd2eae369a07p-1, 0x1.e7f232d9e2630p-10 },
+ { 0x1.ffd4c94d29fdbp-1, 0x1.d4fed7195d7e8p-10 },
+ { 0x1.ffd6951b33686p-1, 0x1.c2b9cf7f893bfp-10 },
+ { 0x1.ffd84ef9009eep-1, 0x1.b11d702b3deb1p-10 },
+ { 0x1.ffd9f78c7524ap-1, 0x1.a024365f771bdp-10 },
+ { 0x1.ffdb8f7605ee7p-1, 0x1.8fc8c794b03b5p-10 },
+ { 0x1.ffdd1750e1220p-1, 0x1.8005f08d6f1efp-10 },
+ { 0x1.ffde8fb314ebfp-1, 0x1.70d6a46e07ddap-10 },
+ { 0x1.ffdff92db56e5p-1, 0x1.6235fbd7a4345p-10 },
+ { 0x1.ffe1544d01ccbp-1, 0x1.541f340697987p-10 },
+ { 0x1.ffe2a1988857cp-1, 0x1.468dadf4080abp-10 },
+ { 0x1.ffe3e19349dc7p-1, 0x1.397ced7af2b15p-10 },
+ { 0x1.ffe514bbdc197p-1, 0x1.2ce898809244ep-10 },
+ { 0x1.ffe63b8c8b5f7p-1, 0x1.20cc76202c5fap-10 },
+ { 0x1.ffe7567b7b5e1p-1, 0x1.15246dda49d47p-10 },
+ { 0x1.ffe865fac722bp-1, 0x1.09ec86c75d497p-10 },
+ { 0x1.ffe96a78a04a9p-1, 0x1.fe41cd9bb4eeep-11 },
+ { 0x1.ffea645f6d6dap-1, 0x1.e97ba3b77f306p-11 },
+ { 0x1.ffeb5415e7c44p-1, 0x1.d57f524723822p-11 },
+ { 0x1.ffec39ff380b9p-1, 0x1.c245d4b998479p-11 },
+ { 0x1.ffed167b12ac2p-1, 0x1.afc85e0f82e12p-11 },
+ { 0x1.ffede9e5d3262p-1, 0x1.9e005769dbc1dp-11 },
+ { 0x1.ffeeb49896c6dp-1, 0x1.8ce75e9f6f8a0p-11 },
+ { 0x1.ffef76e956a9fp-1, 0x1.7c7744d9378f7p-11 },
+ { 0x1.fff0312b010b5p-1, 0x1.6caa0d3582fe9p-11 },
+ { 0x1.fff0e3ad91ec2p-1, 0x1.5d79eb71e893bp-11 },
+ { 0x1.fff18ebe2b0e1p-1, 0x1.4ee1429bf7cc0p-11 },
+ { 0x1.fff232a72b48ep-1, 0x1.40daa3c89f5b6p-11 },
+ { 0x1.fff2cfb0453d9p-1, 0x1.3360ccd23db3ap-11 },
+ { 0x1.fff3661e9569dp-1, 0x1.266ea71d4f71ap-11 },
+ { 0x1.fff3f634b79f9p-1, 0x1.19ff4663ae9dfp-11 },
+ { 0x1.fff48032dbe40p-1, 0x1.0e0de78654d1ep-11 },
+ { 0x1.fff50456dab8cp-1, 0x1.0295ef6591848p-11 },
+ { 0x1.fff582dc48d30p-1, 0x1.ef25d37f49fe1p-12 },
+ { 0x1.fff5fbfc8a439p-1, 0x1.da01102b5f851p-12 },
+ { 0x1.fff66feee5129p-1, 0x1.c5b5412dcafadp-12 },
+ { 0x1.fff6dee89352ep-1, 0x1.b23a5a23e4210p-12 },
+ { 0x1.fff7491cd4af6p-1, 0x1.9f8893d8fd1c1p-12 },
+ { 0x1.fff7aebcff755p-1, 0x1.8d986a4187285p-12 },
+ { 0x1.fff80ff8911fdp-1, 0x1.7c629a822bc9ep-12 },
+ { 0x1.fff86cfd3e657p-1, 0x1.6be02102b3520p-12 },
+ { 0x1.fff8c5f702ccfp-1, 0x1.5c0a378c90bcap-12 },
+ { 0x1.fff91b102fca8p-1, 0x1.4cda5374ea275p-12 },
+ { 0x1.fff96c717b695p-1, 0x1.3e4a23d1f4702p-12 },
+ { 0x1.fff9ba420e834p-1, 0x1.30538fbb77ecdp-12 },
+ { 0x1.fffa04a7928b1p-1, 0x1.22f0b496539bdp-12 },
+ { 0x1.fffa4bc63ee9ap-1, 0x1.161be46ad3b50p-12 },
+ { 0x1.fffa8fc0e5f33p-1, 0x1.09cfa445b00ffp-12 },
+ { 0x1.fffad0b901755p-1, 0x1.fc0d55470cf51p-13 },
+ { 0x1.fffb0ecebee1bp-1, 0x1.e577bbcd49935p-13 },
+ { 0x1.fffb4a210b172p-1, 0x1.cfd4a5adec5bfp-13 },
+ { 0x1.fffb82cd9dcbfp-1, 0x1.bb1a9657ce465p-13 },
+ { 0x1.fffbb8f1049c6p-1, 0x1.a740684026555p-13 },
+ { 0x1.fffbeca6adbe9p-1, 0x1.943d4a1d1ed39p-13 },
+ { 0x1.fffc1e08f25f5p-1, 0x1.8208bc334a6a5p-13 },
+ { 0x1.fffc4d3120aa1p-1, 0x1.709a8db59f25cp-13 },
+ { 0x1.fffc7a37857d2p-1, 0x1.5feada379d8b7p-13 },
+ { 0x1.fffca53375ce3p-1, 0x1.4ff207314a102p-13 },
+ { 0x1.fffcce3b57bffp-1, 0x1.40a8c1949f75ep-13 },
+ { 0x1.fffcf564ab6b7p-1, 0x1.3207fb7420eb9p-13 },
+ { 0x1.fffd1ac4135f9p-1, 0x1.2408e9ba3327fp-13 },
+ { 0x1.fffd3e6d5cd87p-1, 0x1.16a501f0e42cap-13 },
+ { 0x1.fffd607387b07p-1, 0x1.09d5f819c9e29p-13 },
+ { 0x1.fffd80e8ce0dap-1, 0x1.fb2b792b40a22p-14 },
+ { 0x1.fffd9fdeabccep-1, 0x1.e3bcf436a1a95p-14 },
+ { 0x1.fffdbd65e5ad0p-1, 0x1.cd55277c18d05p-14 },
+ { 0x1.fffdd98e903b2p-1, 0x1.b7e94604479dcp-14 },
+ { 0x1.fffdf46816833p-1, 0x1.a36eec00926ddp-14 },
+ { 0x1.fffe0e0140857p-1, 0x1.8fdc1b2dcf7b9p-14 },
+ { 0x1.fffe26683972ap-1, 0x1.7d2737527c3f9p-14 },
+ { 0x1.fffe3daa95b18p-1, 0x1.6b4702d7d5849p-14 },
+ { 0x1.fffe53d558ae9p-1, 0x1.5a329b7d30748p-14 },
+ { 0x1.fffe68f4fa777p-1, 0x1.49e17724f4d41p-14 },
+ { 0x1.fffe7d156d244p-1, 0x1.3a4b60ba9aa4dp-14 },
+ { 0x1.fffe904222101p-1, 0x1.2b6875310f785p-14 },
+ { 0x1.fffea2860ee1ep-1, 0x1.1d312098e9dbap-14 },
+ { 0x1.fffeb3ebb267bp-1, 0x1.0f9e1b4dd36dfp-14 },
+ { 0x1.fffec47d19457p-1, 0x1.02a8673a94691p-14 },
+ { 0x1.fffed443e2787p-1, 0x1.ec929a665b449p-15 },
+ { 0x1.fffee34943b15p-1, 0x1.d4f4b4c8e09edp-15 },
+ { 0x1.fffef1960d85dp-1, 0x1.be6abbb10a5aap-15 },
+ { 0x1.fffeff32af7afp-1, 0x1.a8e8cc1fadef6p-15 },
+ { 0x1.ffff0c273bea2p-1, 0x1.94637d5bacfdbp-15 },
+ { 0x1.ffff187b6bc0ep-1, 0x1.80cfdc72220cfp-15 },
+ { 0x1.ffff2436a21dcp-1, 0x1.6e2367dc27f95p-15 },
+ { 0x1.ffff2f5fefcaap-1, 0x1.5c540b4936fd2p-15 },
+ { 0x1.ffff39fe16963p-1, 0x1.4b581b8d170fcp-15 },
+ { 0x1.ffff44178c8d2p-1, 0x1.3b2652b06c2b2p-15 },
+ { 0x1.ffff4db27f146p-1, 0x1.2bb5cc22e5db6p-15 },
+ { 0x1.ffff56d4d5e5ep-1, 0x1.1cfe010e2052dp-15 },
+ { 0x1.ffff5f8435efcp-1, 0x1.0ef6c4c84a0fep-15 },
+ { 0x1.ffff67c604180p-1, 0x1.01984165a5f36p-15 },
+ { 0x1.ffff6f9f67e55p-1, 0x1.e9b5e8d00ce76p-16 },
+ { 0x1.ffff77154e0d6p-1, 0x1.d16f5716c6c1ap-16 },
+ { 0x1.ffff7e2c6aea2p-1, 0x1.ba4f035d60e02p-16 },
+ { 0x1.ffff84e93cd75p-1, 0x1.a447b7b03f045p-16 },
+ { 0x1.ffff8b500e77cp-1, 0x1.8f4ccca7fc90dp-16 },
+ { 0x1.ffff9164f8e46p-1, 0x1.7b5223dac7336p-16 },
+ { 0x1.ffff972be5c59p-1, 0x1.684c227fcacefp-16 },
+ { 0x1.ffff9ca891572p-1, 0x1.562fac4329b48p-16 },
+ { 0x1.ffffa1de8c582p-1, 0x1.44f21e49054f2p-16 },
+ { 0x1.ffffa6d13de73p-1, 0x1.34894a5e24657p-16 },
+ { 0x1.ffffab83e54b8p-1, 0x1.24eb7254ccf83p-16 },
+ { 0x1.ffffaff99bac4p-1, 0x1.160f438c70913p-16 },
+ { 0x1.ffffb43555b5fp-1, 0x1.07ebd2a2d2844p-16 },
+ { 0x1.ffffb839e52f3p-1, 0x1.f4f12e9ab070ap-17 },
+ { 0x1.ffffbc09fa7cdp-1, 0x1.db5ad0b27805cp-17 },
+ { 0x1.ffffbfa82616bp-1, 0x1.c304efa2c6f4ep-17 },
+ { 0x1.ffffc316d9ed0p-1, 0x1.abe09e9144b5ep-17 },
+ { 0x1.ffffc6586abf6p-1, 0x1.95df988e76644p-17 },
+ { 0x1.ffffc96f1165ep-1, 0x1.80f439b4ee04bp-17 },
+ { 0x1.ffffcc5cec0c1p-1, 0x1.6d11788a69c64p-17 },
+ { 0x1.ffffcf23ff5fcp-1, 0x1.5a2adfa0b4bc4p-17 },
+ { 0x1.ffffd1c637b2bp-1, 0x1.4834877429b8fp-17 },
+ { 0x1.ffffd4456a10dp-1, 0x1.37231085c7d9ap-17 },
+ { 0x1.ffffd6a3554a1p-1, 0x1.26eb9daed6f7ep-17 },
+ { 0x1.ffffd8e1a2f22p-1, 0x1.1783ceac28910p-17 },
+ { 0x1.ffffdb01e8546p-1, 0x1.08e1badf0fcedp-17 },
+ { 0x1.ffffdd05a75eap-1, 0x1.f5f7d88472604p-18 },
+ { 0x1.ffffdeee4f810p-1, 0x1.db92b5212fb8dp-18 },
+ { 0x1.ffffe0bd3e852p-1, 0x1.c282cd3957edap-18 },
+ { 0x1.ffffe273c15b7p-1, 0x1.aab7abace48dcp-18 },
+ { 0x1.ffffe41314e06p-1, 0x1.94219bfcb4928p-18 },
+ { 0x1.ffffe59c6698bp-1, 0x1.7eb1a2075864dp-18 },
+ { 0x1.ffffe710d565ep-1, 0x1.6a597219a93d9p-18 },
+ { 0x1.ffffe8717232dp-1, 0x1.570b69502f313p-18 },
+ { 0x1.ffffe9bf4098cp-1, 0x1.44ba864670882p-18 },
+ { 0x1.ffffeafb377d5p-1, 0x1.335a62115bce2p-18 },
+ { 0x1.ffffec2641a9ep-1, 0x1.22df298214423p-18 },
+ { 0x1.ffffed413e5b7p-1, 0x1.133d96ae7e0ddp-18 },
+ { 0x1.ffffee4d01cd6p-1, 0x1.046aeabcfcdecp-18 },
+ { 0x1.ffffef4a55bd4p-1, 0x1.ecb9cfe1d8642p-19 },
+ { 0x1.fffff039f9e8fp-1, 0x1.d21397ead99cbp-19 },
+ { 0x1.fffff11ca4876p-1, 0x1.b8d094c86d374p-19 },
+ { 0x1.fffff1f302bc1p-1, 0x1.a0df0f0c626dcp-19 },
+ { 0x1.fffff2bdb904dp-1, 0x1.8a2e269750a39p-19 },
+ { 0x1.fffff37d63a36p-1, 0x1.74adc8f4064d3p-19 },
+ { 0x1.fffff43297019p-1, 0x1.604ea819f007cp-19 },
+ { 0x1.fffff4dde0118p-1, 0x1.4d0231928c6f9p-19 },
+ { 0x1.fffff57fc4a95p-1, 0x1.3aba85fe22e1fp-19 },
+ { 0x1.fffff618c3da6p-1, 0x1.296a70f414053p-19 },
+ { 0x1.fffff6a956450p-1, 0x1.1905613b3abf2p-19 },
+ { 0x1.fffff731ee681p-1, 0x1.097f6156f32c5p-19 },
+ { 0x1.fffff7b2f8ed6p-1, 0x1.f59a20caf6695p-20 },
+ { 0x1.fffff82cdcf1bp-1, 0x1.d9c73698fb1dcp-20 },
+ { 0x1.fffff89ffc4aap-1, 0x1.bf716c6168baep-20 },
+ { 0x1.fffff90cb3c81p-1, 0x1.a6852c6b58392p-20 },
+ { 0x1.fffff9735b73bp-1, 0x1.8eefd70594a88p-20 },
+ { 0x1.fffff9d446cccp-1, 0x1.789fb715aae95p-20 },
+ { 0x1.fffffa2fc5015p-1, 0x1.6383f726a8e04p-20 },
+ { 0x1.fffffa8621251p-1, 0x1.4f8c96f26a26ap-20 },
+ { 0x1.fffffad7a2652p-1, 0x1.3caa61607f920p-20 },
+ { 0x1.fffffb248c39dp-1, 0x1.2acee2f5ecdb8p-20 },
+ { 0x1.fffffb6d1e95dp-1, 0x1.19ec60b1242edp-20 },
+ { 0x1.fffffbb196132p-1, 0x1.09f5cf4dd2877p-20 },
+ { 0x1.fffffbf22c1e2p-1, 0x1.f5bd95d8730d8p-21 },
+ { 0x1.fffffc2f171e3p-1, 0x1.d9371e2ff7c35p-21 },
+ { 0x1.fffffc688a9cfp-1, 0x1.be41de54d155ap-21 },
+ { 0x1.fffffc9eb76acp-1, 0x1.a4c89e08ef4f3p-21 },
+ { 0x1.fffffcd1cbc28p-1, 0x1.8cb738399b12cp-21 },
+ { 0x1.fffffd01f36afp-1, 0x1.75fa8dbc84becp-21 },
+ { 0x1.fffffd2f57d68p-1, 0x1.608078a70dcbcp-21 },
+ { 0x1.fffffd5a2041fp-1, 0x1.4c37c0394d094p-21 },
+ { 0x1.fffffd8271d12p-1, 0x1.39100d5687bfep-21 },
+ { 0x1.fffffda86faa9p-1, 0x1.26f9df8519bd6p-21 },
+ { 0x1.fffffdcc3b117p-1, 0x1.15e6827001f18p-21 },
+ { 0x1.fffffdedf37edp-1, 0x1.05c803e4831c1p-21 },
+ { 0x1.fffffe0db6b91p-1, 0x1.ed22548cffd35p-22 },
+ { 0x1.fffffe2ba0ea5p-1, 0x1.d06ad6ecdf971p-22 },
+ { 0x1.fffffe47ccb60p-1, 0x1.b551c847fbc96p-22 },
+ { 0x1.fffffe62534d4p-1, 0x1.9bc09f112b494p-22 },
+ { 0x1.fffffe7b4c81ep-1, 0x1.83a1ff0aa239dp-22 },
+ { 0x1.fffffe92ced93p-1, 0x1.6ce1aa3fd7bddp-22 },
+ { 0x1.fffffea8ef9cfp-1, 0x1.576c72b514859p-22 },
+ { 0x1.fffffebdc2ec6p-1, 0x1.43302cc4a0da8p-22 },
+ { 0x1.fffffed15bcbap-1, 0x1.301ba221dc9bbp-22 },
+ { 0x1.fffffee3cc32cp-1, 0x1.1e1e857adc568p-22 },
+ { 0x1.fffffef5251c2p-1, 0x1.0d2966b1746f7p-22 },
+ { 0x1.ffffff0576917p-1, 0x1.fa5b4f49cc6b2p-23 },
+ { 0x1.ffffff14cfb92p-1, 0x1.dc3ae30b55c16p-23 },
+ { 0x1.ffffff233ee1dp-1, 0x1.bfd7555a3bd68p-23 },
+ { 0x1.ffffff30d18e8p-1, 0x1.a517d9e61628ap-23 },
+ { 0x1.ffffff3d9480fp-1, 0x1.8be4f8f6c951fp-23 },
+ { 0x1.ffffff4993c46p-1, 0x1.74287ded49339p-23 },
+ { 0x1.ffffff54dab72p-1, 0x1.5dcd669f2cd34p-23 },
+ { 0x1.ffffff5f74141p-1, 0x1.48bfd38302870p-23 },
+ { 0x1.ffffff6969fb8p-1, 0x1.34ecf8a3c124ap-23 },
+ { 0x1.ffffff72c5fb6p-1, 0x1.22430f521cbcfp-23 },
+ { 0x1.ffffff7b91176p-1, 0x1.10b1488aeb235p-23 },
+ { 0x1.ffffff83d3d07p-1, 0x1.0027c00a263a6p-23 },
+ { 0x1.ffffff8b962bep-1, 0x1.e12ee004efc37p-24 },
+ { 0x1.ffffff92dfba2p-1, 0x1.c3e44ae32b16bp-24 },
+ { 0x1.ffffff99b79d2p-1, 0x1.a854ea14102a8p-24 },
+ { 0x1.ffffffa0248e8p-1, 0x1.8e6761569f45dp-24 },
+ { 0x1.ffffffa62ce54p-1, 0x1.7603bac345f65p-24 },
+ { 0x1.ffffffabd69b4p-1, 0x1.5f1353cdad001p-24 },
+ { 0x1.ffffffb127525p-1, 0x1.4980cb3c80949p-24 },
+ { 0x1.ffffffb624592p-1, 0x1.3537f00b6ad4dp-24 },
+ { 0x1.ffffffbad2affp-1, 0x1.2225b12bffc68p-24 },
+ { 0x1.ffffffbf370cdp-1, 0x1.10380e1adb7e9p-24 },
+ { 0x1.ffffffc355dfdp-1, 0x1.febc107d5efaap-25 },
+ { 0x1.ffffffc733572p-1, 0x1.df0f2a0ee6946p-25 },
+ { 0x1.ffffffcad3626p-1, 0x1.c14b2188bcee4p-25 },
+ { 0x1.ffffffce39b67p-1, 0x1.a553644f7f07dp-25 },
+ { 0x1.ffffffd169d0cp-1, 0x1.8b0cfce0579dfp-25 },
+ { 0x1.ffffffd466fa5p-1, 0x1.725e7c5dd20f7p-25 },
+ { 0x1.ffffffd7344aap-1, 0x1.5b2fe547a1340p-25 },
+ { 0x1.ffffffd9d4aabp-1, 0x1.456a974e92e93p-25 },
+ { 0x1.ffffffdc4ad7ap-1, 0x1.30f93c3699078p-25 },
+ { 0x1.ffffffde9964ep-1, 0x1.1dc7b5b978cf8p-25 },
+ { 0x1.ffffffe0c2bf0p-1, 0x1.0bc30c5d52f15p-25 },
+ { 0x1.ffffffe2c92dbp-1, 0x1.f5b2be65a0c7fp-26 },
+ { 0x1.ffffffe4aed5ep-1, 0x1.d5f3a8dea7357p-26 },
+ { 0x1.ffffffe675bbdp-1, 0x1.b82915b03515bp-26 },
+ { 0x1.ffffffe81fc4ep-1, 0x1.9c3517e789488p-26 },
+ { 0x1.ffffffe9aeb97p-1, 0x1.81fb7df06136ep-26 },
+ { 0x1.ffffffeb24467p-1, 0x1.6961b8d641d06p-26 },
+ { 0x1.ffffffec81ff2p-1, 0x1.524ec4d916caep-26 },
+ { 0x1.ffffffedc95e7p-1, 0x1.3cab1343d18d1p-26 },
+ { 0x1.ffffffeefbc85p-1, 0x1.2860757487a01p-26 },
+ { 0x1.fffffff01a8b6p-1, 0x1.155a09065d4f7p-26 },
+ { 0x1.fffffff126e1ep-1, 0x1.0384250e4c9fcp-26 },
+ { 0x1.fffffff221f30p-1, 0x1.e59890b926c78p-27 },
+ { 0x1.fffffff30cd3fp-1, 0x1.c642116a8a9e3p-27 },
+ { 0x1.fffffff3e8892p-1, 0x1.a8e405e651ab6p-27 },
+ { 0x1.fffffff4b606fp-1, 0x1.8d5f98114f872p-27 },
+ { 0x1.fffffff57632dp-1, 0x1.7397c5a66e307p-27 },
+ { 0x1.fffffff629e44p-1, 0x1.5b71456c5a4c4p-27 },
+ { 0x1.fffffff6d1e56p-1, 0x1.44d26de513197p-27 },
+ { 0x1.fffffff76ef3fp-1, 0x1.2fa31d6371537p-27 },
+ { 0x1.fffffff801c1fp-1, 0x1.1bcca373b7b43p-27 },
+ { 0x1.fffffff88af67p-1, 0x1.0939ab853339fp-27 },
+ { 0x1.fffffff90b2e3p-1, 0x1.efac5187b2863p-28 },
+ { 0x1.fffffff982fc1p-1, 0x1.cf1e86235d0e6p-28 },
+ { 0x1.fffffff9f2e9fp-1, 0x1.b0a68a2128babp-28 },
+ { 0x1.fffffffa5b790p-1, 0x1.9423165bc4444p-28 },
+ { 0x1.fffffffabd229p-1, 0x1.7974e743dea3cp-28 },
+ { 0x1.fffffffb18582p-1, 0x1.607e9eacd1050p-28 },
+ { 0x1.fffffffb6d844p-1, 0x1.4924a74dec728p-28 },
+ { 0x1.fffffffbbd0aap-1, 0x1.334d19e0c2160p-28 },
+ { 0x1.fffffffc0748fp-1, 0x1.1edfa3c5f5ccap-28 },
+ { 0x1.fffffffc4c96cp-1, 0x1.0bc56f1b54701p-28 },
+ { 0x1.fffffffc8d462p-1, 0x1.f3d2185e047d9p-29 },
+ { 0x1.fffffffcc9a41p-1, 0x1.d26cb87945e87p-29 },
+ { 0x1.fffffffd01f89p-1, 0x1.b334fac4b9f99p-29 },
+ { 0x1.fffffffd36871p-1, 0x1.96076f7918d1cp-29 },
+ { 0x1.fffffffd678edp-1, 0x1.7ac2d72fc2c63p-29 },
+ { 0x1.fffffffd954aep-1, 0x1.614801550319ep-29 },
+ { 0x1.fffffffdbff2ap-1, 0x1.4979ac8b28926p-29 },
+ { 0x1.fffffffde7ba0p-1, 0x1.333c68e2d0548p-29 },
+ { 0x1.fffffffe0cd16p-1, 0x1.1e767bce37dd7p-29 },
+ { 0x1.fffffffe2f664p-1, 0x1.0b0fc5b6d05a0p-29 },
+ { 0x1.fffffffe4fa30p-1, 0x1.f1e3523b41d7dp-30 },
+ { 0x1.fffffffe6daf7p-1, 0x1.d00de6608effep-30 },
+ { 0x1.fffffffe89b0cp-1, 0x1.b0778b7b3301ap-30 },
+ { 0x1.fffffffea3c9ap-1, 0x1.92fb04ec0f6cfp-30 },
+ { 0x1.fffffffebc1a9p-1, 0x1.77756ec9f78fap-30 },
+ { 0x1.fffffffed2c21p-1, 0x1.5dc61922d5a06p-30 },
+ { 0x1.fffffffee7dc8p-1, 0x1.45ce65699ff6dp-30 },
+ { 0x1.fffffffefb847p-1, 0x1.2f71a5f159970p-30 },
+ { 0x1.ffffffff0dd2bp-1, 0x1.1a94ff571654fp-30 },
+ { 0x1.ffffffff1ede9p-1, 0x1.071f4bbea09ecp-30 },
+ { 0x1.ffffffff2ebdap-1, 0x1.e9f1ff8ddd774p-31 },
+ { 0x1.ffffffff3d843p-1, 0x1.c818223a202c7p-31 },
+ { 0x1.ffffffff4b453p-1, 0x1.a887bd2b4404dp-31 },
+ { 0x1.ffffffff58126p-1, 0x1.8b1a336c5eb6bp-31 },
+ { 0x1.ffffffff63fc3p-1, 0x1.6fab63324088ap-31 },
+ { 0x1.ffffffff6f121p-1, 0x1.56197e30205bap-31 },
+ { 0x1.ffffffff79626p-1, 0x1.3e44e45301b92p-31 },
+ { 0x1.ffffffff82fabp-1, 0x1.281000bfe4c3fp-31 },
+ { 0x1.ffffffff8be77p-1, 0x1.135f28f2d50b4p-31 },
+ { 0x1.ffffffff94346p-1, 0x1.00187dded5975p-31 },
+ { 0x1.ffffffff9bec8p-1, 0x1.dc479de0ef001p-32 },
+ { 0x1.ffffffffa319fp-1, 0x1.bad4fdad3caa1p-32 },
+ { 0x1.ffffffffa9c63p-1, 0x1.9baed3ed27ab8p-32 },
+ { 0x1.ffffffffaffa4p-1, 0x1.7ead9ce4285bbp-32 },
+ { 0x1.ffffffffb5be5p-1, 0x1.63ac6b4edc88ep-32 },
+ { 0x1.ffffffffbb1a2p-1, 0x1.4a88be2a6390cp-32 },
+ { 0x1.ffffffffc014ep-1, 0x1.332259185f1a0p-32 },
+ { 0x1.ffffffffc4b56p-1, 0x1.1d5b1f3793044p-32 },
+ { 0x1.ffffffffc901cp-1, 0x1.0916f04b6e18bp-32 },
+ { 0x1.ffffffffccfffp-1, 0x1.ec77101de6926p-33 },
+ { 0x1.ffffffffd0b56p-1, 0x1.c960bf23153e0p-33 },
+ { 0x1.ffffffffd4271p-1, 0x1.a8bd20fc65ef7p-33 },
+ { 0x1.ffffffffd759dp-1, 0x1.8a61745ec7d1dp-33 },
+ { 0x1.ffffffffda520p-1, 0x1.6e25d0e756261p-33 },
+ { 0x1.ffffffffdd13cp-1, 0x1.53e4f7d1666cbp-33 },
+ { 0x1.ffffffffdfa2dp-1, 0x1.3b7c27a7ddb0ep-33 },
+ { 0x1.ffffffffe202dp-1, 0x1.24caf2c32af14p-33 },
+ { 0x1.ffffffffe4371p-1, 0x1.0fb3186804d0fp-33 },
+ { 0x1.ffffffffe642ap-1, 0x1.f830c0bb41fd7p-34 },
+ { 0x1.ffffffffe8286p-1, 0x1.d3c0f1a91c846p-34 },
+ { 0x1.ffffffffe9eb0p-1, 0x1.b1e5acf351d87p-34 },
+ { 0x1.ffffffffeb8d0p-1, 0x1.92712d259ce66p-34 },
+ { 0x1.ffffffffed10ap-1, 0x1.7538c60a04476p-34 },
+ { 0x1.ffffffffee782p-1, 0x1.5a14b04b47879p-34 },
+ { 0x1.ffffffffefc57p-1, 0x1.40dfd87456f4cp-34 },
+ { 0x1.fffffffff0fa7p-1, 0x1.2977b1172b9d5p-34 },
+ { 0x1.fffffffff218fp-1, 0x1.13bc07e891491p-34 },
+ { 0x1.fffffffff3227p-1, 0x1.ff1dbb4300811p-35 },
+ { 0x1.fffffffff4188p-1, 0x1.d9a880f306bd8p-35 },
+ { 0x1.fffffffff4fc9p-1, 0x1.b6e45220b55e0p-35 },
+ { 0x1.fffffffff5cfdp-1, 0x1.96a0b33f2c4dap-35 },
+ { 0x1.fffffffff6939p-1, 0x1.78b07e9e924acp-35 },
+ { 0x1.fffffffff748ep-1, 0x1.5ce9ab1670dd2p-35 },
+ { 0x1.fffffffff7f0dp-1, 0x1.4325167006bb0p-35 },
+ { 0x1.fffffffff88c5p-1, 0x1.2b3e53538ff3fp-35 },
+ { 0x1.fffffffff91c6p-1, 0x1.15137a7f44864p-35 },
+ { 0x1.fffffffff9a1bp-1, 0x1.0084ff125639dp-35 },
+ { 0x1.fffffffffa1d2p-1, 0x1.daeb0b7311ec7p-36 },
+ { 0x1.fffffffffa8f6p-1, 0x1.b7937d1c40c52p-36 },
+ { 0x1.fffffffffaf92p-1, 0x1.96d082f59ab06p-36 },
+ { 0x1.fffffffffb5b0p-1, 0x1.7872d9fa10aadp-36 },
+ { 0x1.fffffffffbb58p-1, 0x1.5c4e8e37bc7d0p-36 },
+ { 0x1.fffffffffc095p-1, 0x1.423ac0df49a40p-36 },
+ { 0x1.fffffffffc56dp-1, 0x1.2a117230ad284p-36 },
+ { 0x1.fffffffffc9e8p-1, 0x1.13af4f04f9998p-36 },
+ { 0x1.fffffffffce0dp-1, 0x1.fde703724e560p-37 },
+ { 0x1.fffffffffd1e1p-1, 0x1.d77f0c82e7641p-37 },
+ { 0x1.fffffffffd56cp-1, 0x1.b3ee02611d7ddp-37 },
+ { 0x1.fffffffffd8b3p-1, 0x1.92ff33023d5bdp-37 },
+ { 0x1.fffffffffdbbap-1, 0x1.7481a9e69f53fp-37 },
+ { 0x1.fffffffffde86p-1, 0x1.5847eda620959p-37 },
+ { 0x1.fffffffffe11dp-1, 0x1.3e27c1fcc74bdp-37 },
+ { 0x1.fffffffffe380p-1, 0x1.25f9ee0b923dcp-37 },
+ { 0x1.fffffffffe5b6p-1, 0x1.0f9a0686531ffp-37 },
+ { 0x1.fffffffffe7c0p-1, 0x1.f5cc7718082afp-38 },
+ { 0x1.fffffffffe9a2p-1, 0x1.cf7e53d6a2ca5p-38 },
+ { 0x1.fffffffffeb60p-1, 0x1.ac0f5f3229372p-38 },
+ { 0x1.fffffffffecfbp-1, 0x1.8b498644847eap-38 },
+ { 0x1.fffffffffee77p-1, 0x1.6cfa9bcca59dcp-38 },
+ { 0x1.fffffffffefd6p-1, 0x1.50f411d4fd2cdp-38 },
+ { 0x1.ffffffffff11ap-1, 0x1.370ab8327af5ep-38 },
+ { 0x1.ffffffffff245p-1, 0x1.1f167f88c6b6ep-38 },
+ { 0x1.ffffffffff359p-1, 0x1.08f24085d4597p-38 },
+ { 0x1.ffffffffff457p-1, 0x1.e8f70e181d619p-39 },
+ { 0x1.ffffffffff542p-1, 0x1.c324c20e337dcp-39 },
+ { 0x1.ffffffffff61bp-1, 0x1.a03261574b54ep-39 },
+ { 0x1.ffffffffff6e3p-1, 0x1.7fe903cdf5855p-39 },
+ { 0x1.ffffffffff79bp-1, 0x1.6215c58da3450p-39 },
+ { 0x1.ffffffffff845p-1, 0x1.46897d4b69fc6p-39 },
+ { 0x1.ffffffffff8e2p-1, 0x1.2d1877d731b7bp-39 },
+ { 0x1.ffffffffff973p-1, 0x1.159a386b11517p-39 },
+ { 0x1.ffffffffff9f8p-1, 0x1.ffd27ae9393cep-40 },
+ { 0x1.ffffffffffa73p-1, 0x1.d7c593130dd0bp-40 },
+ { 0x1.ffffffffffae4p-1, 0x1.b2cd607c79bcfp-40 },
+ { 0x1.ffffffffffb4cp-1, 0x1.90ae4d3405651p-40 },
+ { 0x1.ffffffffffbadp-1, 0x1.71312dd1759e2p-40 },
+ { 0x1.ffffffffffc05p-1, 0x1.5422ef5d8949dp-40 },
+ { 0x1.ffffffffffc57p-1, 0x1.39544b0ecc957p-40 },
+ { 0x1.ffffffffffca2p-1, 0x1.20997f73e73ddp-40 },
+ { 0x1.ffffffffffce7p-1, 0x1.09ca0eaacd277p-40 },
+ { 0x1.ffffffffffd27p-1, 0x1.e9810295890ecp-41 },
+ { 0x1.ffffffffffd62p-1, 0x1.c2b45b5aa4a1dp-41 },
+ { 0x1.ffffffffffd98p-1, 0x1.9eee068fa7596p-41 },
+ { 0x1.ffffffffffdcap-1, 0x1.7df2b399c10a8p-41 },
+ { 0x1.ffffffffffdf8p-1, 0x1.5f8b87a31bd85p-41 },
+ { 0x1.ffffffffffe22p-1, 0x1.4385c96e9a2d9p-41 },
+ { 0x1.ffffffffffe49p-1, 0x1.29b2933ef4cbcp-41 },
+ { 0x1.ffffffffffe6cp-1, 0x1.11e68a6378f8ap-41 },
+ { 0x1.ffffffffffe8dp-1, 0x1.f7f338086a86bp-42 },
+ { 0x1.ffffffffffeabp-1, 0x1.cf8d7d9ce040ap-42 },
+ { 0x1.ffffffffffec7p-1, 0x1.aa577251ae484p-42 },
+ { 0x1.ffffffffffee1p-1, 0x1.8811d739efb5ep-42 },
+ { 0x1.ffffffffffef8p-1, 0x1.68823e52970bep-42 },
+ { 0x1.fffffffffff0ep-1, 0x1.4b72ae68e8b4cp-42 },
+ { 0x1.fffffffffff22p-1, 0x1.30b14dbe876bcp-42 },
+ { 0x1.fffffffffff34p-1, 0x1.181012ef86610p-42 },
+ { 0x1.fffffffffff45p-1, 0x1.01647ba798744p-42 },
+ { 0x1.fffffffffff54p-1, 0x1.d90e917701675p-43 },
+ { 0x1.fffffffffff62p-1, 0x1.b2a87e86d0c8ap-43 },
+ { 0x1.fffffffffff6fp-1, 0x1.8f53dcb377293p-43 },
+ { 0x1.fffffffffff7bp-1, 0x1.6ed2f2515e933p-43 },
+ { 0x1.fffffffffff86p-1, 0x1.50ecc9ed47f19p-43 },
+ { 0x1.fffffffffff90p-1, 0x1.356cd5ce7799ep-43 },
+ { 0x1.fffffffffff9ap-1, 0x1.1c229a587ab78p-43 },
+ { 0x1.fffffffffffa2p-1, 0x1.04e15ecc7f3f6p-43 },
+ { 0x1.fffffffffffaap-1, 0x1.deffc7e6a6017p-44 },
+ { 0x1.fffffffffffb1p-1, 0x1.b7b040832f310p-44 },
+ { 0x1.fffffffffffb8p-1, 0x1.938e021f36d76p-44 },
+ { 0x1.fffffffffffbep-1, 0x1.7258610b3b233p-44 },
+ { 0x1.fffffffffffc3p-1, 0x1.53d3bfc82a909p-44 },
+ { 0x1.fffffffffffc8p-1, 0x1.37c92babdc2fdp-44 },
+ { 0x1.fffffffffffcdp-1, 0x1.1e06010120f6ap-44 },
+ { 0x1.fffffffffffd1p-1, 0x1.065b9616170d4p-44 },
+ { 0x1.fffffffffffd5p-1, 0x1.e13dd96b3753ap-45 },
+ { 0x1.fffffffffffd9p-1, 0x1.b950d32467392p-45 },
+ { 0x1.fffffffffffdcp-1, 0x1.94a72263259a5p-45 },
+ { 0x1.fffffffffffdfp-1, 0x1.72fd93e036cdcp-45 },
+ { 0x1.fffffffffffe2p-1, 0x1.54164576929abp-45 },
+ { 0x1.fffffffffffe4p-1, 0x1.37b83c521fe96p-45 },
+ { 0x1.fffffffffffe7p-1, 0x1.1daf033182e96p-45 },
+ { 0x1.fffffffffffe9p-1, 0x1.05ca50205d26ap-45 },
+ { 0x1.fffffffffffebp-1, 0x1.dfbb6235639fap-46 },
+ { 0x1.fffffffffffedp-1, 0x1.b7807e294781fp-46 },
+ { 0x1.fffffffffffeep-1, 0x1.9298add70a734p-46 },
+ { 0x1.ffffffffffff0p-1, 0x1.70beaf9c7ffb6p-46 },
+ { 0x1.ffffffffffff1p-1, 0x1.51b2cd6709222p-46 },
+ { 0x1.ffffffffffff3p-1, 0x1.353a6cf7f7fffp-46 },
+ { 0x1.ffffffffffff4p-1, 0x1.1b1fa8cbe84a7p-46 },
+ { 0x1.ffffffffffff5p-1, 0x1.0330f0fd69921p-46 },
+ { 0x1.ffffffffffff6p-1, 0x1.da81670f96f9bp-47 },
+ { 0x1.ffffffffffff7p-1, 0x1.b24a16b4d09aap-47 },
+ { 0x1.ffffffffffff7p-1, 0x1.8d6eeb6efdbd6p-47 },
+ { 0x1.ffffffffffff8p-1, 0x1.6ba91ac734785p-47 },
+ { 0x1.ffffffffffff9p-1, 0x1.4cb7966770ab5p-47 },
+ { 0x1.ffffffffffff9p-1, 0x1.305e9721d0981p-47 },
+ { 0x1.ffffffffffffap-1, 0x1.1667311fff70ap-47 },
+ { 0x1.ffffffffffffbp-1, 0x1.fd3de10d62855p-48 },
+ { 0x1.ffffffffffffbp-1, 0x1.d1aefbcd48d0cp-48 },
+ { 0x1.ffffffffffffbp-1, 0x1.a9cc93c25aca9p-48 },
+ { 0x1.ffffffffffffcp-1, 0x1.85487ee3ea735p-48 },
+ { 0x1.ffffffffffffcp-1, 0x1.63daf8b4b1e0cp-48 },
+ { 0x1.ffffffffffffdp-1, 0x1.45421e69a6ca1p-48 },
+ { 0x1.ffffffffffffdp-1, 0x1.294175802d99ap-48 },
+ { 0x1.ffffffffffffdp-1, 0x1.0fa17bf41068fp-48 },
+ { 0x1.ffffffffffffdp-1, 0x1.f05e82aae2bb9p-49 },
+ { 0x1.ffffffffffffep-1, 0x1.c578101b29058p-49 },
+ { 0x1.ffffffffffffep-1, 0x1.9e39dc5dd2f7cp-49 },
+ { 0x1.ffffffffffffep-1, 0x1.7a553a728bbf2p-49 },
+ { 0x1.ffffffffffffep-1, 0x1.5982008db1304p-49 },
+ { 0x1.ffffffffffffep-1, 0x1.3b7e00422e51bp-49 },
+ { 0x1.ffffffffffffep-1, 0x1.200c898d9ee3ep-49 },
+ { 0x1.fffffffffffffp-1, 0x1.06f5f7eb65a56p-49 },
+ { 0x1.fffffffffffffp-1, 0x1.e00e9148a1d25p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.b623734024e92p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.8fd4e01891bf8p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.6cd44c7470d89p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.4cd9c04158cd7p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.2fa34bf5c8344p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.14f4890ff2461p-50 },
+ { 0x1.fffffffffffffp-1, 0x1.f92c49dfa4df5p-51 },
+ { 0x1.fffffffffffffp-1, 0x1.ccaaea71ab0dfp-51 },
+ { 0x1.fffffffffffffp-1, 0x1.a40829f001197p-51 },
+ { 0x1.0000000000000p+0, 0x1.7eef13b59e96cp-51 },
+ { 0x1.0000000000000p+0, 0x1.5d11e1a252bf5p-51 },
+ { 0x1.0000000000000p+0, 0x1.3e296303b2297p-51 },
+ { 0x1.0000000000000p+0, 0x1.21f47009f43cep-51 },
+ { 0x1.0000000000000p+0, 0x1.083768c5e4541p-51 },
+ { 0x1.0000000000000p+0, 0x1.e1777d831265ep-52 },
+ { 0x1.0000000000000p+0, 0x1.b69f10b0191b5p-52 },
+ { 0x1.0000000000000p+0, 0x1.8f8a3a05b5b52p-52 },
+ { 0x1.0000000000000p+0, 0x1.6be573c40c8e7p-52 },
+ { 0x1.0000000000000p+0, 0x1.4b645ba991fdbp-52 },
+ { 0x1.0000000000000p+0, 0x1.2dc119095729fp-52 },
+ },
+};
diff --git a/contrib/arm-optimized-routines/pl/math/erfc_1u8.c b/contrib/arm-optimized-routines/pl/math/erfc_1u8.c
new file mode 100644
index 000000000000..7f2004e9335d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfc_1u8.c
@@ -0,0 +1,153 @@
+/*
+ * Double-precision erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Shift 0x1p45
+#define P20 0x1.5555555555555p-2 /* 1/3. */
+#define P21 0x1.5555555555555p-1 /* 2/3. */
+
+#define P40 0x1.999999999999ap-4 /* 1/10. */
+#define P41 0x1.999999999999ap-2 /* 2/5. */
+#define P42 0x1.1111111111111p-3 /* 2/15. */
+
+#define P50 0x1.5555555555555p-3 /* 1/6. */
+#define P51 0x1.c71c71c71c71cp-3 /* 2/9. */
+#define P52 0x1.6c16c16c16c17p-5 /* 2/45. */
+
+/* Qi = (i+1) / i. */
+#define Q5 0x1.3333333333333p0
+#define Q6 0x1.2aaaaaaaaaaabp0
+#define Q7 0x1.2492492492492p0
+#define Q8 0x1.2p0
+#define Q9 0x1.1c71c71c71c72p0
+
+/* Ri = -2 * i / ((i+1)*(i+2)). */
+#define R5 -0x1.e79e79e79e79ep-3
+#define R6 -0x1.b6db6db6db6dbp-3
+#define R7 -0x1.8e38e38e38e39p-3
+#define R8 -0x1.6c16c16c16c17p-3
+#define R9 -0x1.4f2094f2094f2p-3
+
+/* Fast erfc approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
+
+ poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
+ + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
+ - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5
+ + p6(r) d^6 + ... + p10(r) d^10
+
+ Polynomials p6(r) to p10(r) are computed using recurrence relation
+
+ 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0,
+ with p0 = 1, and p1(r) = -r.
+
+ Values of erfc(r) and scale(r) are read from lookup tables. Stored values
+ are scaled to avoid hitting the subnormal range.
+
+ Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
+
+ Maximum measured error: 1.71 ULP
+ erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608
+ want 0x1.e15fcbea3e7adp-608. */
+double
+erfc (double x)
+{
+ /* Get top words and sign. */
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & 0x7fffffffffffffff;
+ double a = asdouble (ia);
+ uint64_t sign = ix & ~0x7fffffffffffffff;
+
+ /* erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2. */
+ if (unlikely (ia >= 0x7ff0000000000000))
+ return asdouble (sign >> 1) + 1.0 / x; /* Special cases. */
+
+ /* Return early for large enough negative values. */
+ if (x < -6.0)
+ return 2.0;
+
+ /* For |x| < 3487.0/128.0, the following approximation holds. */
+ if (likely (ia < 0x403b3e0000000000))
+ {
+ /* |x| < 0x1p-511 => accurate to 0.5 ULP. */
+ if (unlikely (ia < asuint64 (0x1p-511)))
+ return 1.0 - x;
+
+ /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale
+ to 2/sqrt(pi), when x reduced to r = 0. */
+ double z = a + Shift;
+ uint64_t i = asuint64 (z) - asuint64 (Shift);
+ double r = z - Shift;
+ /* These values are scaled by 2^128. */
+ double erfcr = __erfc_data.tab[i].erfc;
+ double scale = __erfc_data.tab[i].scale;
+
+ /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */
+ double d = a - r;
+ double d2 = d * d;
+ double r2 = r * r;
+ /* Compute p_i as a regular (low-order) polynomial. */
+ double p1 = -r;
+ double p2 = fma (P21, r2, -P20);
+ double p3 = -r * fma (P20, r2, -0.5);
+ double p4 = fma (fma (P42, r2, -P41), r2, P40);
+ double p5 = -r * fma (fma (P52, r2, -P51), r2, P50);
+ /* Compute p_i using recurrence relation:
+ p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */
+ double p6 = fma (Q5 * r, p5, p4) * R5;
+ double p7 = fma (Q6 * r, p6, p5) * R6;
+ double p8 = fma (Q7 * r, p7, p6) * R7;
+ double p9 = fma (Q8 * r, p8, p7) * R8;
+ double p10 = fma (Q9 * r, p9, p8) * R9;
+ /* Compute polynomial in d using pairwise Horner scheme. */
+ double p90 = fma (p10, d, p9);
+ double p78 = fma (p8, d, p7);
+ double p56 = fma (p6, d, p5);
+ double p34 = fma (p4, d, p3);
+ double p12 = fma (p2, d, p1);
+ double y = fma (p90, d2, p78);
+ y = fma (y, d2, p56);
+ y = fma (y, d2, p34);
+ y = fma (y, d2, p12);
+
+ y = fma (-fma (y, d2, d), scale, erfcr);
+
+ /* Handle sign and scale back in a single fma. */
+ double off = asdouble (sign >> 1);
+ double fac = asdouble (asuint64 (0x1p-128) | sign);
+ y = fma (y, fac, off);
+
+ if (unlikely (x > 26.0))
+ {
+ /* The underflow exception needs to be signaled explicitly when
+ result gets into the subnormal range. */
+ if (unlikely (y < 0x1p-1022))
+ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+ /* Set errno to ERANGE if result rounds to 0. */
+ return __math_check_uflow (y);
+ }
+
+ return y;
+ }
+ /* Above the threshold (x > 3487.0/128.0) erfc is constant and needs to raise
+ underflow exception for positive x. */
+ return __math_uflow (0);
+}
+
+PL_SIG (S, D, 1, erfc, -6.0, 28.0)
+PL_TEST_ULP (erfc, 1.21)
+PL_TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000)
+PL_TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000)
+PL_TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000)
+PL_TEST_INTERVAL (erfc, 28.0, inf, 40000)
+PL_TEST_INTERVAL (erfc, -6.0, -inf, 40000)
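The header comment in erfc_1u8.c above derives p6(r)..p10(r) from a three-term recurrence, and the code folds that recurrence into the constants Q5..Q9 and R5..R9. The short standalone sketch below is an editorial illustration, not part of the patch; it only assumes a C99 compiler and libm. It recomputes p2..p5 both from the closed forms quoted in the comment and from the generic rule p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}, with Q_j = (j+1)/j and R_j = -2j/((j+1)(j+2)); the two agree to within a few double-precision ulps, which is what justifies the hard-coded Q/R values.

/* Sketch only: check the p_i recurrence used by erfc_1u8.c against the
   closed-form low-order polynomials quoted in its header comment.  */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  double r = 0.8359375; /* an arbitrary multiple of 1/128 in range */
  double r2 = r * r;

  /* Closed forms p0..p5 from the header comment.  */
  double closed[6];
  closed[0] = 1.0;
  closed[1] = -r;
  closed[2] = 2.0 / 3.0 * r2 - 1.0 / 3.0;
  closed[3] = -r * (1.0 / 3.0 * r2 - 0.5);
  closed[4] = (2.0 / 15.0 * r2 - 2.0 / 5.0) * r2 + 1.0 / 10.0;
  closed[5] = -r * ((2.0 / 45.0 * r2 - 2.0 / 9.0) * r2 + 1.0 / 6.0);

  /* Same polynomials via the recurrence, starting from p0 = 1, p1 = -r:
     p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}.  */
  double p[6] = { 1.0, -r };
  for (int i = 0; i + 2 < 6; i++)
    {
      double q = (double) (i + 2) / (i + 1);             /* Q_{i+1} = (i+2)/(i+1). */
      double s = -2.0 * (i + 1) / ((i + 2) * (i + 3));   /* R_{i+1}.               */
      p[i + 2] = (p[i] + r * q * p[i + 1]) * s;
    }

  for (int i = 2; i < 6; i++)
    printf ("p%d  closed %.17g  recurrence %.17g  diff %g\n",
            i, closed[i], p[i], fabs (closed[i] - p[i]));
  return 0;
}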
diff --git a/contrib/arm-optimized-routines/pl/math/erfc_data.c b/contrib/arm-optimized-routines/pl/math/erfc_data.c
new file mode 100644
index 000000000000..40f72a4d6d5b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfc_data.c
@@ -0,0 +1,3507 @@
+/*
+ * Data used in double-precision erfc(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Lookup table used in erfc.
+ For each possible rounded input r (multiples of 1/128), between
+ r = 0.0 and r = ~27.0 (3488 values):
+ - the first entry __erfc_data.tab.erfc contains the values of erfc(r),
+ - the second entry __erfc_data.tab.scale contains the values of
+ 2/sqrt(pi)*exp(-r^2). Both values may go into subnormal range, therefore
+ they are scaled by a large enough value 2^128 (fits in 8bit). */
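As a quick sanity check on this layout (an editorial sketch under the stated assumptions, not part of the patch): row k of the initializer that follows should hold erfc(k/128) * 2^128 and 2/sqrt(pi) * exp(-(k/128)^2) * 2^128. The snippet below recomputes the first two rows with the host libm and prints the relative deviation, which should come out at a few double-precision ulps.

/* Sketch only: recompute the first two rows of __erfc_data.tab with libm
   and compare against the hard-coded, 2^128-scaled values.  */
#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* First two rows, copied from the initializer below.  */
  static const double tab[2][2] = {
    { 0x1p128, 0x1.20dd750429b6dp128 },
    { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 },
  };
  const double pi = 0x1.921fb54442d18p1; /* pi rounded to double. */

  for (int k = 0; k < 2; k++)
    {
      double r = k / 128.0;
      double want_erfc = erfc (r) * 0x1p128;
      double want_scale = 2.0 / sqrt (pi) * exp (-r * r) * 0x1p128;
      printf ("k=%d  erfc rel. err %.2e  scale rel. err %.2e\n", k,
              fabs (tab[k][0] / want_erfc - 1.0),
              fabs (tab[k][1] / want_scale - 1.0));
    }
  return 0;
}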
+const struct erfc_data __erfc_data = {
+ .tab = { { 0x1p128, 0x1.20dd750429b6dp128 },
+ { 0x1.fb7c9030853b3p127, 0x1.20d8f1975c85dp128 },
+ { 0x1.f6f9447be0743p127, 0x1.20cb67bd452c7p128 },
+ { 0x1.f27640f9853d9p127, 0x1.20b4d8bac36c1p128 },
+ { 0x1.edf3a9ba22dadp127, 0x1.209546ad13ccfp128 },
+ { 0x1.e971a2c4436aep127, 0x1.206cb4897b148p128 },
+ { 0x1.e4f05010eca8cp127, 0x1.203b261cd0053p128 },
+ { 0x1.e06fd58842c7ep127, 0x1.2000a00ae3804p128 },
+ { 0x1.dbf056fe2df35p127, 0x1.1fbd27cdc72d3p128 },
+ { 0x1.d771f82f02f4ep127, 0x1.1f70c3b4f2cc8p128 },
+ { 0x1.d2f4dcbc2f894p127, 0x1.1f1b7ae44867fp128 },
+ { 0x1.ce792828eae5cp127, 0x1.1ebd5552f795bp128 },
+ { 0x1.c9fefdd6eaf19p127, 0x1.1e565bca400d4p128 },
+ { 0x1.c58681031eb6ap127, 0x1.1de697e413d29p128 },
+ { 0x1.c10fd4c26e896p127, 0x1.1d6e14099944ap128 },
+ { 0x1.bc9b1bfe82687p127, 0x1.1cecdb718d61cp128 },
+ { 0x1.b82879728f11ep127, 0x1.1c62fa1e869b6p128 },
+ { 0x1.b3b80fa82a4bbp127, 0x1.1bd07cdd189acp128 },
+ { 0x1.af4a00f426daap127, 0x1.1b357141d95d5p128 },
+ { 0x1.aade6f7378a0ep127, 0x1.1a91e5a748165p128 },
+ { 0x1.a6757d08215d8p127, 0x1.19e5e92b964abp128 },
+ { 0x1.a20f4b5626818p127, 0x1.19318bae53a04p128 },
+ { 0x1.9dabfbc090901p127, 0x1.1874ddcdfce24p128 },
+ { 0x1.994baf66747adp127, 0x1.17aff0e56ec1p128 },
+ { 0x1.94ee8720076b6p127, 0x1.16e2d7093cd8cp128 },
+ { 0x1.9094a37bbd66ep127, 0x1.160da304ed92fp128 },
+ { 0x1.8c3e24bb73372p127, 0x1.153068581b781p128 },
+ { 0x1.87eb2ad1a4032p127, 0x1.144b3b337c90cp128 },
+ { 0x1.839bd55eaafc8p127, 0x1.135e3075d076bp128 },
+ { 0x1.7f5043ae11862p127, 0x1.12695da8b5bdep128 },
+ { 0x1.7b0894b3ea35cp127, 0x1.116cd8fd67618p128 },
+ { 0x1.76c4e70a390e7p127, 0x1.1068b94962e5ep128 },
+ { 0x1.728558ee694fcp127, 0x1.0f5d1602f7e41p128 },
+ { 0x1.6e4a083ed132fp127, 0x1.0e4a073dc1b91p128 },
+ { 0x1.6a13127843ec1p127, 0x1.0d2fa5a70c168p128 },
+ { 0x1.65e094b3b2413p127, 0x1.0c0e0a8223359p128 },
+ { 0x1.61b2aba3da093p127, 0x1.0ae54fa490723p128 },
+ { 0x1.5d89739304dcfp127, 0x1.09b58f724416bp128 },
+ { 0x1.59650860d6469p127, 0x1.087ee4d9ad247p128 },
+ { 0x1.5545858029b39p127, 0x1.07416b4fbfe7cp128 },
+ { 0x1.512b05f5006e1p127, 0x1.05fd3ecbec298p128 },
+ { 0x1.4d15a4527fdc7p127, 0x1.04b27bc403d3p128 },
+ { 0x1.49057ab900447p127, 0x1.03613f2812dafp128 },
+ { 0x1.44faa2d42c4ap127, 0x1.0209a65e29545p128 },
+ { 0x1.40f535d93160ep127, 0x1.00abcf3e187a9p128 },
+ { 0x1.3cf54c850162p127, 0x1.fe8fb01a47307p127 },
+ { 0x1.38faff1aa574ap127, 0x1.fbbbbef34b4b2p127 },
+ { 0x1.35066561a275dp127, 0x1.f8dc092d58ff8p127 },
+ { 0x1.311796a46f064p127, 0x1.f5f0cdaf15313p127 },
+ { 0x1.2d2ea9aefb636p127, 0x1.f2fa4c16c0019p127 },
+ { 0x1.294bb4cd4b2bdp127, 0x1.eff8c4b1375dbp127 },
+ { 0x1.256ecdca212ccp127, 0x1.ecec7870ebca8p127 },
+ { 0x1.219809edbd524p127, 0x1.e9d5a8e4c934ep127 },
+ { 0x1.1dc77dfcacd02p127, 0x1.e6b4982f158b9p127 },
+ { 0x1.19fd3e36ac96ap127, 0x1.e38988fc46e72p127 },
+ { 0x1.16395e559e218p127, 0x1.e054be79d3042p127 },
+ { 0x1.127bf18c8eadcp127, 0x1.dd167c4cf9d2ap127 },
+ { 0x1.0ec50a86d0dd4p127, 0x1.d9cf06898cdafp127 },
+ { 0x1.0b14bb6728cd8p127, 0x1.d67ea1a8b5368p127 },
+ { 0x1.076b15c70aa28p127, 0x1.d325927fb9d89p127 },
+ { 0x1.03c82ab5eb831p127, 0x1.cfc41e36c7df9p127 },
+ { 0x1.002c0ab8a5018p127, 0x1.cc5a8a3fbea4p127 },
+ { 0x1.f92d8b91d5cc7p126, 0x1.c8e91c4d01368p127 },
+ { 0x1.f210d6a9a6a31p126, 0x1.c5701a484ef9dp127 },
+ { 0x1.eb02147ce245cp126, 0x1.c1efca49a5011p127 },
+ { 0x1.e40161b701275p126, 0x1.be68728e29d5ep127 },
+ { 0x1.dd0ed9ea4bdd6p126, 0x1.bada596f25436p127 },
+ { 0x1.d62a978f7c957p126, 0x1.b745c55905bf8p127 },
+ { 0x1.cf54b4058455fp126, 0x1.b3aafcc27502ep127 },
+ { 0x1.c88d479173ccep126, 0x1.b00a46237d5bep127 },
+ { 0x1.c1d4695e87644p126, 0x1.ac63e7ecc1411p127 },
+ { 0x1.bb2a2f7e5652p126, 0x1.a8b8287ec6a09p127 },
+ { 0x1.b48eaee924501p126, 0x1.a5074e215762p127 },
+ { 0x1.ae01fb7e55a66p126, 0x1.a1519efaf889ep127 },
+ { 0x1.a78428050527ep126, 0x1.9d97610879642p127 },
+ { 0x1.a115462cbbc17p126, 0x1.99d8da149c13fp127 },
+ { 0x1.9ab5668e4930ap126, 0x1.96164fafd8de3p127 },
+ { 0x1.946498acbd766p126, 0x1.925007283d7aap127 },
+ { 0x1.8e22eaf68291ep126, 0x1.8e86458169af8p127 },
+ { 0x1.87f06ac6960c4p126, 0x1.8ab94f6caa71dp127 },
+ { 0x1.81cd2465e1d96p126, 0x1.86e9694134b9ep127 },
+ { 0x1.7bb9230cb40b4p126, 0x1.8316d6f48133dp127 },
+ { 0x1.75b470e454d35p126, 0x1.7f41dc12c9e89p127 },
+ { 0x1.6fbf1708ba47cp126, 0x1.7b6abbb7aaf19p127 },
+ { 0x1.69d91d8a595dap126, 0x1.7791b886e7403p127 },
+ { 0x1.64028b7013867p126, 0x1.73b714a552763p127 },
+ { 0x1.5e3b66b9405a9p126, 0x1.6fdb11b1e0c34p127 },
+ { 0x1.5883b45fd2b63p126, 0x1.6bfdf0beddaf5p127 },
+ { 0x1.52db785a98acap126, 0x1.681ff24b4ab04p127 },
+ { 0x1.4d42b59f95afap126, 0x1.6441563c665d4p127 },
+ { 0x1.47b96e267647ap126, 0x1.60625bd75d07bp127 },
+ { 0x1.423fa2eb1cb59p126, 0x1.5c8341bb23767p127 },
+ { 0x1.3cd553f045d45p126, 0x1.58a445da7c74cp127 },
+ { 0x1.377a8042458d1p126, 0x1.54c5a57629dbp127 },
+ { 0x1.322f25f9da2fdp126, 0x1.50e79d1749ac9p127 },
+ { 0x1.2cf3423f15fdfp126, 0x1.4d0a6889dfd9fp127 },
+ { 0x1.27c6d14c5e341p126, 0x1.492e42d78d2c5p127 },
+ { 0x1.22a9ce717edcbp126, 0x1.4553664273d24p127 },
+ { 0x1.1d9c3416d2b4bp126, 0x1.417a0c4049fdp127 },
+ { 0x1.189dfbc07e69p126, 0x1.3da26d759aef5p127 },
+ { 0x1.13af1e11be721p126, 0x1.39ccc1b136d5ap127 },
+ { 0x1.0ecf92d046d22p126, 0x1.35f93fe7d1b3dp127 },
+ { 0x1.09ff50e7b3f93p126, 0x1.32281e2fd1a92p127 },
+ { 0x1.053e4e6d0c10bp126, 0x1.2e5991bd4cbfcp127 },
+ { 0x1.008c80a24ff1p126, 0x1.2a8dcede3673bp127 },
+ { 0x1.f7d3b7f436013p125, 0x1.26c508f6bd0ffp127 },
+ { 0x1.eeaca836a27ccp125, 0x1.22ff727dd6f7bp127 },
+ { 0x1.e5a3b7c9b56dap125, 0x1.1f3d3cf9ffe5ap127 },
+ { 0x1.dcb8cae2d747fp125, 0x1.1b7e98fe26217p127 },
+ { 0x1.d3ebc436b0f26p125, 0x1.17c3b626c7a12p127 },
+ { 0x1.cb3c8500ea349p125, 0x1.140cc3173f007p127 },
+ { 0x1.c2aaed0bfcfeep125, 0x1.1059ed7740313p127 },
+ { 0x1.ba36dab91c0e9p125, 0x1.0cab61f084b93p127 },
+ { 0x1.b1e02b082b72p125, 0x1.09014c2ca74dap127 },
+ { 0x1.a9a6b99fc973bp125, 0x1.055bd6d32e8d7p127 },
+ { 0x1.a18a60d56673ep125, 0x1.01bb2b87c6968p127 },
+ { 0x1.998af9b56a3aep125, 0x1.fc3ee5d1524bp126 },
+ { 0x1.91a85c0b65519p125, 0x1.f511a91a67d2ap126 },
+ { 0x1.89e25e6a4cef9p125, 0x1.edeeee0959518p126 },
+ { 0x1.8238d634c0127p125, 0x1.e6d6ffaa65a25p126 },
+ { 0x1.7aab97a554544p125, 0x1.dfca26f5bbf88p126 },
+ { 0x1.733a75d6e91b8p125, 0x1.d8c8aace11e63p126 },
+ { 0x1.6be542ccffc2fp125, 0x1.d1d2cfff91594p126 },
+ { 0x1.64abcf7c175b4p125, 0x1.cae8d93f1d7b7p126 },
+ { 0x1.5d8debd20aacep125, 0x1.c40b0729ed548p126 },
+ { 0x1.568b66be6f268p125, 0x1.bd3998457afdbp126 },
+ { 0x1.4fa40e3af3674p125, 0x1.b674c8ffc6283p126 },
+ { 0x1.48d7af53bc19fp125, 0x1.afbcd3afe8ab6p126 },
+ { 0x1.4226162fbddd5p125, 0x1.a911f096fbc26p126 },
+ { 0x1.3b8f0e1912f7p125, 0x1.a27455e14c93cp126 },
+ { 0x1.351261854b991p125, 0x1.9be437a7de946p126 },
+ { 0x1.2eafda1db784ap125, 0x1.9561c7f23a47bp126 },
+ { 0x1.286740c7a7dabp125, 0x1.8eed36b886d93p126 },
+ { 0x1.22385daca7f47p125, 0x1.8886b1e5ecfd1p126 },
+ { 0x1.1c22f842ac1f2p125, 0x1.822e655b417e7p126 },
+ { 0x1.1626d7543522p125, 0x1.7be47af1f5d89p126 },
+ { 0x1.1043c1086777dp125, 0x1.75a91a7f4d2edp126 },
+ { 0x1.0a797aeb152f2p125, 0x1.6f7c69d7d3ef8p126 },
+ { 0x1.04c7c9f4b969p125, 0x1.695e8cd31867ep126 },
+ { 0x1.fe5ce524c8ee5p124, 0x1.634fa54fa285fp126 },
+ { 0x1.f35a715b2f3e1p124, 0x1.5d4fd33729015p126 },
+ { 0x1.e887bf681f218p124, 0x1.575f3483021c3p126 },
+ { 0x1.dde4553ef94dep124, 0x1.517de540ce2a3p126 },
+ { 0x1.d36fb7fa50177p124, 0x1.4babff975a04cp126 },
+ { 0x1.c9296beb09cf1p124, 0x1.45e99bcbb7915p126 },
+ { 0x1.bf10f4a759889p124, 0x1.4036d0468a7a2p126 },
+ { 0x1.b525d5198cb1cp124, 0x1.3a93b1998736cp126 },
+ { 0x1.ab678f8eabedbp124, 0x1.35005285227f1p126 },
+ { 0x1.a1d5a5c4edb96p124, 0x1.2f7cc3fe6f423p126 },
+ { 0x1.986f98f9f96c8p124, 0x1.2a09153529381p126 },
+ { 0x1.8f34e9f8f93a6p124, 0x1.24a55399ea239p126 },
+ { 0x1.8625192879e39p124, 0x1.1f518ae487dc8p126 },
+ { 0x1.7d3fa69816db5p124, 0x1.1a0dc51a9934dp126 },
+ { 0x1.7484120df1b01p124, 0x1.14da0a961fd14p126 },
+ { 0x1.6bf1db13f3983p124, 0x1.0fb6620c550afp126 },
+ { 0x1.63888104d811ap124, 0x1.0aa2d09497f2bp126 },
+ { 0x1.5b478318ff939p124, 0x1.059f59af7a906p126 },
+ { 0x1.532e6073095f2p124, 0x1.00abff4dec7a3p126 },
+ { 0x1.4b3c982c338c7p124, 0x1.f79183b101c5bp125 },
+ { 0x1.4371a960807f8p124, 0x1.edeb406d9c825p125 },
+ { 0x1.3bcd133aa0ffcp124, 0x1.e4652fadcb6b2p125 },
+ { 0x1.344e54ffa23b9p124, 0x1.daff4969c0b04p125 },
+ { 0x1.2cf4ee1a5f0fcp124, 0x1.d1b982c50137p125 },
+ { 0x1.25c05e26b3f99p124, 0x1.c893ce1dcbef7p125 },
+ { 0x1.1eb024fc75285p124, 0x1.bf8e1b1ca2279p125 },
+ { 0x1.17c3c2ba26319p124, 0x1.b6a856c3ed54fp125 },
+ { 0x1.10fab7cf72f94p124, 0x1.ade26b7fbed95p125 },
+ { 0x1.0a548507696cp124, 0x1.a53c4135a6526p125 },
+ { 0x1.03d0ab9273b94p124, 0x1.9cb5bd549b111p125 },
+ { 0x1.fadd5a20258d3p123, 0x1.944ec2e4f563p125 },
+ { 0x1.ee5c1730b147cp123, 0x1.8c07329874652p125 },
+ { 0x1.e21c938a45a83p123, 0x1.83deeada4d25ap125 },
+ { 0x1.d61dd57628999p123, 0x1.7bd5c7df3fe9cp125 },
+ { 0x1.ca5ee4649e31fp123, 0x1.73eba3b5b07b7p125 },
+ { 0x1.bedec8fddb34p123, 0x1.6c205655be72p125 },
+ { 0x1.b39c8d3276d8ap123, 0x1.6473b5b15a7a1p125 },
+ { 0x1.a8973c4b5c03ep123, 0x1.5ce595c455b0ap125 },
+ { 0x1.9dcde2f93a207p123, 0x1.5575c8a468362p125 },
+ { 0x1.933f8f6375f2cp123, 0x1.4e241e912c305p125 },
+ { 0x1.88eb51369acb9p123, 0x1.46f066040a832p125 },
+ { 0x1.7ed039b24c96bp123, 0x1.3fda6bc016994p125 },
+ { 0x1.74ed5bb6bb581p123, 0x1.38e1fae1d6a9dp125 },
+ { 0x1.6b41cbd198bc8p123, 0x1.3206dceef5f87p125 },
+ { 0x1.61cca04a90795p123, 0x1.2b48d9e5dea1cp125 },
+ { 0x1.588cf12f4446bp123, 0x1.24a7b84d38971p125 },
+ { 0x1.4f81d85ecc55bp123, 0x1.1e233d434b813p125 },
+ { 0x1.46aa7194bd324p123, 0x1.17bb2c8d41535p125 },
+ { 0x1.3e05da73b4159p123, 0x1.116f48a6476ccp125 },
+ { 0x1.3593328f6abbep123, 0x1.0b3f52ce8c383p125 },
+ { 0x1.2d519b7653e1ep123, 0x1.052b0b1a174eap125 },
+ { 0x1.254038bac19d6p123, 0x1.fe6460fef468p124 },
+ { 0x1.1d5e2ffb96d4p123, 0x1.f2a901ccafb37p124 },
+ { 0x1.15aaa8ec85205p123, 0x1.e723726b824a9p124 },
+ { 0x1.0e24cd5dd8846p123, 0x1.dbd32ac4c99bp124 },
+ { 0x1.06cbc943d255ap123, 0x1.d0b7a0f921e7cp124 },
+ { 0x1.ff3d957b29b39p122, 0x1.c5d0497c09e74p124 },
+ { 0x1.f13a043742333p122, 0x1.bb1c972f23e5p124 },
+ { 0x1.e38b43cbd0f0fp122, 0x1.b09bfb7d11a84p124 },
+ { 0x1.d62fbdc2e756bp122, 0x1.a64de673e8837p124 },
+ { 0x1.c925e02b41668p122, 0x1.9c31c6df3b1b8p124 },
+ { 0x1.bc6c1da1f3121p122, 0x1.92470a61b6965p124 },
+ { 0x1.b000ed5b4a626p122, 0x1.888d1d8e510a3p124 },
+ { 0x1.a3e2cb2ae9edbp122, 0x1.7f036c0107294p124 },
+ { 0x1.9810378b1f299p122, 0x1.75a96077274bap124 },
+ { 0x1.8c87b7a37834fp122, 0x1.6c7e64e7281cbp124 },
+ { 0x1.8147d54e9cc33p122, 0x1.6381e2980956bp124 },
+ { 0x1.764f1f1f6ddeap122, 0x1.5ab342383d178p124 },
+ { 0x1.6b9c28657041ap122, 0x1.5211ebf41880bp124 },
+ { 0x1.612d893085125p122, 0x1.499d478bca735p124 },
+ { 0x1.5701de53f4d2ep122, 0x1.4154bc68d75c3p124 },
+ { 0x1.4d17c968d062bp122, 0x1.3937b1b31925ap124 },
+ { 0x1.436df0cfabf1dp122, 0x1.31458e6542847p124 },
+ { 0x1.3a02ffb1b7ceep122, 0x1.297db960e4f63p124 },
+ { 0x1.30d5a6013afc5p122, 0x1.21df9981f8e53p124 },
+ { 0x1.27e49879737d3p122, 0x1.1a6a95b1e786fp124 },
+ { 0x1.1f2e909de04d2p122, 0x1.131e14fa1625dp124 },
+ { 0x1.16b24cb8f8f92p122, 0x1.0bf97e95f2a64p124 },
+ { 0x1.0e6e8fda56cf7p122, 0x1.04fc3a0481321p124 },
+ { 0x1.066221d4539d8p122, 0x1.fc4b5e32d6259p123 },
+ { 0x1.fd179e7243e3cp121, 0x1.eeea8c1b1db94p123 },
+ { 0x1.edd4d2aec5adbp121, 0x1.e1d4cf1e2450ap123 },
+ { 0x1.def98c6c79efap121, 0x1.d508f9a1ea64fp123 },
+ { 0x1.d0838121f2418p121, 0x1.c885df3451a07p123 },
+ { 0x1.c2706fa45005ep121, 0x1.bc4a54a84e834p123 },
+ { 0x1.b4be201caa4b4p121, 0x1.b055303221015p123 },
+ { 0x1.a76a63fc95c79p121, 0x1.a4a549829587ep123 },
+ { 0x1.9a7315f1d6a55p121, 0x1.993979e14fffep123 },
+ { 0x1.8dd619d943ca1p121, 0x1.8e109c4622913p123 },
+ { 0x1.81915cb0e3323p121, 0x1.83298d717210ep123 },
+ { 0x1.75a2d48946eb1p121, 0x1.78832c03aa2b1p123 },
+ { 0x1.6a08807632262p121, 0x1.6e1c5893c380bp123 },
+ { 0x1.5ec0687e8dcb2p121, 0x1.63f3f5c4de13bp123 },
+ { 0x1.53c89d8bb3ddbp121, 0x1.5a08e85af27ep123 },
+ { 0x1.491f395818f54p121, 0x1.505a174e9c929p123 },
+ { 0x1.3ec25e5d5af12p121, 0x1.46e66be00224p123 },
+ { 0x1.34b037c1bbfc5p121, 0x1.3dacd1a8d8ccep123 },
+ { 0x1.2ae6f94510dd8p121, 0x1.34ac36ad8dafep123 },
+ { 0x1.2164df2d29765p121, 0x1.2be38b6d92415p123 },
+ { 0x1.18282e31ba3e8p121, 0x1.2351c2f2d1449p123 },
+ { 0x1.0f2f3367cd6aap121, 0x1.1af5d2e04f3f6p123 },
+ { 0x1.0678442cc256fp121, 0x1.12ceb37ff9bc3p123 },
+ { 0x1.fc037c21c3622p120, 0x1.0adb5fcfa8c75p123 },
+ { 0x1.eb940d8319831p120, 0x1.031ad58d56279p123 },
+ { 0x1.db9f17e61c31p120, 0x1.f7182a851bca2p122 },
+ { 0x1.cc218694238a2p120, 0x1.e85c449e377f3p122 },
+ { 0x1.bd18548996419p120, 0x1.da0005e5f28dfp122 },
+ { 0x1.ae808c479c371p120, 0x1.cc0180af00a8bp122 },
+ { 0x1.a05747a543aa7p120, 0x1.be5ecd2fcb5f9p122 },
+ { 0x1.9299afa0246a6p120, 0x1.b1160991ff737p122 },
+ { 0x1.8544fc2c8c1dap120, 0x1.a4255a00b9f03p122 },
+ { 0x1.785674053e8b9p120, 0x1.978ae8b55ce1bp122 },
+ { 0x1.6bcb6c7ad4854p120, 0x1.8b44e6031383ep122 },
+ { 0x1.5fa14942c3d54p120, 0x1.7f5188610ddc8p122 },
+ { 0x1.53d57c461a5a7p120, 0x1.73af0c737bb45p122 },
+ { 0x1.4865856ff632ap120, 0x1.685bb5134ef13p122 },
+ { 0x1.3d4ef27bc49a6p120, 0x1.5d55cb54cd53ap122 },
+ { 0x1.328f5ec350e67p120, 0x1.529b9e8cf9a1ep122 },
+ { 0x1.2824730cacbb4p120, 0x1.482b8455dc491p122 },
+ { 0x1.1e0be557fa673p120, 0x1.3e03d891b37dep122 },
+ { 0x1.144378ad22027p120, 0x1.3422fd6d12e2bp122 },
+ { 0x1.0ac8fce979b96p120, 0x1.2a875b5ffab56p122 },
+ { 0x1.019a4e8d69649p120, 0x1.212f612dee7fbp122 },
+ { 0x1.f16aad1422a55p119, 0x1.181983e5133ddp122 },
+ { 0x1.e030141df7d25p119, 0x1.0f443edc5ce49p122 },
+ { 0x1.cf80d4afc3019p119, 0x1.06ae13b0d3255p122 },
+ { 0x1.bf5908f50b4ap119, 0x1.fcab1483ea7fcp121 },
+ { 0x1.afb4e269693dfp119, 0x1.ec72615a894c4p121 },
+ { 0x1.a090a974cfebep119, 0x1.dcaf3691fc448p121 },
+ { 0x1.91e8bd0830a74p119, 0x1.cd5ec93c12432p121 },
+ { 0x1.83b9923a85f7bp119, 0x1.be7e5ac24963bp121 },
+ { 0x1.75ffb3e6519ap119, 0x1.b00b38d6b3575p121 },
+ { 0x1.68b7c2479902dp119, 0x1.a202bd6372dcep121 },
+ { 0x1.5bde729a6b60fp119, 0x1.94624e78e0fafp121 },
+ { 0x1.4f708eb9fba63p119, 0x1.87275e3a6869ep121 },
+ { 0x1.436af4c058acbp119, 0x1.7a4f6aca256cbp121 },
+ { 0x1.37ca96a6cd1d4p119, 0x1.6dd7fe335823p121 },
+ { 0x1.2c8c79e6f04a3p119, 0x1.61beae53b72b7p121 },
+ { 0x1.21adb71c70c75p119, 0x1.56011cc3b036dp121 },
+ { 0x1.172b79a7a1181p119, 0x1.4a9cf6bda3f4cp121 },
+ { 0x1.0d02ff50ce651p119, 0x1.3f8ff5042a88ep121 },
+ { 0x1.033197ec68c0ep119, 0x1.34d7dbc76d7e5p121 },
+ { 0x1.f3694a0008381p118, 0x1.2a727a89a3f14p121 },
+ { 0x1.e11332d0714c5p118, 0x1.205dac02bd6b9p121 },
+ { 0x1.cf5bf1fed1e7p118, 0x1.1697560347b26p121 },
+ { 0x1.be3eb08ae7c2p118, 0x1.0d1d69569b82dp121 },
+ { 0x1.adb6b810af9e2p118, 0x1.03ede1a45bfeep121 },
+ { 0x1.9dbf721b98dfap118, 0x1.f60d8aa2a88f2p120 },
+ { 0x1.8e54677bb0151p118, 0x1.e4cc4abf7d065p120 },
+ { 0x1.7f713f9cc9784p118, 0x1.d4143a9dfe965p120 },
+ { 0x1.7111bfdfb3cep118, 0x1.c3e1a5f5c077cp120 },
+ { 0x1.6331caf57b5dbp118, 0x1.b430ecf4a83a8p120 },
+ { 0x1.55cd603cc415p118, 0x1.a4fe83fb9db25p120 },
+ { 0x1.48e09b21414bfp118, 0x1.9646f35a76624p120 },
+ { 0x1.3c67b27d50fe7p118, 0x1.8806d70b2fc36p120 },
+ { 0x1.305ef7fdbfb95p118, 0x1.7a3ade6c8b3e5p120 },
+ { 0x1.24c2d787b9e37p118, 0x1.6cdfcbfc1e263p120 },
+ { 0x1.198fd6a0ee7bdp118, 0x1.5ff2750fe782p120 },
+ { 0x1.0ec293d9e6d85p118, 0x1.536fc18f7ce5cp120 },
+ { 0x1.0457c63a9669p118, 0x1.4754abacdf1dcp120 },
+ { 0x1.f49879624a021p117, 0x1.3b9e3f9d06e3fp120 },
+ { 0x1.e139bb05eb49ep117, 0x1.30499b503957fp120 },
+ { 0x1.ce8d4b7fd6c7p117, 0x1.2553ee2a336bfp120 },
+ { 0x1.bc8d516fda8bap117, 0x1.1aba78ba3af89p120 },
+ { 0x1.ab341ee553e25p117, 0x1.107a8c7323a6ep120 },
+ { 0x1.9a7c305336484p117, 0x1.06918b6355624p120 },
+ { 0x1.8a602b88919cp117, 0x1.f9f9cfd9c3035p119 },
+ { 0x1.7adadead962edp117, 0x1.e77448fb66bb9p119 },
+ { 0x1.6be73f45149fbp117, 0x1.d58da68fd117p119 },
+ { 0x1.5d80693276a6dp117, 0x1.c4412bf4b8f0bp119 },
+ { 0x1.4fa19dc42d409p117, 0x1.b38a3af2e55b4p119 },
+ { 0x1.424642c28ff75p117, 0x1.a3645330550ffp119 },
+ { 0x1.3569e18328604p117, 0x1.93cb11a30d765p119 },
+ { 0x1.29082600643fdp117, 0x1.84ba3004a50dp119 },
+ { 0x1.1d1cddf5a82dep117, 0x1.762d84469c18fp119 },
+ { 0x1.11a3f7ffbbfeap117, 0x1.6821000795a03p119 },
+ { 0x1.069982c189a9ep117, 0x1.5a90b00981d93p119 },
+ { 0x1.f7f3581a4dc2cp116, 0x1.4d78bba8ca5fdp119 },
+ { 0x1.e381802242163p116, 0x1.40d564548fad7p119 },
+ { 0x1.cfd6511405b2dp116, 0x1.34a305080681fp119 },
+ { 0x1.bcead7f01492fp116, 0x1.28de11c5031ebp119 },
+ { 0x1.aab859b20ac9ep116, 0x1.1d83170fbf6fbp119 },
+ { 0x1.993851cc9779ap116, 0x1.128eb96be8798p119 },
+ { 0x1.886470ad946a7p116, 0x1.07fdb4dafea5fp119 },
+ { 0x1.78369a4a2cbd6p116, 0x1.fb99b8b8279e1p118 },
+ { 0x1.68a8e4b2fc8c2p116, 0x1.e7f232d9e263p118 },
+ { 0x1.59b596b012aaap116, 0x1.d4fed7195d7e8p118 },
+ { 0x1.4b572664bd2dcp116, 0x1.c2b9cf7f893bfp118 },
+ { 0x1.3d8837fb08d1dp116, 0x1.b11d702b3deb2p118 },
+ { 0x1.30439c56dadf6p116, 0x1.a024365f771bdp118 },
+ { 0x1.23844fd08cb93p116, 0x1.8fc8c794b03b5p118 },
+ { 0x1.174578f6efd5dp116, 0x1.8005f08d6f1efp118 },
+ { 0x1.0b826758a086bp116, 0x1.70d6a46e07ddap118 },
+ { 0x1.003692548d98bp116, 0x1.6235fbd7a4345p118 },
+ { 0x1.eabb2fe335196p115, 0x1.541f340697987p118 },
+ { 0x1.d5e6777a83c2ap115, 0x1.468dadf4080abp118 },
+ { 0x1.c1e6cb6239574p115, 0x1.397ced7af2b15p118 },
+ { 0x1.aeb4423e690e7p115, 0x1.2ce898809244ep118 },
+ { 0x1.9c47374a0974ep115, 0x1.20cc76202c5fbp118 },
+ { 0x1.8a98484a1e8d3p115, 0x1.15246dda49d47p118 },
+ { 0x1.79a0538dd4fc7p115, 0x1.09ec86c75d497p118 },
+ { 0x1.695875fb574ap115, 0x1.fe41cd9bb4eeep117 },
+ { 0x1.59ba0929261c5p115, 0x1.e97ba3b77f306p117 },
+ { 0x1.4abea183bc47p115, 0x1.d57f524723822p117 },
+ { 0x1.3c600c7f477c5p115, 0x1.c245d4b99847ap117 },
+ { 0x1.2e984ed53e777p115, 0x1.afc85e0f82e12p117 },
+ { 0x1.2161a2cd9d894p115, 0x1.9e005769dbc1dp117 },
+ { 0x1.14b67693928cfp115, 0x1.8ce75e9f6f8ap117 },
+ { 0x1.08916a956172p115, 0x1.7c7744d9378f7p117 },
+ { 0x1.f9da9fde95755p114, 0x1.6caa0d3582fe9p117 },
+ { 0x1.e38a4dc27b11bp114, 0x1.5d79eb71e893bp117 },
+ { 0x1.ce283a9e3e33p114, 0x1.4ee1429bf7ccp117 },
+ { 0x1.b9ab1a96e3b3ep114, 0x1.40daa3c89f5b6p117 },
+ { 0x1.a609f7584d32bp114, 0x1.3360ccd23db3ap117 },
+ { 0x1.933c2d52c56c9p114, 0x1.266ea71d4f71ap117 },
+ { 0x1.8139690c0d187p114, 0x1.19ff4663ae9dfp117 },
+ { 0x1.6ff9a4837fa43p114, 0x1.0e0de78654d1ep117 },
+ { 0x1.5f7524a8e81a2p114, 0x1.0295ef6591848p117 },
+ { 0x1.4fa476e59f668p114, 0x1.ef25d37f49fe1p116 },
+ { 0x1.40806eb78e353p114, 0x1.da01102b5f851p116 },
+ { 0x1.3202235dada5p114, 0x1.c5b5412dcafadp116 },
+ { 0x1.2422ed95a3235p114, 0x1.b23a5a23e421p116 },
+ { 0x1.16dc656a14df6p114, 0x1.9f8893d8fd1c1p116 },
+ { 0x1.0a2860115569cp114, 0x1.8d986a4187285p116 },
+ { 0x1.fc01dbb80c841p113, 0x1.7c629a822bc9ep116 },
+ { 0x1.e4c0b066a497p113, 0x1.6be02102b352p116 },
+ { 0x1.ce823f4cc4badp113, 0x1.5c0a378c90bcap116 },
+ { 0x1.b93bf40d5eccbp113, 0x1.4cda5374ea275p116 },
+ { 0x1.a4e3a125adc76p113, 0x1.3e4a23d1f4703p116 },
+ { 0x1.916f7c5f2f764p113, 0x1.30538fbb77ecdp116 },
+ { 0x1.7ed61b5d3db0ap113, 0x1.22f0b496539bep116 },
+ { 0x1.6d0e7045988cbp113, 0x1.161be46ad3b5p116 },
+ { 0x1.5c0fc68335b0cp113, 0x1.09cfa445b00ffp116 },
+ { 0x1.4bd1bfa2aba3dp113, 0x1.fc0d55470cf51p115 },
+ { 0x1.3c4c504792bf8p113, 0x1.e577bbcd49935p115 },
+ { 0x1.2d77bd3a382bcp113, 0x1.cfd4a5adec5cp115 },
+ { 0x1.1f4c988d02149p113, 0x1.bb1a9657ce465p115 },
+ { 0x1.11c3bed8e716ap113, 0x1.a740684026555p115 },
+ { 0x1.04d654905dadp113, 0x1.943d4a1d1ed39p115 },
+ { 0x1.f0fb86d056745p112, 0x1.8208bc334a6a5p115 },
+ { 0x1.d9676faafa27fp112, 0x1.709a8db59f25cp115 },
+ { 0x1.c2e43d417197bp112, 0x1.5feada379d8b7p115 },
+ { 0x1.ad664518e771bp112, 0x1.4ff207314a102p115 },
+ { 0x1.98e25420092dap112, 0x1.40a8c1949f75ep115 },
+ { 0x1.854daa4a49b0fp112, 0x1.3207fb7420eb9p115 },
+ { 0x1.729df6503422ap112, 0x1.2408e9ba3327fp115 },
+ { 0x1.60c95193c542dp112, 0x1.16a501f0e42cap115 },
+ { 0x1.4fc63c27c71aep112, 0x1.09d5f819c9e29p115 },
+ { 0x1.3f8b98f93052ap112, 0x1.fb2b792b40a22p114 },
+ { 0x1.3010aa198de78p112, 0x1.e3bcf436a1a95p114 },
+ { 0x1.214d0d298365p112, 0x1.cd55277c18d05p114 },
+ { 0x1.1338b7e273194p112, 0x1.b7e94604479dcp114 },
+ { 0x1.05cbf4be650abp112, 0x1.a36eec00926ddp114 },
+ { 0x1.f1febf7a916aap111, 0x1.8fdc1b2dcf7b9p114 },
+ { 0x1.d997c68d65936p111, 0x1.7d2737527c3f9p114 },
+ { 0x1.c2556a4e7a90fp111, 0x1.6b4702d7d5849p114 },
+ { 0x1.ac2aa7516ade4p111, 0x1.5a329b7d30748p114 },
+ { 0x1.970b05888fda2p111, 0x1.49e17724f4d41p114 },
+ { 0x1.82ea92dbc1a27p111, 0x1.3a4b60ba9aa4ep114 },
+ { 0x1.6fbdddeff308fp111, 0x1.2b6875310f785p114 },
+ { 0x1.5d79f11e27f6bp111, 0x1.1d312098e9dbap114 },
+ { 0x1.4c144d984e1b8p111, 0x1.0f9e1b4dd36dfp114 },
+ { 0x1.3b82e6ba892a4p111, 0x1.02a8673a94692p114 },
+ { 0x1.2bbc1d878d272p111, 0x1.ec929a665b449p113 },
+ { 0x1.1cb6bc4eaa678p111, 0x1.d4f4b4c8e09edp113 },
+ { 0x1.0e69f27a37df3p111, 0x1.be6abbb10a5aap113 },
+ { 0x1.00cd508511266p111, 0x1.a8e8cc1fadef6p113 },
+ { 0x1.e7b1882bccac5p110, 0x1.94637d5bacfdbp113 },
+ { 0x1.cf09287e48bb9p110, 0x1.80cfdc72220cfp113 },
+ { 0x1.b792bbc489b04p110, 0x1.6e2367dc27f95p113 },
+ { 0x1.a140206ab945p110, 0x1.5c540b4936fd2p113 },
+ { 0x1.8c03d2d39119bp110, 0x1.4b581b8d170fcp113 },
+ { 0x1.77d0e6e5bed21p110, 0x1.3b2652b06c2b2p113 },
+ { 0x1.649b01d73110ap110, 0x1.2bb5cc22e5db6p113 },
+ { 0x1.525654343aad2p110, 0x1.1cfe010e2052dp113 },
+ { 0x1.40f79420887c7p110, 0x1.0ef6c4c84a0fep113 },
+ { 0x1.3073f7cff4a85p110, 0x1.01984165a5f36p113 },
+ { 0x1.20c1303550f0ep110, 0x1.e9b5e8d00ce77p112 },
+ { 0x1.11d563e54f40ep110, 0x1.d16f5716c6c1ap112 },
+ { 0x1.03a72a2bbdc06p110, 0x1.ba4f035d60e03p112 },
+ { 0x1.ec5b0ca2b20f5p109, 0x1.a447b7b03f045p112 },
+ { 0x1.d2bfc6210880ap109, 0x1.8f4ccca7fc90dp112 },
+ { 0x1.ba6c1c6e87c4p109, 0x1.7b5223dac7336p112 },
+ { 0x1.a35068e9c89cfp109, 0x1.684c227fcacefp112 },
+ { 0x1.8d5dbaa383b98p109, 0x1.562fac4329b48p112 },
+ { 0x1.7885ce9f67cdbp109, 0x1.44f21e49054f2p112 },
+ { 0x1.64bb0863504ddp109, 0x1.34894a5e24657p112 },
+ { 0x1.51f06ad20e4c3p109, 0x1.24eb7254ccf83p112 },
+ { 0x1.4019914f0b53ap109, 0x1.160f438c70913p112 },
+ { 0x1.2f2aa92823e8p109, 0x1.07ebd2a2d2844p112 },
+ { 0x1.1f186b432c98bp109, 0x1.f4f12e9ab070ap111 },
+ { 0x1.0fd8160ca94ap109, 0x1.db5ad0b27805cp111 },
+ { 0x1.015f67a552924p109, 0x1.c304efa2c6f4ep111 },
+ { 0x1.e749309831666p108, 0x1.abe09e9144b5ep111 },
+ { 0x1.cd3caa04cdd1bp108, 0x1.95df988e76644p111 },
+ { 0x1.b48774d0f8e45p108, 0x1.80f439b4ee04bp111 },
+ { 0x1.9d189f9f85cbfp108, 0x1.6d11788a69c64p111 },
+ { 0x1.86e0050236315p108, 0x1.5a2adfa0b4bc4p111 },
+ { 0x1.71ce426a561d3p108, 0x1.4834877429b8fp111 },
+ { 0x1.5dd4af79906a9p108, 0x1.37231085c7d9ap111 },
+ { 0x1.4ae555af52cdfp108, 0x1.26eb9daed6f7ep111 },
+ { 0x1.38f2e86f38216p108, 0x1.1783ceac2891p111 },
+ { 0x1.27f0bd5d0e6b1p108, 0x1.08e1badf0fcedp111 },
+ { 0x1.17d2c50b2bfafp108, 0x1.f5f7d88472604p110 },
+ { 0x1.088d83f7e4069p108, 0x1.db92b5212fb8dp110 },
+ { 0x1.f42c17ae0ebf6p107, 0x1.c282cd3957edap110 },
+ { 0x1.d8c3ea48f2889p107, 0x1.aab7abace48dcp110 },
+ { 0x1.beceb1f9f5b3dp107, 0x1.94219bfcb4928p110 },
+ { 0x1.a6399674d366bp107, 0x1.7eb1a2075864ep110 },
+ { 0x1.8ef2a9a18d857p107, 0x1.6a597219a93dap110 },
+ { 0x1.78e8dcd2e6bfdp107, 0x1.570b69502f313p110 },
+ { 0x1.640bf6745325ep107, 0x1.44ba864670882p110 },
+ { 0x1.504c882a97424p107, 0x1.335a62115bce2p110 },
+ { 0x1.3d9be56279ee9p107, 0x1.22df298214423p110 },
+ { 0x1.2bec1a4917edbp107, 0x1.133d96ae7e0ddp110 },
+ { 0x1.1b2fe32991d5cp107, 0x1.046aeabcfcdecp110 },
+ { 0x1.0b5aa42bf5054p107, 0x1.ecb9cfe1d8642p109 },
+ { 0x1.f8c0c2e2ce8dep106, 0x1.d21397ead99cbp109 },
+ { 0x1.dc6b6f1384e18p106, 0x1.b8d094c86d374p109 },
+ { 0x1.c19fa87de37fbp106, 0x1.a0df0f0c626dcp109 },
+ { 0x1.a848df650bea7p106, 0x1.8a2e269750a39p109 },
+ { 0x1.90538b942ea7cp106, 0x1.74adc8f4064d3p109 },
+ { 0x1.79ad1fce5b3d8p106, 0x1.604ea819f007cp109 },
+ { 0x1.6443fdcf0c327p106, 0x1.4d0231928c6f9p109 },
+ { 0x1.50076ad55cc39p106, 0x1.3aba85fe22e2p109 },
+ { 0x1.3ce784b411931p106, 0x1.296a70f414053p109 },
+ { 0x1.2ad53760d7287p106, 0x1.1905613b3abf2p109 },
+ { 0x1.19c232fd50b88p106, 0x1.097f6156f32c5p109 },
+ { 0x1.09a0e254c75ep106, 0x1.f59a20caf6695p108 },
+ { 0x1.f4c8c392fb944p105, 0x1.d9c73698fb1dcp108 },
+ { 0x1.d800ed59bd026p105, 0x1.bf716c6168baep108 },
+ { 0x1.bcd30dfbd611bp105, 0x1.a6852c6b58392p108 },
+ { 0x1.a32923130213fp105, 0x1.8eefd70594a89p108 },
+ { 0x1.8aee4cd06ec1bp105, 0x1.789fb715aae95p108 },
+ { 0x1.740ebfab80eb4p105, 0x1.6383f726a8e04p108 },
+ { 0x1.5e77b6bbd2127p105, 0x1.4f8c96f26a26ap108 },
+ { 0x1.4a1766b6e5e8ap105, 0x1.3caa61607f92p108 },
+ { 0x1.36dcf18a6465cp105, 0x1.2acee2f5ecdb8p108 },
+ { 0x1.24b85a8bf0124p105, 0x1.19ec60b1242edp108 },
+ { 0x1.139a7b37f8475p105, 0x1.09f5cf4dd2877p108 },
+ { 0x1.0374f8792ca97p105, 0x1.f5bd95d8730d8p107 },
+ { 0x1.e87470e4f4246p104, 0x1.d9371e2ff7c35p107 },
+ { 0x1.cbbab18b73217p104, 0x1.be41de54d155ap107 },
+ { 0x1.b0a44aa2f067ep104, 0x1.a4c89e08ef4f3p107 },
+ { 0x1.971a1ec0f40c7p104, 0x1.8cb738399b12cp107 },
+ { 0x1.7f064a8ba8323p104, 0x1.75fa8dbc84becp107 },
+ { 0x1.685414c16188ep104, 0x1.608078a70dcbcp107 },
+ { 0x1.52efdf060cd2p104, 0x1.4c37c0394d094p107 },
+ { 0x1.3ec7176d784b5p104, 0x1.39100d5687bfep107 },
+ { 0x1.2bc82ab9d2302p104, 0x1.26f9df8519bd7p107 },
+ { 0x1.19e277461404p104, 0x1.15e6827001f18p107 },
+ { 0x1.090640946d2d5p104, 0x1.05c803e4831c1p107 },
+ { 0x1.f24946f22d5aep103, 0x1.ed22548cffd35p106 },
+ { 0x1.d45f15b49b35ep103, 0x1.d06ad6ecdf971p106 },
+ { 0x1.b83349fd05191p103, 0x1.b551c847fbc96p106 },
+ { 0x1.9dacb2c432ef4p103, 0x1.9bc09f112b494p106 },
+ { 0x1.84b37e1cbf8ebp103, 0x1.83a1ff0aa239dp106 },
+ { 0x1.6d3126d74b6ccp103, 0x1.6ce1aa3fd7bddp106 },
+ { 0x1.5710631158bffp103, 0x1.576c72b514859p106 },
+ { 0x1.423d13a3b73e1p103, 0x1.43302cc4a0da8p106 },
+ { 0x1.2ea43465e3995p103, 0x1.301ba221dc9bbp106 },
+ { 0x1.1c33cd3c37addp103, 0x1.1e1e857adc568p106 },
+ { 0x1.0adae3e73c2b5p103, 0x1.0d2966b1746f7p106 },
+ { 0x1.f512dd15b73b7p102, 0x1.fa5b4f49cc6b2p105 },
+ { 0x1.d6608dc942687p102, 0x1.dc3ae30b55c16p105 },
+ { 0x1.b9823c51276e1p102, 0x1.bfd7555a3bd68p105 },
+ { 0x1.9e5ce2f93dd76p102, 0x1.a517d9e61628ap105 },
+ { 0x1.84d6fe15b6b93p102, 0x1.8be4f8f6c951fp105 },
+ { 0x1.6cd87746bc76bp102, 0x1.74287ded49339p105 },
+ { 0x1.564a91cd221fp102, 0x1.5dcd669f2cd34p105 },
+ { 0x1.4117d7e2c667dp102, 0x1.48bfd38302871p105 },
+ { 0x1.2d2c0909ebeb9p102, 0x1.34ecf8a3c124ap105 },
+ { 0x1.1a7409475f2f9p102, 0x1.22430f521cbcfp105 },
+ { 0x1.08ddd13bd35e7p102, 0x1.10b1488aeb235p105 },
+ { 0x1.f0b0be22d18e8p101, 0x1.0027c00a263a6p105 },
+ { 0x1.d1a75065a8c74p101, 0x1.e12ee004efc37p104 },
+ { 0x1.b48117843c1c7p101, 0x1.c3e44ae32b16bp104 },
+ { 0x1.99218b8ac7f8ep101, 0x1.a854ea14102a8p104 },
+ { 0x1.7f6dc6010b4adp101, 0x1.8e6761569f45dp104 },
+ { 0x1.674c6ae60d852p101, 0x1.7603bac345f65p104 },
+ { 0x1.50a592e3c968ep101, 0x1.5f1353cdad001p104 },
+ { 0x1.3b62b6aafb0c8p101, 0x1.4980cb3c80949p104 },
+ { 0x1.276e9b681072fp101, 0x1.3537f00b6ad4dp104 },
+ { 0x1.14b54042f445bp101, 0x1.2225b12bffc68p104 },
+ { 0x1.0323ccdc1a3dcp101, 0x1.10380e1adb7e9p104 },
+ { 0x1.e5510173b9a5p100, 0x1.febc107d5efaap103 },
+ { 0x1.c6654733b86adp100, 0x1.df0f2a0ee6947p103 },
+ { 0x1.a964ed354f984p100, 0x1.c14b2188bcee4p103 },
+ { 0x1.8e324c651b064p100, 0x1.a553644f7f07dp103 },
+ { 0x1.74b179d1eba81p100, 0x1.8b0cfce0579ep103 },
+ { 0x1.5cc82d9070d95p100, 0x1.725e7c5dd20f7p103 },
+ { 0x1.465daafca8b1dp100, 0x1.5b2fe547a134p103 },
+ { 0x1.315aaa46df48ep100, 0x1.456a974e92e93p103 },
+ { 0x1.1da9433aebbcfp100, 0x1.30f93c3699078p103 },
+ { 0x1.0b34d93135fcp100, 0x1.1dc7b5b978cf8p103 },
+ { 0x1.f3d41033c44ccp99, 0x1.0bc30c5d52f15p103 },
+ { 0x1.d36d25268cd2bp99, 0x1.f5b2be65a0c7fp102 },
+ { 0x1.b512a1fb1d8fcp99, 0x1.d5f3a8dea7357p102 },
+ { 0x1.98a442fc4fc15p99, 0x1.b82915b03515bp102 },
+ { 0x1.7e03b1cc6d738p99, 0x1.9c3517e789488p102 },
+ { 0x1.651468e010b8ap99, 0x1.81fb7df06136ep102 },
+ { 0x1.4dbb989001d84p99, 0x1.6961b8d641d06p102 },
+ { 0x1.37e00dac4e8b5p99, 0x1.524ec4d916caep102 },
+ { 0x1.236a197bf0b9ap99, 0x1.3cab1343d18d1p102 },
+ { 0x1.10437b1569d7ep99, 0x1.2860757487a01p102 },
+ { 0x1.fcae93fb7323cp98, 0x1.155a09065d4f7p102 },
+ { 0x1.db23c3f816f92p98, 0x1.0384250e4c9fcp102 },
+ { 0x1.bbc1a022c14d4p98, 0x1.e59890b926c78p101 },
+ { 0x1.9e658108af2ep98, 0x1.c642116a8a9e3p101 },
+ { 0x1.82eedbe410407p98, 0x1.a8e405e651ab6p101 },
+ { 0x1.693f22ab61ce9p98, 0x1.8d5f98114f872p101 },
+ { 0x1.5139a5f3661fbp98, 0x1.7397c5a66e307p101 },
+ { 0x1.3ac3788a1b429p98, 0x1.5b71456c5a4c4p101 },
+ { 0x1.25c354b26cb4ep98, 0x1.44d26de513197p101 },
+ { 0x1.122182e9a270fp98, 0x1.2fa31d6371537p101 },
+ { 0x1.ff8f84418d51p97, 0x1.1bcca373b7b43p101 },
+ { 0x1.dd4262aac53e8p97, 0x1.0939ab853339fp101 },
+ { 0x1.bd3474ec16ca5p97, 0x1.efac5187b2863p100 },
+ { 0x1.9f40fd0082b72p97, 0x1.cf1e86235d0e7p100 },
+ { 0x1.8345858c4438dp97, 0x1.b0a68a2128babp100 },
+ { 0x1.6921be96b86b1p97, 0x1.9423165bc4444p100 },
+ { 0x1.50b75c536f927p97, 0x1.7974e743dea3dp100 },
+ { 0x1.39e9f7dcbe479p97, 0x1.607e9eacd105p100 },
+ { 0x1.249ef1c3be817p97, 0x1.4924a74dec729p100 },
+ { 0x1.10bd565b35393p97, 0x1.334d19e0c216p100 },
+ { 0x1.fc5b8748842b2p96, 0x1.1edfa3c5f5ccap100 },
+ { 0x1.d9b4a18a38642p96, 0x1.0bc56f1b54701p100 },
+ { 0x1.b95cede6d524bp96, 0x1.f3d2185e047d9p99 },
+ { 0x1.9b2df77a02225p96, 0x1.d26cb87945e87p99 },
+ { 0x1.7f03b935e8e3ap96, 0x1.b334fac4b9f99p99 },
+ { 0x1.64bc777824f0ep96, 0x1.96076f7918d1cp99 },
+ { 0x1.4c389be9acb83p96, 0x1.7ac2d72fc2c63p99 },
+ { 0x1.355a9387de78cp96, 0x1.614801550319ep99 },
+ { 0x1.2006aeb6bc768p96, 0x1.4979ac8b28927p99 },
+ { 0x1.0c23033e2a376p96, 0x1.333c68e2d0548p99 },
+ { 0x1.f32ea02b55d23p95, 0x1.1e767bce37dd7p99 },
+ { 0x1.d099c5c770f5ap95, 0x1.0b0fc5b6d05ap99 },
+ { 0x1.b05cfe2e99435p95, 0x1.f1e3523b41d7dp98 },
+ { 0x1.92508d0743fc9p95, 0x1.d00de6608effep98 },
+ { 0x1.764f46cf19f9cp95, 0x1.b0778b7b3301bp98 },
+ { 0x1.5c36679625a01p95, 0x1.92fb04ec0f6cfp98 },
+ { 0x1.43e56c3e340a7p95, 0x1.77756ec9f78fap98 },
+ { 0x1.2d3dee1869201p95, 0x1.5dc61922d5a06p98 },
+ { 0x1.182380bd2f494p95, 0x1.45ce65699ff6dp98 },
+ { 0x1.047b91fcb6491p95, 0x1.2f71a5f15997p98 },
+ { 0x1.e45a9790460c1p94, 0x1.1a94ff571654fp98 },
+ { 0x1.c242efeaca76p94, 0x1.071f4bbea09ecp98 },
+ { 0x1.a284cb82c31cep94, 0x1.e9f1ff8ddd774p97 },
+ { 0x1.84f7a1eb7f7f3p94, 0x1.c818223a202c7p97 },
+ { 0x1.697595326d7dcp94, 0x1.a887bd2b4404dp97 },
+ { 0x1.4fdb462549af1p94, 0x1.8b1a336c5eb6bp97 },
+ { 0x1.3807ab51436a8p94, 0x1.6fab63324088ap97 },
+ { 0x1.21dbea9108398p94, 0x1.56197e30205bap97 },
+ { 0x1.0d3b35021d695p94, 0x1.3e44e45301b92p97 },
+ { 0x1.f4154a787cc1bp93, 0x1.281000bfe4c3fp97 },
+ { 0x1.d0623f4f4a28fp93, 0x1.135f28f2d50b4p97 },
+ { 0x1.af2e69a26261p93, 0x1.00187dded5975p97 },
+ { 0x1.904e0b3aa82a3p93, 0x1.dc479de0ef001p96 },
+ { 0x1.73985278fa30ep93, 0x1.bad4fdad3caa1p96 },
+ { 0x1.58e7298af87d9p93, 0x1.9baed3ed27ab8p96 },
+ { 0x1.401708b7e64c6p93, 0x1.7ead9ce4285bbp96 },
+ { 0x1.2906cb94eb40dp93, 0x1.63ac6b4edc88ep96 },
+ { 0x1.139788f2dd663p93, 0x1.4a88be2a6390cp96 },
+ { 0x1.ff58dab4f2a79p92, 0x1.332259185f1ap96 },
+ { 0x1.da552fdd03043p92, 0x1.1d5b1f3793044p96 },
+ { 0x1.b7f1f31b571b6p92, 0x1.0916f04b6e18bp96 },
+ { 0x1.98006c2117e39p92, 0x1.ec77101de6926p95 },
+ { 0x1.7a550f03b145bp92, 0x1.c960bf23153ep95 },
+ { 0x1.5ec74662c5961p92, 0x1.a8bd20fc65ef7p95 },
+ { 0x1.453141082302ap92, 0x1.8a61745ec7d1dp95 },
+ { 0x1.2d6fc2c9e8bcp92, 0x1.6e25d0e756261p95 },
+ { 0x1.1761f87a6dc3dp92, 0x1.53e4f7d1666cbp95 },
+ { 0x1.02e94eb4ac8a5p92, 0x1.3b7c27a7ddb0ep95 },
+ { 0x1.dfd296adef82ap91, 0x1.24caf2c32af14p95 },
+ { 0x1.bc8ed301215ebp91, 0x1.0fb3186804d0fp95 },
+ { 0x1.9bd5efd2c0f15p91, 0x1.f830c0bb41fd7p94 },
+ { 0x1.7d79f2db2d4a5p91, 0x1.d3c0f1a91c846p94 },
+ { 0x1.61500f5293f06p91, 0x1.b1e5acf351d87p94 },
+ { 0x1.47306f04df3d6p91, 0x1.92712d259ce66p94 },
+ { 0x1.2ef5ff0323b28p91, 0x1.7538c60a04476p94 },
+ { 0x1.187e3fb74914dp91, 0x1.5a14b04b47879p94 },
+ { 0x1.03a918225a966p91, 0x1.40dfd87456f4cp94 },
+ { 0x1.e0b15822be4ep90, 0x1.2977b1172b9d5p94 },
+ { 0x1.bce26a2fb7176p90, 0x1.13bc07e891491p94 },
+ { 0x1.9bb1bc445c3c6p90, 0x1.ff1dbb4300811p93 },
+ { 0x1.7cef42e9a617dp90, 0x1.d9a880f306bd8p93 },
+ { 0x1.606e51e0a4963p90, 0x1.b6e45220b55ep93 },
+ { 0x1.460560e841d79p90, 0x1.96a0b33f2c4dap93 },
+ { 0x1.2d8dd47a40ad8p90, 0x1.78b07e9e924acp93 },
+ { 0x1.16e3ca3d4393fp90, 0x1.5ce9ab1670dd2p93 },
+ { 0x1.01e5e8edda47bp90, 0x1.4325167006bbp93 },
+ { 0x1.dcea670907819p89, 0x1.2b3e53538ff3fp93 },
+ { 0x1.b8e9bec48816dp89, 0x1.15137a7f44864p93 },
+ { 0x1.97945aa1c9c35p89, 0x1.0084ff125639dp93 },
+ { 0x1.78b88a4e7107bp89, 0x1.daeb0b7311ec7p92 },
+ { 0x1.5c2827c986b62p89, 0x1.b7937d1c40c53p92 },
+ { 0x1.41b858361b0fep89, 0x1.96d082f59ab06p92 },
+ { 0x1.294150fb19119p89, 0x1.7872d9fa10aadp92 },
+ { 0x1.129e20e732adcp89, 0x1.5c4e8e37bc7dp92 },
+ { 0x1.fb58fa290d436p88, 0x1.423ac0df49a4p92 },
+ { 0x1.d499229819bc6p88, 0x1.2a117230ad284p92 },
+ { 0x1.b0c1a759f7739p88, 0x1.13af4f04f9998p92 },
+ { 0x1.8f9bb6c075486p88, 0x1.fde703724e56p91 },
+ { 0x1.70f4744735c2bp88, 0x1.d77f0c82e7641p91 },
+ { 0x1.549cb0f7ef8e2p88, 0x1.b3ee02611d7ddp91 },
+ { 0x1.3a68a8c1234e1p88, 0x1.92ff33023d5bdp91 },
+ { 0x1.222fc469e8b8cp88, 0x1.7481a9e69f53fp91 },
+ { 0x1.0bcc5fd30f1ddp88, 0x1.5847eda620959p91 },
+ { 0x1.ee3728761897bp87, 0x1.3e27c1fcc74bdp91 },
+ { 0x1.c7fa0c7e3bac7p87, 0x1.25f9ee0b923dcp91 },
+ { 0x1.a4a56eb132a54p87, 0x1.0f9a0686532p91 },
+ { 0x1.8401b5336a8ap87, 0x1.f5cc7718082bp90 },
+ { 0x1.65db58e2358c1p87, 0x1.cf7e53d6a2ca5p90 },
+ { 0x1.4a029a7ea7cd1p87, 0x1.ac0f5f3229372p90 },
+ { 0x1.304b3d1961171p87, 0x1.8b498644847eap90 },
+ { 0x1.188c45630dc53p87, 0x1.6cfa9bcca59dcp90 },
+ { 0x1.029fbd8b92835p87, 0x1.50f411d4fd2cdp90 },
+ { 0x1.dcc4fabf32f1cp86, 0x1.370ab8327af5ep90 },
+ { 0x1.b767ecb334a7ep86, 0x1.1f167f88c6b6ep90 },
+ { 0x1.94ec06c0ff29fp86, 0x1.08f24085d4597p90 },
+ { 0x1.751977e5803d3p86, 0x1.e8f70e181d61ap89 },
+ { 0x1.57bc950253825p86, 0x1.c324c20e337dcp89 },
+ { 0x1.3ca58b816a87fp86, 0x1.a03261574b54ep89 },
+ { 0x1.23a8197d2607ep86, 0x1.7fe903cdf5855p89 },
+ { 0x1.0c9b4b0a6a16fp86, 0x1.6215c58da345p89 },
+ { 0x1.eeb27891d2bb3p85, 0x1.46897d4b69fc6p89 },
+ { 0x1.c77dbfc848866p85, 0x1.2d1877d731b7bp89 },
+ { 0x1.a357936adf17bp85, 0x1.159a386b11517p89 },
+ { 0x1.8203fa7992554p85, 0x1.ffd27ae9393cep88 },
+ { 0x1.634b7f56b0a5cp85, 0x1.d7c593130dd0bp88 },
+ { 0x1.46fada7e6a5fep85, 0x1.b2cd607c79bcfp88 },
+ { 0x1.2ce2a3690576bp85, 0x1.90ae4d3405651p88 },
+ { 0x1.14d707280e6cfp85, 0x1.71312dd1759e2p88 },
+ { 0x1.fd5f08ad2b29ap84, 0x1.5422ef5d8949dp88 },
+ { 0x1.d48d57f7718b7p84, 0x1.39544b0ecc957p88 },
+ { 0x1.aef3ce0add578p84, 0x1.20997f73e73ddp88 },
+ { 0x1.8c52800f939c8p84, 0x1.09ca0eaacd277p88 },
+ { 0x1.6c6e61e57bf9bp84, 0x1.e9810295890ecp87 },
+ { 0x1.4f10e8ebc44a9p84, 0x1.c2b45b5aa4a1dp87 },
+ { 0x1.3407b59d72a5bp84, 0x1.9eee068fa7596p87 },
+ { 0x1.1b2443858c0a1p84, 0x1.7df2b399c10a8p87 },
+ { 0x1.043b9f1621ff3p84, 0x1.5f8b87a31bd85p87 },
+ { 0x1.de4c41eb96b45p83, 0x1.4385c96e9a2d9p87 },
+ { 0x1.b77e5cbd5d147p83, 0x1.29b2933ef4cbcp87 },
+ { 0x1.93c9fc62bfb11p83, 0x1.11e68a6378f8ap87 },
+ { 0x1.72f0c4c8e9bffp83, 0x1.f7f338086a86bp86 },
+ { 0x1.54b92affb11afp83, 0x1.cf8d7d9ce040ap86 },
+ { 0x1.38ee17b150182p83, 0x1.aa577251ae485p86 },
+ { 0x1.1f5e908f70e0cp83, 0x1.8811d739efb5fp86 },
+ { 0x1.07dd6833bb38p83, 0x1.68823e52970bep86 },
+ { 0x1.e481e7f6ac4bcp82, 0x1.4b72ae68e8b4cp86 },
+ { 0x1.bcc58edad5559p82, 0x1.30b14dbe876bcp86 },
+ { 0x1.983ee9896d582p82, 0x1.181012ef8661p86 },
+ { 0x1.76aca47764427p82, 0x1.01647ba798745p86 },
+ { 0x1.57d287836bd3dp82, 0x1.d90e917701675p85 },
+ { 0x1.3b79118c097a1p82, 0x1.b2a87e86d0c8ap85 },
+ { 0x1.216d1b97279a9p82, 0x1.8f53dcb377293p85 },
+ { 0x1.097f82fc04025p82, 0x1.6ed2f2515e933p85 },
+ { 0x1.e709b415656dp81, 0x1.50ecc9ed47f19p85 },
+ { 0x1.beaa3d6c15504p81, 0x1.356cd5ce7799ep85 },
+ { 0x1.9996ed9b83967p81, 0x1.1c229a587ab78p85 },
+ { 0x1.778be2bd9795bp81, 0x1.04e15ecc7f3f6p85 },
+ { 0x1.584a99af8a842p81, 0x1.deffc7e6a6017p84 },
+ { 0x1.3b99832cbefddp81, 0x1.b7b040832f31p84 },
+ { 0x1.2143a112d0466p81, 0x1.938e021f36d76p84 },
+ { 0x1.09182b326b229p81, 0x1.7258610b3b233p84 },
+ { 0x1.e5d47637f5db5p80, 0x1.53d3bfc82a909p84 },
+ { 0x1.bd20fcc3b76d7p80, 0x1.37c92babdc2fdp84 },
+ { 0x1.97c9dda748fc7p80, 0x1.1e06010120f6ap84 },
+ { 0x1.7589207e91ad1p80, 0x1.065b9616170d4p84 },
+ { 0x1.561e669aa7fdbp80, 0x1.e13dd96b3753bp83 },
+ { 0x1.394e7a2ac9fc7p80, 0x1.b950d32467392p83 },
+ { 0x1.1ee2e61eccc99p80, 0x1.94a72263259a5p83 },
+ { 0x1.06a996198f06fp80, 0x1.72fd93e036cdcp83 },
+ { 0x1.e0e8fbad2703ep79, 0x1.54164576929abp83 },
+ { 0x1.b8328ee330ae9p79, 0x1.37b83c521fe96p83 },
+ { 0x1.92e21013a767p79, 0x1.1daf033182e96p83 },
+ { 0x1.70aff489136ebp79, 0x1.05ca50205d26ap83 },
+ { 0x1.515a7c77fab48p79, 0x1.dfbb6235639fap82 },
+ { 0x1.34a53ce0bbb6fp79, 0x1.b7807e294781fp82 },
+ { 0x1.1a58b2b09fdcbp79, 0x1.9298add70a734p82 },
+ { 0x1.0241de6c31e5bp79, 0x1.70beaf9c7ffb6p82 },
+ { 0x1.d863cf753825cp78, 0x1.51b2cd6709222p82 },
+ { 0x1.affb906d0ae09p78, 0x1.353a6cf7f7fffp82 },
+ { 0x1.8afbf9e9520c2p78, 0x1.1b1fa8cbe84a7p82 },
+ { 0x1.691c7c768becep78, 0x1.0330f0fd69921p82 },
+ { 0x1.4a1a79df39cdep78, 0x1.da81670f96f9bp81 },
+ { 0x1.2db8ca9009091p78, 0x1.b24a16b4d09aap81 },
+ { 0x1.13bf4cb384e4ap78, 0x1.8d6eeb6efdbd6p81 },
+ { 0x1.f7f4f88751db4p77, 0x1.6ba91ac734786p81 },
+ { 0x1.cc7626bced452p77, 0x1.4cb7966770ab5p81 },
+ { 0x1.a4ab6470c1c5cp77, 0x1.305e9721d0981p81 },
+ { 0x1.80451c2811052p77, 0x1.1667311fff70ap81 },
+ { 0x1.5efa4d64f59f6p77, 0x1.fd3de10d62855p80 },
+ { 0x1.40880373ed74p77, 0x1.d1aefbcd48d0cp80 },
+ { 0x1.24b0d7368076ep77, 0x1.a9cc93c25aca9p80 },
+ { 0x1.0b3c7b0d960fp77, 0x1.85487ee3ea735p80 },
+ { 0x1.e7eea02e4ed88p76, 0x1.63daf8b4b1e0cp80 },
+ { 0x1.bd6408059b696p76, 0x1.45421e69a6ca1p80 },
+ { 0x1.96826d9e90341p76, 0x1.294175802d99ap80 },
+ { 0x1.72fa4fa12d516p76, 0x1.0fa17bf41068fp80 },
+ { 0x1.5282d2d5803fep76, 0x1.f05e82aae2bb9p79 },
+ { 0x1.34d935f1be064p76, 0x1.c578101b29058p79 },
+ { 0x1.19c050c56d0d7p76, 0x1.9e39dc5dd2f7cp79 },
+ { 0x1.01001dd9c7ccep76, 0x1.7a553a728bbf2p79 },
+ { 0x1.d4ca9b634ecbap75, 0x1.5982008db1304p79 },
+ { 0x1.ab81c5c80cf39p75, 0x1.3b7e00422e51bp79 },
+ { 0x1.85cfacb7477f2p75, 0x1.200c898d9ee3ep79 },
+ { 0x1.6365862923eb9p75, 0x1.06f5f7eb65a56p79 },
+ { 0x1.43fb317b5dc37p75, 0x1.e00e9148a1d25p78 },
+ { 0x1.274ea96044bd7p75, 0x1.b623734024e92p78 },
+ { 0x1.0d23817479c67p75, 0x1.8fd4e01891bf8p78 },
+ { 0x1.ea84dd159259p74, 0x1.6cd44c7470d89p78 },
+ { 0x1.bef1b1a12823ep74, 0x1.4cd9c04158cd7p78 },
+ { 0x1.9730edfda64acp74, 0x1.2fa34bf5c8344p78 },
+ { 0x1.72ede3b7eaa25p74, 0x1.14f4890ff2461p78 },
+ { 0x1.51db1ec3a3087p74, 0x1.f92c49dfa4df5p77 },
+ { 0x1.33b1c9d1576ecp74, 0x1.ccaaea71ab0dfp77 },
+ { 0x1.18311f8a03acap74, 0x1.a40829f001197p77 },
+ { 0x1.fe3bcf4629feap73, 0x1.7eef13b59e96cp77 },
+ { 0x1.d083fda665164p73, 0x1.5d11e1a252bf5p77 },
+ { 0x1.a6d7d18831888p73, 0x1.3e296303b2297p77 },
+ { 0x1.80dcd6603df1bp73, 0x1.21f47009f43cep77 },
+ { 0x1.5e4062d5b6a4ep73, 0x1.083768c5e4542p77 },
+ { 0x1.3eb6ef47c2758p73, 0x1.e1777d831265fp76 },
+ { 0x1.21fb7a81c5444p73, 0x1.b69f10b0191b5p76 },
+ { 0x1.07cefb734d68bp73, 0x1.8f8a3a05b5b53p76 },
+ { 0x1.dfefbdb19ac7ep72, 0x1.6be573c40c8e7p76 },
+ { 0x1.b4831fb12344p72, 0x1.4b645ba991fdbp76 },
+ { 0x1.8cf81557d20b6p72, 0x1.2dc119095729fp76 },
+ { 0x1.68f6f0feb4755p72, 0x1.12bbcfa4d62dep76 },
+ { 0x1.482fa78c40635p72, 0x1.f4343c7d504b9p75 },
+ { 0x1.2a59289a484fbp72, 0x1.c74d4fe1e0e8bp75 },
+ { 0x1.0f30c4d0be5cp72, 0x1.9e614ecbf4af6p75 },
+ { 0x1.ecf3428c48d4fp71, 0x1.791716475420cp75 },
+ { 0x1.bff86d9ec8499p71, 0x1.571d34563050ap75 },
+ { 0x1.970bb87f4ae14p71, 0x1.3829407a207d8p75 },
+ { 0x1.71d0b55b79b86p71, 0x1.1bf74244aed5ap75 },
+ { 0x1.4ff315d036fbdp71, 0x1.024924c7520d1p75 },
+ { 0x1.3125f6a3d257p71, 0x1.d5cc6ba567f29p74 },
+ { 0x1.15233ae8815f2p71, 0x1.ab3560167ccaap74 },
+ { 0x1.f755ea760487dp70, 0x1.846e9dda7a163p74 },
+ { 0x1.c905bbd9ab5a6p70, 0x1.6121d7db32bddp74 },
+ { 0x1.9eebaa0589b4ep70, 0x1.410047ead6894p74 },
+ { 0x1.78a6de0f41b89p70, 0x1.23c2090cdde78p74 },
+ { 0x1.55df1790f2f61p70, 0x1.09257fca001cp74 },
+ { 0x1.3643ec463a3cfp70, 0x1.e1dd9ec677783p73 },
+ { 0x1.198c18435598dp70, 0x1.b5ceb5a13221bp73 },
+ { 0x1.fee9bab9f4e14p69, 0x1.8dbaa11de2037p73 },
+ { 0x1.cf82e0eb6196bp69, 0x1.694680a9a3ee6p73 },
+ { 0x1.a474e7029a919p69, 0x1.481f73b3778e8p73 },
+ { 0x1.7d5af6513e2bep69, 0x1.29f9e7d8fd094p73 },
+ { 0x1.59d93e1d8f57dp69, 0x1.0e90f64b5b103p73 },
+ { 0x1.399c279e4699ap69, 0x1.eb4b9e47b58c9p72 },
+ { 0x1.1c579bbca6885p69, 0x1.bdfe62f60dd7p72 },
+ { 0x1.01c659160612dp69, 0x1.94d1de5c4576fp72 },
+ { 0x1.d352b1ae2694p68, 0x1.6f66f6ab90c3cp72 },
+ { 0x1.a78e8252c204dp68, 0x1.4d67050b31c2ap72 },
+ { 0x1.7fd7c80f3410ep68, 0x1.2e8318008cf89p72 },
+ { 0x1.5bcf92cc55d86p68, 0x1.1273463a1589bp72 },
+ { 0x1.3b1f876b10da7p68, 0x1.f1ec20afad0e2p71 },
+ { 0x1.1d791bb1324a1p68, 0x1.c39fa0d4a5a2bp71 },
+ { 0x1.0294e37abcee8p68, 0x1.99946bf7e02a1p71 },
+ { 0x1.d463db5fa3c13p67, 0x1.73679b24aeb9bp71 },
+ { 0x1.a82a5f4047a5bp67, 0x1.50bf2558ab78fp71 },
+ { 0x1.8011fb05fe09p67, 0x1.314916abfa1eap71 },
+ { 0x1.5bb91decf8a58p67, 0x1.14bad9006f53bp71 },
+ { 0x1.3ac71ce35c1d3p67, 0x1.f5a1196b5bb2ep70 },
+ { 0x1.1ceb656955c59p67, 0x1.c698e001f6d3p70 },
+ { 0x1.01dcc2acf7755p67, 0x1.9beca74b0f147p70 },
+ { 0x1.d2b166911c178p66, 0x1.753637caac6d9p70 },
+ { 0x1.a6459c5b11342p66, 0x1.5218993857afcp70 },
+ { 0x1.7e086accc805dp66, 0x1.323f3f19cff3ep70 },
+ { 0x1.59962aef547b3p66, 0x1.155d47fdb9c94p70 },
+ { 0x1.3894608650edep66, 0x1.f6599b70323cap69 },
+ { 0x1.1ab0e4d284f44p66, 0x1.c6dc8a4bb3ba6p69 },
+ { 0x1.ff4248ebb8299p65, 0x1.9bcfd83a431e9p69 },
+ { 0x1.ce42dd8e4fa23p65, 0x1.74ca889bbacd5p69 },
+ { 0x1.a1e8aa1400997p65, 0x1.516d33e26c04p69 },
+ { 0x1.79c430435a7fcp65, 0x1.31612a7ef535fp69 },
+ { 0x1.557046eb39249p65, 0x1.1457ab75c2489p69 },
+ { 0x1.349127b59b217p65, 0x1.f41259c9550cp68 },
+ { 0x1.16d392dff5104p65, 0x1.c46969ca99a2ep68 },
+ { 0x1.f7d80dc993f2fp64, 0x1.993e82b76e726p68 },
+ { 0x1.c72c149cb214bp64, 0x1.72267ac1b25ap68 },
+ { 0x1.9b270c24cc8fap64, 0x1.4ec0062aeeb78p68 },
+ { 0x1.73585df7b6643p64, 0x1.2eb2d18a2081bp68 },
+ { 0x1.4f59f9910367ep64, 0x1.11aeb0b11d1a1p68 },
+ { 0x1.2ecf5b7f6abe3p64, 0x1.eed5c0bbf1061p67 },
+ { 0x1.1164ab45aa235p64, 0x1.bf4ab21b4f3fp67 },
+ { 0x1.ed9bdbc6f1b0ap63, 0x1.944462d4d5991p67 },
+ { 0x1.bd8c96533b39bp63, 0x1.6d561de54f6a1p67 },
+ { 0x1.921ec84d5860ep63, 0x1.4a1d472804fc8p67 },
+ { 0x1.6ae172414cebap63, 0x1.2a406e25fcb44p67 },
+ { 0x1.476e3b661be8cp63, 0x1.0d6e7662dda9dp67 },
+ { 0x1.276873924f0b4p63, 0x1.e6bba6770e22dp66 },
+ { 0x1.0a7c2c9322f59p63, 0x1.b797ab2ba22d2p66 },
+ { 0x1.e0bad18c4e37dp62, 0x1.8cf813910fdcdp66 },
+ { 0x1.b18eba0be4d24p62, 0x1.666f488db6e0ap66 },
+ { 0x1.86f7884e1caadp62, 0x1.4399f7770045fp66 },
+ { 0x1.608484d592328p62, 0x1.241e1ebbbf4ecp66 },
+ { 0x1.3dcfaee52a8f5p62, 0x1.07aa30ce6a5ap66 },
+ { 0x1.1e7cbac093f27p62, 0x1.dbe8969a24c6fp65 },
+ { 0x1.023827dc88ed9p62, 0x1.ad7301258d788p65 },
+ { 0x1.d16cd999791c3p61, 0x1.837a640fa9d3dp65 },
+ { 0x1.a3666de0788bp61, 0x1.5d90f358d61f6p65 },
+ { 0x1.79e17816df1e8p61, 0x1.3b5342f7be9cp65 },
+ { 0x1.546e385224d1p61, 0x1.1c674ecd152d3p65 },
+ { 0x1.32a7a483e977bp61, 0x1.007b997a0b531p65 },
+ { 0x1.1432649c86c4dp61, 0x1.ce8cc007a6432p64 },
+ { 0x1.f177ce0bd5836p60, 0x1.a109c0bccbc39p64 },
+ { 0x1.bff3166bc36eep60, 0x1.77f5624913c3ap64 },
+ { 0x1.934fc0975fb3p60, 0x1.52e251d5d3b1fp64 },
+ { 0x1.6b13ebb9a5ad4p60, 0x1.316da780bc4d9p64 },
+ { 0x1.46d17a80cc174p60, 0x1.133deb1d3526p64 },
+ { 0x1.2624f3a0a887p60, 0x1.f00460b24acf8p63 },
+ { 0x1.08b47d7733cb6p60, 0x1.bee2903d584f9p63 },
+ { 0x1.dc5de496b181p59, 0x1.92920a7c80e26p63 },
+ { 0x1.ac9615b3c9fd7p59, 0x1.6a9b25345c773p63 },
+ { 0x1.818d3a356669ep59, 0x1.4691b26b9c82fp63 },
+ { 0x1.5acbdab2ed713p59, 0x1.2613e9610f6d1p63 },
+ { 0x1.37e61fd4c0fep59, 0x1.08c969adf0beap63 },
+ { 0x1.187ab3d71db11p59, 0x1.dcc4ac4f59be5p62 },
+ { 0x1.f8637ea4e52acp58, 0x1.ad2d0a9a18288p62 },
+ { 0x1.c577fd709b099p58, 0x1.82498a7cc94b9p62 },
+ { 0x1.97a3dc62119c8p58, 0x1.5ba462dee8a02p62 },
+ { 0x1.6e66137bb7ccap58, 0x1.38d330d8806ap62 },
+ { 0x1.494a3f6a9a70ep58, 0x1.1975e0627306cp62 },
+ { 0x1.27e767bb79ea2p58, 0x1.fa6b5ee8f3088p61 },
+ { 0x1.09dee32687729p58, 0x1.c78892308bd9p61 },
+ { 0x1.ddb6ae2f39381p57, 0x1.99b5ec6741cb3p61 },
+ { 0x1.ad1f9fba4b2abp57, 0x1.7073c400e10dcp61 },
+ { 0x1.816dde4c11ca3p57, 0x1.4b4ee0b3a84d6p61 },
+ { 0x1.5a245d5e5289cp57, 0x1.29df4862ac231p61 },
+ { 0x1.36d26a686daafp57, 0x1.0bc7294e0cbafp61 },
+ { 0x1.171277cbbce9cp57, 0x1.e163bd8df864p60 },
+ { 0x1.f5120b45c00e6p56, 0x1.b0a61bce91993p60 },
+ { 0x1.c1c74b30d0bbp56, 0x1.84cbb00f925fp60 },
+ { 0x1.93b02e5cf0324p56, 0x1.5d5841ce6cb73p60 },
+ { 0x1.6a46f43f3118cp56, 0x1.39dbcd485dd07p60 },
+ { 0x1.45132973bb79bp56, 0x1.19f153b38a108p60 },
+ { 0x1.23a85891dc72bp56, 0x1.fa7b9159fc471p59 },
+ { 0x1.05a4dba466c4ep56, 0x1.c6de3429e31fap59 },
+ { 0x1.d561964307dc4p55, 0x1.98769faac8a1bp59 },
+ { 0x1.a4fa0f13737e8p55, 0x1.6ebf82977acfp59 },
+ { 0x1.7984b636ad1bep55, 0x1.4940bc89fa5aap59 },
+ { 0x1.5281628cb373ap55, 0x1.278e135bcf0a4p59 },
+ { 0x1.2f7cc38bc628dp55, 0x1.0946088b6f8edp59 },
+ { 0x1.100f1aef8eaf5p55, 0x1.dc21972b9e9f4p58 },
+ { 0x1.e7b62ce66acdep54, 0x1.ab3e8cfada51ap58 },
+ { 0x1.b5198cf325114p54, 0x1.7f5483f729c27p58 },
+ { 0x1.87b15da6677afp54, 0x1.57e33e2b1c6dap58 },
+ { 0x1.5ef5de2e68985p54, 0x1.3477480d89e25p58 },
+ { 0x1.3a6d00852a688p54, 0x1.14a8b54629fb2p58 },
+ { 0x1.19a90b14f53afp54, 0x1.f033fa073d52p57 },
+ { 0x1.f88eba04114cbp53, 0x1.bcede5acc0d4p57 },
+ { 0x1.c3dea36b87937p53, 0x1.8ee7b29d0b081p57 },
+ { 0x1.94a28136fa731p53, 0x1.659917bbb6632p57 },
+ { 0x1.6a4b2c9663fa1p53, 0x1.40877b79cd868p57 },
+ { 0x1.44580945b8452p53, 0x1.1f44979177348p57 },
+ { 0x1.22558f1aa9f03p53, 0x1.016d3f035816p57 },
+ { 0x1.03dbf8db89298p53, 0x1.cd508600d0ba8p56 },
+ { 0x1.d11c2965639f6p52, 0x1.9d4ae77a21604p56 },
+ { 0x1.a03065db54a4bp52, 0x1.723974e9529d8p56 },
+ { 0x1.745e6013d8cf3p52, 0x1.4b9a944f57915p56 },
+ { 0x1.4d1f2eb8531p52, 0x1.28f9c9b769ee3p56 },
+ { 0x1.29f9b7c4f56dfp52, 0x1.09ee66b6e99e9p56 },
+ { 0x1.0a814a1dfc5edp52, 0x1.dc34b6999ff72p55 },
+ { 0x1.dca8b63e38fa9p51, 0x1.aa5249b4cca57p55 },
+ { 0x1.aa36c9242f8bcp51, 0x1.7d9db080918bap55 },
+ { 0x1.7d0fbfa6c3c19p51, 0x1.558e88e8945efp55 },
+ { 0x1.54a6b679dd96fp51, 0x1.31aa564e92066p55 },
+ { 0x1.307d4e71272d7p51, 0x1.11831a9c3763dp55 },
+ { 0x1.1022313b11381p51, 0x1.e96c265c21fbfp54 },
+ { 0x1.e65f78e13edcdp50, 0x1.b5d52c19374fep54 },
+ { 0x1.b2959e487c93fp50, 0x1.87a2188252d5fp54 },
+ { 0x1.84436cf62b6f8p50, 0x1.5e440cc8caaf9p54 },
+ { 0x1.5ad66c67f3f63p50, 0x1.393ad199301dep54 },
+ { 0x1.35cb549c616ebp50, 0x1.18135a0647102p54 },
+ { 0x1.14ac7e9322a1ap50, 0x1.f4ccd98eab06bp53 },
+ { 0x1.ee20fae75a2c5p49, 0x1.bfaedff2748c1p53 },
+ { 0x1.b931b883c77f2p49, 0x1.9026a7e3c9538p53 },
+ { 0x1.89e1f8e1d4be6p49, 0x1.659f3419269eep53 },
+ { 0x1.5f9a24050e89fp49, 0x1.3f92e9472ca4cp53 },
+ { 0x1.39d2746cbe57fp49, 0x1.1d89fb6602df9p53 },
+ { 0x1.18115431b6c4ap49, 0x1.fe32077e095c4p52 },
+ { 0x1.f3d3ca19edf64p48, 0x1.c7bf775863df5p52 },
+ { 0x1.bdf55dd9bdcep48, 0x1.970fb0b5580dcp52 },
+ { 0x1.8dd8e25d2255dp48, 0x1.6b88087e4af9fp52 },
+ { 0x1.62e225ebca19p48, 0x1.449de67f2c6b2p52 },
+ { 0x1.3c855ef212badp48, 0x1.21d51dc348d4dp52 },
+ { 0x1.1a4576cd5cddcp48, 0x1.02be7023a443ep52 },
+ { 0x1.f765035c713d8p47, 0x1.cdec7155697e1p51 },
+ { 0x1.c0d0bdeb46ae2p47, 0x1.9c4671c1a6e3cp51 },
+ { 0x1.901afbd3819bep47, 0x1.6feb0af26f865p51 },
+ { 0x1.64a386137b955p47, 0x1.484b1e63b3be4p51 },
+ { 0x1.3ddb15521ce49p47, 0x1.24e68a1458bd7p51 },
+ { 0x1.1b418ba2217c6p47, 0x1.054a9a7c2f05ap51 },
+ { 0x1.f8c8bad8e2a2p46, 0x1.d2214ad33ca5ep50 },
+ { 0x1.c1ba4950b8f4fp46, 0x1.9fb9933adac68p50 },
+ { 0x1.90a0b40dd690cp46, 0x1.72b99eccc462ep50 },
+ { 0x1.64d860502b279p46, 0x1.4a8e4dbe3539cp50 },
+ { 0x1.3dcf1aadc099dp46, 0x1.26b4018ef81f7p50 },
+ { 0x1.1b02414a73357p46, 0x1.06b4fe82cc6aep50 },
+ { 0x1.f7fa3e4bec2aep45, 0x1.d44feffb34893p49 },
+ { 0x1.c0aee6d6b1406p45, 0x1.a15d86bb23572p49 },
+ { 0x1.8f684065398bfp45, 0x1.73ea5ac0d71a9p49 },
+ { 0x1.637ff9397e989p45, 0x1.4b5fdd0f567fap49 },
+ { 0x1.3c618d3c706ebp45, 0x1.2737769828878p49 },
+ { 0x1.1988625955723p45, 0x1.06f8da87263cep49 },
+ { 0x1.f4fc2f6d50e41p44, 0x1.d4710a9e149edp48 },
+ { 0x1.bdb204ff1cda3p44, 0x1.a12cc7b1bf616p48 },
+ { 0x1.8c75a6fa17116p44, 0x1.73793d6253bd7p48 },
+ { 0x1.609ec277b8703p44, 0x1.4abd0af44c7f8p48 },
+ { 0x1.399725d96eb63p44, 0x1.266f2e981ccfbp48 },
+ { 0x1.16d8d1241b86bp44, 0x1.06154a07d21a2p48 },
+ { 0x1.efd875a51d28dp43, 0x1.d2842b40e25fp47 },
+ { 0x1.b8cd873c4de72p43, 0x1.9f27fa465d061p47 },
+ { 0x1.87d2a89e5ac65p43, 0x1.7167c3937ded9p47 },
+ { 0x1.5c3e42539c769p43, 0x1.48a7fb96552cap47 },
+ { 0x1.35791e04cd29fp43, 0x1.245dcbaa25b1bp47 },
+ { 0x1.12fc6cdafd10dp43, 0x1.040d4ab2de626p47 },
+ { 0x1.e8a0077a1ed47p42, 0x1.ce8fcb8dadc2cp46 },
+ { 0x1.b2118f75a4eb7p42, 0x1.9b55e7c11d9e6p46 },
+ { 0x1.818e8b1c2616fp42, 0x1.6dbce02ec5c77p46 },
+ { 0x1.566cdf4525ebp42, 0x1.4527acab6dfebp46 },
+ { 0x1.3014fd204bc71p42, 0x1.210a3ddcb4706p46 },
+ { 0x1.0dffe0bfc0c74p42, 0x1.00e7aba6527c9p46 },
+ { 0x1.df6a8d5e14f11p41, 0x1.c8a12a152d814p45 },
+ { 0x1.a9942579915cdp41, 0x1.95c35893651c9p45 },
+ { 0x1.79bdc576e403ap41, 0x1.6884d52cc9914p45 },
+ { 0x1.4f3d9114d799bp41, 0x1.4047ce663f641p45 },
+ { 0x1.297c4e6eb62fcp41, 0x1.1c7f9c74f3e7cp45 },
+ { 0x1.07f35ef1a4fcp41, 0x1.f95dcee779f74p44 },
+ { 0x1.d455e0a3b0d94p40, 0x1.c0cc007cc808ep44 },
+ { 0x1.9f70bf04a77cep40, 0x1.8e82cd2a6133cp44 },
+ { 0x1.707990a8defefp40, 0x1.61d0ef76712e4p44 },
+ { 0x1.46c779ebb14aep40, 0x1.3a1882865d26ep44 },
+ { 0x1.21c4420bc9879p40, 0x1.16cce86450b2p44 },
+ { 0x1.00ea48df1e7fbp40, 0x1.eee1d41e1e516p43 },
+ { 0x1.c7856a7693627p39, 0x1.b72a1658393d4p43 },
+ { 0x1.93c7abef59a2cp39, 0x1.85ac17b553c4fp43 },
+ { 0x1.65df602b1e0ffp39, 0x1.59b72775450f3p43 },
+ { 0x1.3d256a5ee461dp39, 0x1.32ae03812fcp43 },
+ { 0x1.19053bac5f645p39, 0x1.1004b9cd4bae6p43 },
+ { 0x1.f1f58fe66e142p38, 0x1.e27d88d5289bfp42 },
+ { 0x1.b9216793da422p38, 0x1.abdab3fb224cep42 },
+ { 0x1.86bd6adace04ep38, 0x1.7b5bd9f52a89ep42 },
+ { 0x1.5a104640aeb74p38, 0x1.5051a941eb13p42 },
+ { 0x1.32755417b50ddp38, 0x1.2a20366f6a0dep42 },
+ { 0x1.0f5a5274f5c45p38, 0x1.083cdb1163405p42 },
+ { 0x1.e07ab300dc4b9p37, 0x1.d458a013d18b4p41 },
+ { 0x1.a956163a49613p37, 0x1.9f01f97b2e043p41 },
+ { 0x1.7879eb52380edp37, 0x1.6fb2eaf7d8102p41 },
+ { 0x1.4d30488394e18p37, 0x1.45be480207b14p41 },
+ { 0x1.26d7af2869fc5p37, 0x1.208a2b041836ep41 },
+ { 0x1.04e0c593552f5p37, 0x1.ff1ba8cbc9c8dp40 },
+ { 0x1.cd98a274acae3p36, 0x1.c49f8a8ec4aebp40 },
+ { 0x1.9852d44d7528bp36, 0x1.90c81ede57558p40 },
+ { 0x1.6927c2c3e497p36, 0x1.62d5a948b6358p40 },
+ { 0x1.3f65a98c177c9p36, 0x1.3a1de0952fd2bp40 },
+ { 0x1.1a6ed66936eeap36, 0x1.16098d4b94692p40 },
+ { 0x1.f36ed3084aa81p35, 0x1.ec24d6a8bc072p39 },
+ { 0x1.b986ab7ebdd54p35, 0x1.b3828ebcc128bp39 },
+ { 0x1.864933f3c0573p35, 0x1.8158a3038115ep39 },
+ { 0x1.58f359f0c4e8fp35, 0x1.54eb3e9a3e72bp39 },
+ { 0x1.30d82cb8a968cp35, 0x1.2d93b0174f61ap39 },
+ { 0x1.0d5e5f59de7c1p35, 0x1.0abe0d45fd5c2p39 },
+ { 0x1.dbfc240ab5f81p34, 0x1.d7ce33a39bd89p38 },
+ { 0x1.a47db588b15cfp34, 0x1.a134d30d655e4p38 },
+ { 0x1.736c0d0a31187p34, 0x1.70e16f315ef4p38 },
+ { 0x1.480a1879e8f57p34, 0x1.461cda38e2783p38 },
+ { 0x1.21b0591ce1cfdp34, 0x1.2044a2faebb7bp38 },
+ { 0x1.ff94e3fca1752p33, 0x1.fd91813f8cc8cp37 },
+ { 0x1.c3a9f9558ffap33, 0x1.c2530177987fep37 },
+ { 0x1.8eb738c76b2f2p33, 0x1.8deb61106f334p37 },
+ { 0x1.5fee91a43fef1p33, 0x1.5f91f55e86346p37 },
+ { 0x1.3699940a6a811p33, 0x1.3694e7b13691bp37 },
+ { 0x1.1216c07263dep33, 0x1.1256a18de488bp37 },
+ { 0x1.e3ae49fef5535p32, 0x1.e49705a5ebd5fp36 },
+ { 0x1.aab87fb8e4441p32, 0x1.abefb3186e784p36 },
+ { 0x1.786c3dca158c4p32, 0x1.79dc285401b7dp36 },
+ { 0x1.4c036b7451223p32, 0x1.4d9a4f359ba1ep36 },
+ { 0x1.24cec8453db03p32, 0x1.267e46fd85893p36 },
+ { 0x1.02334e92993b9p32, 0x1.03efdea0a0506p36 },
+ { 0x1.c74fc41217dfbp31, 0x1.cad0afbb569b1p35 },
+ { 0x1.9166837399532p31, 0x1.94e0d5e7a8744p35 },
+ { 0x1.61d46c11dd916p31, 0x1.653d077d9eefp35 },
+ { 0x1.37dbe7711fcd4p31, 0x1.3b2a639494566p35 },
+ { 0x1.12d55c1e73c65p31, 0x1.16038b4af0a0ep35 },
+ { 0x1.e4594b115943bp30, 0x1.ea6c598920c48p34 },
+ { 0x1.aabdabdb93484p30, 0x1.b081aaf25ade1p34 },
+ { 0x1.77f073eb945dfp30, 0x1.7d62079a4e4a6p34 },
+ { 0x1.4b252d0bc8bebp30, 0x1.5042e1a8664edp34 },
+ { 0x1.23a7345c57ccap30, 0x1.287117d29a9e6p34 },
+ { 0x1.00d6f8a57f06ep30, 0x1.054e44f8ee735p34 },
+ { 0x1.c44f136cf3bd8p29, 0x1.cc9cbc5fe04a8p33 },
+ { 0x1.8e38df2790b7ap29, 0x1.95eb2cb828067p33 },
+ { 0x1.5e8f828661e21p29, 0x1.65acfefcd0029p33 },
+ { 0x1.3490e7e2bc31cp29, 0x1.3b20c56ad84f5p33 },
+ { 0x1.0f91b7ff9bb2ap29, 0x1.159b917beb87ap33 },
+ { 0x1.ddf56913a541ep28, 0x1.e90cb5cac7057p32 },
+ { 0x1.a48cc1b8a7bc7p28, 0x1.aeb7659e5f7efp32 },
+ { 0x1.71fde01e2ca8cp28, 0x1.7b4b752e86e5fp32 },
+ { 0x1.4578e0b906b32p28, 0x1.4df8ace15322ep32 },
+ { 0x1.1e4659a2a2156p28, 0x1.26072a17961ap32 },
+ { 0x1.f788fc218597bp27, 0x1.02d48c75e7d9bp32 },
+ { 0x1.bac92daac0b9dp27, 0x1.c7a2ecd5f05ap31 },
+ { 0x1.85518c3484796p27, 0x1.90feaede7f2aep31 },
+ { 0x1.56441b55bfff1p27, 0x1.60dcef1cedc3ap31 },
+ { 0x1.2cdd203ab43a1p27, 0x1.36787980e7387p31 },
+ { 0x1.08700c199ad4fp27, 0x1.112346e13dd7ep31 },
+ { 0x1.d0c9857c390f3p26, 0x1.e087915129a98p30 },
+ { 0x1.986a650394095p26, 0x1.a6a5096da5b7dp30 },
+ { 0x1.66d6688315ad6p26, 0x1.73aff07c7874ep30 },
+ { 0x1.3b3d55ebd8547p26, 0x1.46d572e10e216p30 },
+ { 0x1.14e7b714e7093p26, 0x1.1f5ba17e5a90bp30 },
+ { 0x1.e667d9a8bcd9ep25, 0x1.f93d0d186fbcdp29 },
+ { 0x1.ab2733e383ad8p25, 0x1.bc1b22cec72bp29 },
+ { 0x1.7712b76c8c7f6p25, 0x1.86529e9df069cp29 },
+ { 0x1.494d8e1d4fc61p25, 0x1.5702d052bf73ap29 },
+ { 0x1.2115447c6627dp25, 0x1.2d65aee08874cp29 },
+ { 0x1.fb7d503fc65c8p24, 0x1.08ccb49580d43p29 },
+ { 0x1.bd660913b938cp24, 0x1.d13c32a98512bp28 },
+ { 0x1.86db66e158524p24, 0x1.98a4bfd5a5fadp28 },
+ { 0x1.56f3ed5aa4222p24, 0x1.66e459a7794f4p28 },
+ { 0x1.2ce2265a96befp24, 0x1.3b28bbce3c1c6p28 },
+ { 0x1.07f14a8d0c116p24, 0x1.14b8b6b67144ep28 },
+ { 0x1.cf049ebedf60dp23, 0x1.e5e26dbef0e28p27 },
+ { 0x1.96129ca292f7ep23, 0x1.aa854b5c4f131p27 },
+ { 0x1.6416763f6b3bcp23, 0x1.765d329106241p27 },
+ { 0x1.3837bf030f4a8p23, 0x1.488b9479ee1c4p27 },
+ { 0x1.11b82880134f9p23, 0x1.204c8d940530bp27 },
+ { 0x1.dfe0c1b8af1f3p22, 0x1.f9e77238e0031p26 },
+ { 0x1.a49aa1651cfcap22, 0x1.bbd2c8fd7e193p26 },
+ { 0x1.709b5a3a79128p22, 0x1.85502f16a0f8dp26 },
+ { 0x1.42ffa7e9ace3fp22, 0x1.5574ceffe3945p26 },
+ { 0x1.1affd2eccd616p22, 0x1.2b72182c97af5p26 },
+ { 0x1.efd8be43ac9a9p21, 0x1.06925da53a0fcp26 },
+ { 0x1.b2564005de7e5p21, 0x1.cc6bb6d71090dp25 },
+ { 0x1.7c694cd2b4ffdp21, 0x1.93a02d0c97221p25 },
+ { 0x1.4d23fa69bd814p21, 0x1.61cb1a027e057p25 },
+ { 0x1.23b556e6e918ep21, 0x1.361358dd1f243p25 },
+ { 0x1.fecbcf04dca9p20, 0x1.0fba0d2660d89p25 },
+ { 0x1.bf29264dcdc82p20, 0x1.dc2ef387bd0ep24 },
+ { 0x1.8767d7fc43eb6p20, 0x1.a130711aadcdap24 },
+ { 0x1.568f9937abc79p20, 0x1.6d758e1ac9659p24 },
+ { 0x1.2bc67d8c20136p20, 0x1.401abca024479p24 },
+ { 0x1.064d4616b0094p20, 0x1.185819a7f8c6ap24 },
+ { 0x1.caf8458ad2a12p19, 0x1.eafc2b00a99b1p23 },
+ { 0x1.917faff93e54p19, 0x1.ade505ba61e89p23 },
+ { 0x1.5f2e79283b1cap19, 0x1.785c00b5cb27ep23 },
+ { 0x1.33220b1da4f59p19, 0x1.4973634932c1ap23 },
+ { 0x1.0c93ac678b0ccp19, 0x1.205a7d78be568p23 },
+ { 0x1.d5aa313452daep18, 0x1.f8b4440d68221p22 },
+ { 0x1.9a9b05368c88bp18, 0x1.b9a31a7b9868cp22 },
+ { 0x1.66ede7f0c2d55p18, 0x1.826756e1a42e2p22 },
+ { 0x1.39b7fc18e5891p18, 0x1.5209676e4b424p22 },
+ { 0x1.122b662569616p18, 0x1.27b019965e362p22 },
+ { 0x1.df2779ceabfc8p17, 0x1.029ce648133fdp22 },
+ { 0x1.a2a5d2945d2b7p17, 0x1.c45161cd95fe8p21 },
+ { 0x1.6dbccf848794ap17, 0x1.8b81d680cdfc5p21 },
+ { 0x1.3f79bf21caa96p17, 0x1.59ca24a7521ddp21 },
+ { 0x1.17080ae674896p17, 0x1.2e48f266999cfp21 },
+ { 0x1.e75b024885f54p16, 0x1.0838b13324d03p21 },
+ { 0x1.a98e26924c6c8p16, 0x1.cdd86b83e679dp20 },
+ { 0x1.738bf4bc8d296p16, 0x1.93977456406ddp20 },
+ { 0x1.445a6a9a273c6p16, 0x1.60a47aca18e96p20 },
+ { 0x1.1b1eabeffc3a5p16, 0x1.341669953fe1cp20 },
+ { 0x1.ee324e1fde417p15, 0x1.0d210b765b3d6p20 },
+ { 0x1.af4465e9c5668p15, 0x1.d622fa53c02cep19 },
+ { 0x1.784e3008fb46bp15, 0x1.9a961d6383ef7p19 },
+ { 0x1.484eecd2f1383p15, 0x1.66890cd0bf55fp19 },
+ { 0x1.1e65fd1ef2701p15, 0x1.390b73f2a4fbp19 },
+ { 0x1.f39dc6baaccd7p14, 0x1.114ae59581395p19 },
+ { 0x1.b3bb863d26278p14, 0x1.dd1e5296953a3p18 },
+ { 0x1.7bf89f052b591p14, 0x1.a06dfa21b6c59p18 },
+ { 0x1.4b4e35dbe0cddp14, 0x1.6b6a7a27c9005p18 },
+ { 0x1.20d6781986167p14, 0x1.3d1cca3d4f6d8p18 },
+ { 0x1.f790f6877f51ep13, 0x1.14acc164c64fep18 },
+ { 0x1.b6e93fa7299b3p13, 0x1.e2ba80b9c3a1bp17 },
+ { 0x1.7e82cde922833p13, 0x1.a511aa3827999p17 },
+ { 0x1.4d515a14a6132p13, 0x1.6f3d9139319edp17 },
+ { 0x1.226a790f97768p13, 0x1.404113d7d18e6p17 },
+ { 0x1.fa02b8ac73416p12, 0x1.173ed60fcd6fap17 },
+ { 0x1.b8c634233722p12, 0x1.e6ea95e92c624p16 },
+ { 0x1.7fe6d7fbcef2cp12, 0x1.a8767775dd309p16 },
+ { 0x1.4e53acc7531b1p12, 0x1.71f97a2983044p16 },
+ { 0x1.231e547065724p12, 0x1.42710a88aab19p16 },
+ { 0x1.faed5c4559717p11, 0x1.18fb2ded8ebb1p16 },
+ { 0x1.b94e0bfb59934p11, 0x1.e9a4d9b21386ep15 },
+ { 0x1.80217e57d8a3fp11, 0x1.aa947efe69879p15 },
+ { 0x1.4e52d23cf50bp11, 0x1.7397d8e2bd385p15 },
+ { 0x1.22f0652094ae6p11, 0x1.43a79684f6ef6p15 },
+ { 0x1.fa4eba730bf6p10, 0x1.19ddbd8138a9p15 },
+ { 0x1.b87f86a26fad7p10, 0x1.eae2ef93df996p14 },
+ { 0x1.7f323487ff94ap10, 0x1.ab66cfccafb75p14 },
+ { 0x1.4d4ec8ea8ee67p10, 0x1.7414e5b5ca43cp14 },
+ { 0x1.21e112e39bf18p10, 0x1.43e1e22ebfdb4p14 },
+ { 0x1.f8283ec45f117p9, 0x1.19e4732be2ffp14 },
+ { 0x1.b65c7f9f1fbedp9, 0x1.eaa1efb3b003ep13 },
+ { 0x1.7d1b22b6810f6p9, 0x1.aaeb7de6855e2p13 },
+ { 0x1.4b49e984886ep9, 0x1.736f7c0d13f06p13 },
+ { 0x1.1ff2d0d5a2649p9, 0x1.431f651be2ff4p13 },
+ { 0x1.f47ee1cab73ddp8, 0x1.190f3f39e9af4p13 },
+ { 0x1.b2e9e76c8d9f9p8, 0x1.e8e2722ca46cfp12 },
+ { 0x1.79e11d635b9a7p8, 0x1.a923a9d8d5019p12 },
+ { 0x1.4848ddf7dfffep8, 0x1.71a91ee04e82cp12 },
+ { 0x1.1d2a13fdd2709p8, 0x1.4161e6298ed3ap12 },
+ { 0x1.ef5b15f73200ap7, 0x1.176014201ab17p12 },
+ { 0x1.ae2fb07705cc3p7, 0x1.e5a88cbf394e4p11 },
+ { 0x1.758b92cdfdc64p7, 0x1.a6137c537bf6dp11 },
+ { 0x1.44528f79b1b51p7, 0x1.6ec5f2d1367f4p11 },
+ { 0x1.198d422be3f8cp7, 0x1.3ead7491061afp11 },
+ { 0x1.e8c8a7276c93p6, 0x1.14dadee76975ap11 },
+ { 0x1.a838b09afcf62p6, 0x1.e0fbc2ec572b9p10 },
+ { 0x1.70246e766d2f3p6, 0x1.a1c215fcd0beap10 },
+ { 0x1.3f700c0d99876p6, 0x1.6accae115453ep10 },
+ { 0x1.1524997d01ap6, 0x1.3b08582357e32p10 },
+ { 0x1.e0d68d9047f7ap5, 0x1.118577f06b2f2p10 },
+ { 0x1.a11277ca2bd3fp5, 0x1.dae6e8d292a1ep9 },
+ { 0x1.69b7f34ec048ep5, 0x1.9c3973d4c9b08p9 },
+ { 0x1.39ac6410ceb63p5, 0x1.65c67e684d1e6p9 },
+ { 0x1.0ffa110b113fp5, 0x1.367af901b137p9 },
+ { 0x1.d796b4f7aaf7fp4, 0x1.0d678c614f535p9 },
+ { 0x1.98cd1cb38dccp4, 0x1.d377f96b9fd62p8 },
+ { 0x1.62548d6675835p4, 0x1.958648bd6035p8 },
+ { 0x1.331480815e7cdp4, 0x1.5fbee5e7590f4p8 },
+ { 0x1.0a19336cc73a1p4, 0x1.310fbf558eca2p8 },
+ { 0x1.cd1db96a6c6efp3, 0x1.088a80b837328p8 },
+ { 0x1.8f7b007e1de49p3, 0x1.cabfe10b3371ap7 },
+ { 0x1.5a0a9c047e3c7p3, 0x1.8db7ccf7600f4p7 },
+ { 0x1.2bb6f2dd8e254p3, 0x1.58c38f07b7c3bp7 },
+ { 0x1.038ef3cbdc1c7p3, 0x1.2ad2ebb6268bdp7 },
+ { 0x1.c1829acfb62b3p2, 0x1.02f94d1fb1ba4p7 },
+ { 0x1.85308ad209551p2, 0x1.c0d23d3daadadp6 },
+ { 0x1.50ec3549a202dp2, 0x1.84df8496cc3aep6 },
+ { 0x1.23a3bf963c1ebp2, 0x1.50e4191e1b76cp6 },
+ { 0x1.f8d2fce0ebb41p1, 0x1.23d2690dc7344p6 },
+ { 0x1.b4de68e608347p1, 0x1.f980a88588961p5 },
+ { 0x1.7a03df8f9f479p1, 0x1.b5c5135a44acbp5 },
+ { 0x1.470ce4924af72p1, 0x1.7b10fe1f0aeaap5 },
+ { 0x1.1aec242758b4fp1, 0x1.4831de32e25bdp5 },
+ { 0x1.e9700b697ec96p0, 0x1.1c1d98f1b1f71p5 },
+ { 0x1.a74be9568f922p0, 0x1.ebda6af103d07p4 },
+ { 0x1.6e0c8fadbb05p0, 0x1.a9b07f491a273p4 },
+ { 0x1.3c8164e42f29cp0, 0x1.70618a9c019dap4 },
+ { 0x1.11a259faba91ep0, 0x1.3ebfb36da371bp4 },
+ { 0x1.d91518c2acaf6p-1, 0x1.13c51b7852ecp4 },
+ { 0x1.98e739a118b5ep-1, 0x1.dd1d36683753bp3 },
+ { 0x1.616346ca3be0ep-1, 0x1.9cae5c1f5de61p3 },
+ { 0x1.315f58c13df9cp-1, 0x1.64e7f0a95542fp3 },
+ { 0x1.07d957435b8c4p-1, 0x1.34a1a5595e9cbp3 },
+ { 0x1.c7e35cf4db634p-2, 0x1.0ada93ac2688ep3 },
+ { 0x1.89cd6ead31b71p-2, 0x1.cd680d6a376d2p2 },
+ { 0x1.542176fe1c2b2p-2, 0x1.8ed9e84be9bacp2 },
+ { 0x1.25bd00bd97eddp-2, 0x1.58bc1beb8e117p2 },
+ { 0x1.fb491e02b7c15p-3, 0x1.29ecb15514182p2 },
+ { 0x1.b5fcd30c7e1f6p-3, 0x1.017069c4b54cfp2 },
+ { 0x1.7a1c33cc1922bp-3, 0x1.bcdb33f7b88f9p1 },
+ { 0x1.46610483f2395p-3, 0x1.804f671a7a35cp1 },
+ { 0x1.19b0f23241b88p-3, 0x1.4bf6ca87a4707p1 },
+ { 0x1.e62f62b4555dcp-4, 0x1.1eb67d8a75351p1 },
+ { 0x1.a383ca9f98a0fp-4, 0x1.ef3318a5788dep0 },
+ { 0x1.69f16aeb3677p-4, 0x1.ab97c2106c4d2p0 },
+ { 0x1.383bf2b37a037p-4, 0x1.712bc1550fb6ap0 },
+ { 0x1.0d51cf5a16254p-4, 0x1.3eb13a24821e2p0 },
+ { 0x1.d08cdac87dce6p-5, 0x1.131510c1da6adp0 },
+ { 0x1.909a7c3ac6f99p-5, 0x1.dad26311e9efp-1 },
+ { 0x1.596acfa0bcc8fp-5, 0x1.99bf36c7ef068p-1 },
+ { 0x1.29cc13bfd53ap-5, 0x1.618c26c1169a6p-1 },
+ { 0x1.00b60212cf113p-5, 0x1.3104d5f799552p-1 },
+ { 0x1.ba886ae6e40ep-6, 0x1.071e8b6003b16p-1 },
+ { 0x1.7d62a282a4851p-6, 0x1.c5e5338097f6bp-2 },
+ { 0x1.48a59e9cb1eb1p-6, 0x1.87730de08c821p-2 },
+ { 0x1.1b2abc895a771p-6, 0x1.518db221cf8bap-2 },
+ { 0x1.e7e6f4c33ededp-7, 0x1.230ae74a714aap-2 },
+ { 0x1.a4480db60fe17p-7, 0x1.f5d1c58fdc6acp-3 },
+ { 0x1.69fd19aacb90ap-7, 0x1.b091a88a72f08p-3 },
+ { 0x1.37be42e1159e7p-7, 0x1.74d459ba38afep-3 },
+ { 0x1.0c707db025298p-7, 0x1.414d114bdcde1p-3 },
+ { 0x1.ce3ee3757dbe5p-8, 0x1.14dc49cbc0c3p-3 },
+ { 0x1.8df06bfb34f6dp-8, 0x1.dd13408401cdcp-4 },
+ { 0x1.568986affafc5p-8, 0x1.9afd0eca1593dp-4 },
+ { 0x1.26d009f5af049p-8, 0x1.6203633a6814ap-4 },
+ { 0x1.fb69c5d6b524ep-9, 0x1.30e632b0008c9p-4 },
+ { 0x1.b49c67cd1611fp-9, 0x1.069124dc6eaefp-4 },
+ { 0x1.77a47ec4e9fa1p-9, 0x1.c42b48d5cfe42p-5 },
+ { 0x1.43260788f0a1fp-9, 0x1.854b792c33d4ap-5 },
+ { 0x1.15f4e018a09eep-9, 0x1.4f1f511f7b2d7p-5 },
+ { 0x1.de1c72f739a49p-10, 0x1.2073f996519cp-5 },
+ { 0x1.9b25dc6d6642ep-10, 0x1.f08155c194aadp-6 },
+ { 0x1.61853cc8eddacp-10, 0x1.ab41e011814e5p-6 },
+ { 0x1.2feeed430b87bp-10, 0x1.6f9f62ec4193ap-6 },
+ { 0x1.05451535e8102p-10, 0x1.3c45d7f9e2fbp-6 },
+ { 0x1.c122bcbda7f8ep-11, 0x1.100ffa10ff0f3p-6 },
+ { 0x1.81ff0b26f3b6ap-11, 0x1.d401bee3a7787p-7 },
+ { 0x1.4bb153d2d0728p-11, 0x1.927ce5fbbe352p-7 },
+ { 0x1.1cfe80beb05a4p-11, 0x1.5a195c6e2a08ep-7 },
+ { 0x1.e9ae566e02486p-12, 0x1.2992f3c7d2ce7p-7 },
+ { 0x1.a4a3297375461p-12, 0x1.ffa47aef63bd2p-8 },
+ { 0x1.6948e77b6c537p-12, 0x1.b7ccca35ce88ep-8 },
+ { 0x1.3644eed5b1126p-12, 0x1.79ffc3cd6bc92p-8 },
+ { 0x1.0a6cd27d913d7p-12, 0x1.44d7c3dca9cc8p-8 },
+ { 0x1.c97f5c053e775p-13, 0x1.1720abf01aa9bp-8 },
+ { 0x1.88c0c973b68fcp-13, 0x1.dfa22008cf2c8p-9 },
+ { 0x1.512157ee1d8bep-13, 0x1.9c08a63df00dcp-9 },
+ { 0x1.215988e86b086p-13, 0x1.61eb258af5a93p-9 },
+ { 0x1.f09f2b684fb31p-14, 0x1.2ff68a28f7dc4p-9 },
+ { 0x1.aa222a98ba953p-14, 0x1.0506e21782262p-9 },
+ { 0x1.6d9b06046eb66p-14, 0x1.c041afe3a1ad2p-10 },
+ { 0x1.39a30e3030664p-14, 0x1.80d8271e40929p-10 },
+ { 0x1.0d05cd2b64652p-14, 0x1.4a5cc1e67b046p-10 },
+ { 0x1.cd740d2318d4dp-15, 0x1.1b8f04bdfa1bfp-10 },
+ { 0x1.8bb7603d9828p-15, 0x1.e6b65816f0ff1p-11 },
+ { 0x1.534d810db5377p-15, 0x1.a1a7ec86c94fbp-11 },
+ { 0x1.22e56de90dc1ap-15, 0x1.665a9398034f1p-11 },
+ { 0x1.f2bb06a7069e2p-16, 0x1.336f30c8d3345p-11 },
+ { 0x1.ab79b6edb04e1p-16, 0x1.07b7cbf13abf4p-11 },
+ { 0x1.6e5b33b150249p-16, 0x1.c461717dacbd8p-12 },
+ { 0x1.39f005226a7dbp-16, 0x1.83f56253c12f1p-12 },
+ { 0x1.0cfc8192e69bdp-16, 0x1.4cab82baddd6cp-12 },
+ { 0x1.cce310b024fd4p-17, 0x1.1d39d04e50424p-12 },
+ { 0x1.8acc81455f971p-17, 0x1.e9094beff3587p-13 },
+ { 0x1.522570529739fp-17, 0x1.a3308036822dbp-13 },
+ { 0x1.219685023e1bep-17, 0x1.67464f8a36affp-13 },
+ { 0x1.eff1f945e7f7bp-18, 0x1.33e2c9c277148p-13 },
+ { 0x1.a89fa515a2b44p-18, 0x1.07d0b7bb52fc7p-13 },
+ { 0x1.6b83bb4ee4348p-18, 0x1.c40cfbd11fd1p-14 },
+ { 0x1.372982e2fde1dp-18, 0x1.833ffa698fa8bp-14 },
+ { 0x1.0a51297b20ab7p-18, 0x1.4bb29dadf3acp-14 },
+ { 0x1.c7d093fb7e463p-19, 0x1.1c147957723bdp-14 },
+ { 0x1.8607006600009p-19, 0x1.e6896f5762306p-15 },
+ { 0x1.4db1c7b733812p-19, 0x1.a096cc3260668p-15 },
+ { 0x1.1d76959a6b622p-19, 0x1.64a7647d3f88ap-15 },
+ { 0x1.e858d8b3acc8p-20, 0x1.314deba7bab37p-15 },
+ { 0x1.a1a94b14e3d7fp-20, 0x1.0550e92636252p-15 },
+ { 0x1.6529df3d1cf1cp-20, 0x1.bf46cd0f972c3p-16 },
+ { 0x1.316449a955429p-20, 0x1.7ebd49fbb30eep-16 },
+ { 0x1.0517b9e1f89dep-20, 0x1.47796af08285bp-16 },
+ { 0x1.be627dddb55d7p-21, 0x1.1827a73755ec7p-16 },
+ { 0x1.7d8a7f2a8a2dp-21, 0x1.df49a10ccc568p-17 },
+ { 0x1.4613bf000c71dp-21, 0x1.99ee7037b652bp-17 },
+ { 0x1.16a45fcb7b882p-21, 0x1.5e9197017791dp-17 },
+ { 0x1.dc283bcbe780fp-22, 0x1.2bc40c543e36bp-17 },
+ { 0x1.96ca751cac37fp-22, 0x1.004b34180a4a9p-17 },
+ { 0x1.5b7cd13179ddep-22, 0x1.b632d58444fadp-18 },
+ { 0x1.28cb2cb8b4015p-22, 0x1.768f3e13d3bdcp-18 },
+ { 0x1.faedd62dabd96p-23, 0x1.401fa7657909ep-18 },
+ { 0x1.b0de982dbf111p-23, 0x1.1190d162109abp-18 },
+ { 0x1.7195b2becea19p-23, 0x1.d3803e22a78e4p-19 },
+ { 0x1.3b8387eea3f9dp-23, 0x1.8f694ad8ac632p-19 },
+ { 0x1.0d521f8291cd6p-23, 0x1.55326d6aac6fap-19 },
+ { 0x1.cbb9be9cbac1ep-24, 0x1.236e8d3a9e0e7p-19 },
+ { 0x1.8852e54d26542p-24, 0x1.f1ca221c0b98bp-20 },
+ { 0x1.4ec36b8fdf428p-24, 0x1.a914b62872bc3p-20 },
+ { 0x1.1d9d0055d11dp-24, 0x1.6af2ae42db58p-20 },
+ { 0x1.e74cb7ebdea0ap-25, 0x1.35dbe86ed95c7p-20 },
+ { 0x1.9fa735b03463ap-25, 0x1.0880cfe68041ep-20 },
+ { 0x1.627f6220ca6a9p-25, 0x1.c3847cbf78a3bp-21 },
+ { 0x1.2e4d9d8b5b22fp-25, 0x1.81550cf271bfdp-21 },
+ { 0x1.01c325e8bb3cp-25, 0x1.48cefa0aac509p-21 },
+ { 0x1.b783bc148fcefp-26, 0x1.188ab9ce5fdddp-21 },
+ { 0x1.76aa8791eba33p-26, 0x1.dea9996bf1c0fp-22 },
+ { 0x1.3f58d390caeecp-26, 0x1.984c7bb9c53ffp-22 },
+ { 0x1.10299f255a2cap-26, 0x1.5c3c6ce5f2f75p-22 },
+ { 0x1.cfd7e08a13b2p-27, 0x1.28f8faa7c3202p-22 },
+ { 0x1.8b368e0429dacp-27, 0x1.fa7304087353p-23 },
+ { 0x1.50b2501707be6p-27, 0x1.afca3c464e1d5p-23 },
+ { 0x1.1ecf2c897b782p-27, 0x1.701780b38d71ap-23 },
+ { 0x1.e891642306feep-28, 0x1.39c08dab159ep-23 },
+ { 0x1.a013c6709bdd5p-28, 0x1.0b66dac93672bp-23 },
+ { 0x1.624c9a2f2f8fcp-28, 0x1.c7bde43ebd873p-24 },
+ { 0x1.2da83d59392f5p-28, 0x1.84520ec5eb55ap-24 },
+ { 0x1.00ce3767b77a8p-28, 0x1.4ad54236cf6b4p-24 },
+ { 0x1.b5312d520a3f4p-29, 0x1.19d258cf47194p-24 },
+ { 0x1.74191dcab90bcp-29, 0x1.e015665e4efbdp-25 },
+ { 0x1.3ca855a30dad5p-29, 0x1.98dc92b26aeap-25 },
+ { 0x1.0d71d1069e44fp-29, 0x1.5c29c3e79c162p-25 },
+ { 0x1.ca7c7b61a5357p-30, 0x1.28708aaed4d7p-25 },
+ { 0x1.86083aaabaf73p-30, 0x1.f8bd2046619b5p-26 },
+ { 0x1.4bc21b880f9dep-30, 0x1.ada636f165959p-26 },
+ { 0x1.1a28183b0e32p-30, 0x1.6dafa60f704a1p-26 },
+ { 0x1.dfe23a6ad4f8bp-31, 0x1.37351629c53cp-26 },
+ { 0x1.980956bea8ccp-31, 0x1.08cff68f5874cp-26 },
+ { 0x1.5ae767663002ep-31, 0x1.c29ce58c1fc1p-27 },
+ { 0x1.26e4fd1165b76p-31, 0x1.7f5772973d16cp-27 },
+ { 0x1.f54dde2ba8f56p-32, 0x1.4612c5674eed9p-27 },
+ { 0x1.aa0af3e698b26p-32, 0x1.15539e864d70fp-27 },
+ { 0x1.6a0956d7d1b63p-32, 0x1.d7ad5cdc3741ep-28 },
+ { 0x1.339bd6e517d44p-32, 0x1.9110bc4b50f8cp-28 },
+ { 0x1.0554f0943ba8cp-32, 0x1.54fb970dbe54ep-28 },
+ { 0x1.bbfac9007ec07p-33, 0x1.21dd98bc7de87p-28 },
+ { 0x1.791862715d02fp-33, 0x1.ecc34851c9763p-29 },
+ { 0x1.403f77382e654p-33, 0x1.a2ca34863bfcbp-29 },
+ { 0x1.0feff2a4fc49p-33, 0x1.63e0d12d4d288p-29 },
+ { 0x1.cdc5de1ae8c09p-34, 0x1.2e615f0543e41p-29 },
+ { 0x1.8804761a993c4p-34, 0x1.00e4ae934cb56p-29 },
+ { 0x1.4cc23eb3b5ffap-34, 0x1.b471c42165f4ap-30 },
+ { 0x1.1a6c6c06ea18bp-34, 0x1.72b316e47cc93p-30 },
+ { 0x1.df58ab9ae4fcbp-35, 0x1.3ad1e7143aa75p-30 },
+ { 0x1.96bd0bd6c9a31p-35, 0x1.0b54bd6a9e23fp-30 },
+ { 0x1.59163428fb3a6p-35, 0x1.c5f4a785a88d1p-31 },
+ { 0x1.24be8d0138113p-35, 0x1.8162809b8dff6p-31 },
+ { 0x1.f09f3c1618809p-36, 0x1.4721b76389525p-31 },
+ { 0x1.a53148c3fc482p-36, 0x1.15a6678e0082cp-31 },
+ { 0x1.652d1d62b45e1p-36, 0x1.d73f8da963966p-32 },
+ { 0x1.2eda549c16ee8p-36, 0x1.8fdeb6a9e8ebcp-32 },
+ { 0x1.00c2a84aed164p-36, 0x1.5342fe16e83a5p-32 },
+ { 0x1.b3501c0fdbbcfp-37, 0x1.1fcdfea216d16p-32 },
+ { 0x1.70f8998ccf075p-37, 0x1.e83eb9bce31c4p-33 },
+ { 0x1.38b3a7222dd33p-37, 0x1.9e170e2dbff8cp-33 },
+ { 0x1.08fb437656229p-37, 0x1.5f27a9aa5f66p-33 },
+ { 0x1.c1085f96d9feep-38, 0x1.29bfa42bc7b76p-33 },
+ { 0x1.7c6a3cf1c9dcfp-38, 0x1.f8de2739c95a9p-34 },
+ { 0x1.423e65b2a3a8cp-38, 0x1.abfaa7d4233fap-34 },
+ { 0x1.10ef40de709bcp-38, 0x1.6ac1833360c58p-34 },
+ { 0x1.ce48f9d9e5928p-39, 0x1.336f5ff042b88p-34 },
+ { 0x1.8773adc5703cep-39, 0x1.0484d7ff5f6bdp-34 },
+ { 0x1.4b6e86a5aa9d8p-39, 0x1.b978904649f57p-35 },
+ { 0x1.189488e2e9743p-39, 0x1.760249f31a968p-35 },
+ { 0x1.db0100ef385d3p-40, 0x1.3cd13761f1731p-35 },
+ { 0x1.9206c1ae9fb29p-40, 0x1.0c569a0b1627cp-35 },
+ { 0x1.54382e8081943p-40, 0x1.c67fe1e83e91p-36 },
+ { 0x1.1fe13002859cap-40, 0x1.80dbcff1d72cfp-36 },
+ { 0x1.e71fde0c5e218p-41, 0x1.45d945dc4844dp-36 },
+ { 0x1.9c159bbc9900ap-41, 0x1.13da615eb6c5fp-36 },
+ { 0x1.5c8fc931c6d94p-41, 0x1.d2ffe78d87996p-37 },
+ { 0x1.26cb8c1920344p-41, 0x1.8b4017551e03bp-37 },
+ { 0x1.f295714275bc3p-42, 0x1.4e7bd56b77338p-37 },
+ { 0x1.a592ca70605e5p-42, 0x1.1b06621cfb60ep-37 },
+ { 0x1.646a234bddd88p-42, 0x1.dee83fc205fc8p-38 },
+ { 0x1.2d4a498c21371p-42, 0x1.9521701d324dap-38 },
+ { 0x1.fd5235020e009p-43, 0x1.56ad77d8efe38p-38 },
+ { 0x1.ae71657ff542ep-43, 0x1.21d11201bfbcfp-38 },
+ { 0x1.6bbc82f12468ap-43, 0x1.ea290040397f4p-39 },
+ { 0x1.3354802504d9ep-43, 0x1.9e7295f29cf91p-39 },
+ { 0x1.03a3b07cf84bp-43, 0x1.5e631fb2a96dbp-39 },
+ { 0x1.b6a52af7c7202p-44, 0x1.28313d62cbf4fp-39 },
+ { 0x1.727cc024d462ap-44, 0x1.f4b2d92a8da6ap-40 },
+ { 0x1.38e1c7590edafp-44, 0x1.a726cda9c5fc4p-40 },
+ { 0x1.083385f1e344cp-44, 0x1.6592390114765p-40 },
+ { 0x1.be229b5ed10ebp-45, 0x1.2e1e1bdc1cff3p-40 },
+ { 0x1.78a15c33bf0d1p-45, 0x1.fe77379b5869ap-41 },
+ { 0x1.3dea49bdca04dp-45, 0x1.af3202215009fp-41 },
+ { 0x1.0c5225e967ce3p-45, 0x1.6c30c15ee186bp-41 },
+ { 0x1.c4df14833b32ep-46, 0x1.338f646703f05p-41 },
+ { 0x1.7e2197e99732ep-46, 0x1.03b4338f71d3bp-41 },
+ { 0x1.4266d76b7e9efp-46, 0x1.b688e02001605p-42 },
+ { 0x1.0ff9aa4df55cbp-46, 0x1.72355f261c90fp-42 },
+ { 0x1.cad0ea9847218p-47, 0x1.387d609c076c8p-42 },
+ { 0x1.82f5884a3c4ffp-47, 0x1.07bcd8d61f54dp-42 },
+ { 0x1.4650f71159187p-47, 0x1.bd20f0d88c869p-43 },
+ { 0x1.1324c9f973607p-47, 0x1.77977767b819cp-43 },
+ { 0x1.cfef7f529f1bfp-48, 0x1.3ce0fee10ae91p-43 },
+ { 0x1.8716298a66d68p-48, 0x1.0b4fbeda58aa9p-43 },
+ { 0x1.49a2f582864b8p-48, 0x1.c2f0b2bc85943p-44 },
+ { 0x1.15cee56fb8f8p-48, 0x1.7c4f426570458p-44 },
+ { 0x1.d43356b5d1bc3p-49, 0x1.40b3e347db73ap-44 },
+ { 0x1.8a7d700826ce3p-49, 0x1.0e67b4f33d066p-44 },
+ { 0x1.4c57f38808af9p-49, 0x1.c7efb04c36011p-45 },
+ { 0x1.17f41219f6e6ep-49, 0x1.8055de49eb405p-45 },
+ { 0x1.d796294cc09e7p-50, 0x1.43f076e4dac86p-45 },
+ { 0x1.8d265709c8b81p-50, 0x1.11003322f9f2ap-45 },
+ { 0x1.4e6bf1c869176p-50, 0x1.cc169496c493bp-46 },
+ { 0x1.199123dce7f7cp-50, 0x1.83a55fe01c77fp-46 },
+ { 0x1.da12f38ef6065p-51, 0x1.4691f56a0b9d1p-46 },
+ { 0x1.8f0ced10d0db4p-51, 0x1.131565242338p-46 },
+ { 0x1.4fdbda9c9106cp-51, 0x1.cf5f3d25346p-47 },
+ { 0x1.1aa3b4e8f3caap-51, 0x1.8638e1112031dp-47 },
+ { 0x1.dba6023e1257ap-52, 0x1.489478d82c425p-47 },
+ { 0x1.902e5d96b5dc7p-52, 0x1.14a433d21a4e2p-47 },
+ { 0x1.50a589affacc9p-52, 0x1.d1c4c912f9acbp-48 },
+ { 0x1.1b2a2ba958505p-52, 0x1.880c8cf6ecf16p-48 },
+ { 0x1.dc4cfb90a7ce5p-53, 0x1.49f5031dc194p-48 },
+ { 0x1.9088f811b7254p-53, 0x1.15aa4ccc2f79bp-48 },
+ { 0x1.50c7d151d73dp-53, 0x1.d343a5202c7c4p-49 },
+ { 0x1.1b23bebdcda6dp-53, 0x1.891da95a3a6f5p-49 },
+ { 0x1.dc06e50abd949p-54, 0x1.4ab18582d9df2p-49 },
+ { 0x1.901c34297491p-54, 0x1.1626283914e64p-49 },
+ { 0x1.50427d64b1c7dp-54, 0x1.d3d994938f3adp-50 },
+ { 0x1.1a9076f0d2e24p-54, 0x1.896a9d7ab89b1p-50 },
+ { 0x1.dad425efa38efp-55, 0x1.4ac8e5c7c8723p-50 },
+ { 0x1.8ee8b30ca2586p-55, 0x1.16170c969f828p-50 },
+ { 0x1.4f1653e256f41p-55, 0x1.d385b6cd88b32p-51 },
+ { 0x1.19712f23cae3dp-55, 0x1.88f2f609fe4d3p-51 },
+ { 0x1.d8b686448b5afp-56, 0x1.4a3b00e506616p-51 },
+ { 0x1.8cf03de32b406p-56, 0x1.157d10888e2f3p-51 },
+ { 0x1.4d4512f22a65dp-56, 0x1.d2488978a2f74p-52 },
+ { 0x1.17c7923127a39p-56, 0x1.87b7664b4e00cp-52 },
+ { 0x1.d5b12a674c804p-57, 0x1.4908ab62a09acp-52 },
+ { 0x1.8a35c1621f2ccp-57, 0x1.14591aa0080cap-52 },
+ { 0x1.4ad16c988b007p-57, 0x1.d023e74fea7e1p-53 },
+ { 0x1.159616cbf8a0cp-57, 0x1.85b9c65443c51p-53 },
+ { 0x1.d1c88b489c5c3p-58, 0x1.4733af4601fe1p-53 },
+ { 0x1.86bd4690c0845p-58, 0x1.12acdf1c9738cp-53 },
+ { 0x1.47bf000e37ae9p-58, 0x1.cd1b037f7490bp-54 },
+ { 0x1.12dff96b26d81p-58, 0x1.82fd0e7486194p-54 },
+ { 0x1.cd026b64a0ca8p-59, 0x1.44bec79d5416cp-54 },
+ { 0x1.828be8d7b2e74p-59, 0x1.107adbae7661dp-54 },
+ { 0x1.441250d6b8cc7p-59, 0x1.c93261af2cd0dp-55 },
+ { 0x1.0fa934555eb5ap-59, 0x1.7f854fd47e7d3p-55 },
+ { 0x1.c765c89feb632p-60, 0x1.41ad99b7fc9ebp-55 },
+ { 0x1.7da7c97c8ea4bp-60, 0x1.0dc65148f57fcp-55 },
+ { 0x1.3fd0bbb47d67cp-60, 0x1.c46fcad39a071p-56 },
+ { 0x1.0bf675e9015a3p-60, 0x1.7b57aa64c1e42p-56 },
+ { 0x1.c0facb396944ap-61, 0x1.3e04ac23c3f11p-56 },
+ { 0x1.781800b4c5862p-61, 0x1.0a933c1a65e31p-56 },
+ { 0x1.3b0069a07f02dp-61, 0x1.beda3eeb5f0a2p-57 },
+ { 0x1.07cd15415698ap-61, 0x1.767a404101f5ap-57 },
+ { 0x1.b9cab20b7b4acp-62, 0x1.39c95b8dcd835p-57 },
+ { 0x1.71e48c82b190ap-62, 0x1.06e649c54a11dp-57 },
+ { 0x1.35a840f1bb9bfp-62, 0x1.b879e3daa485dp-58 },
+ { 0x1.0333055f872d1p-62, 0x1.70f426b1f5c67p-58 },
+ { 0x1.b1dfbc5f13465p-63, 0x1.3501cdad9df5bp-58 },
+ { 0x1.6b163d96b3dd9p-63, 0x1.02c4cdfc5722cp-58 },
+ { 0x1.2fcfd4e6913cap-63, 0x1.b157f19f267eap-59 },
+ { 0x1.fc5d8e0519af3p-64, 0x1.6acd55017e4e2p-59 },
+ { 0x1.a945119b38a65p-64, 0x1.2fb4e266d3e9fp-59 },
+ { 0x1.63b6a2745bde1p-64, 0x1.fc696b5025168p-60 },
+ { 0x1.297f53c6e927fp-64, 0x1.a97e9c202c067p-60 },
+ { 0x1.f18eb2ba6357fp-65, 0x1.640e915b3f3eap-60 },
+ { 0x1.a006a7219c6a4p-65, 0x1.29ea2353deb28p-60 },
+ { 0x1.5bcff1208eb99p-65, 0x1.f278f182d5ccep-61 },
+ { 0x1.22bf73da1838dp-65, 0x1.a0f8fae51588p-61 },
+ { 0x1.e60853b8b4b65p-66, 0x1.5cc15bf9dbbbbp-61 },
+ { 0x1.963124add21cp-66, 0x1.23a9b1f0c9515p-61 },
+ { 0x1.536cefa1810b4p-66, 0x1.e7c6162103b4ep-62 },
+ { 0x1.1b995f6e584afp-66, 0x1.97d2ef035140ap-62 },
+ { 0x1.d9da06644bc9dp-67, 0x1.54efd8e5e8a15p-62 },
+ { 0x1.8bd1c79049ec2p-67, 0x1.1cfc34a10ee47p-62 },
+ { 0x1.4a98db9bff0e8p-67, 0x1.dc5f9803d5324p-63 },
+ { 0x1.1416a031bacf2p-67, 0x1.8e1907994f8d3p-63 },
+ { 0x1.cd13f7b7c3414p-68, 0x1.4ca4b88f6234cp-63 },
+ { 0x1.80f645203dff7p-68, 0x1.15eac2ce52257p-63 },
+ { 0x1.415f515af2672p-68, 0x1.d054eb8db2ad5p-64 },
+ { 0x1.0c410a1d6b3cap-68, 0x1.83d8652f7235cp-64 },
+ { 0x1.bfc6c8b2d1c95p-69, 0x1.43eb1f8cfdcf1p-64 },
+ { 0x1.75acacc068ebep-69, 0x1.0e7ed05fb3af3p-64 },
+ { 0x1.37cc328e513e5p-69, 0x1.c3b617ec3cfd6p-65 },
+ { 0x1.0422a6340a512p-69, 0x1.791e9c59e2b42p-65 },
+ { 0x1.b2036a988beadp-70, 0x1.3ace8dce03fbdp-65 },
+ { 0x1.6a0349d192d1ap-70, 0x1.06c218ca5f25ap-65 },
+ { 0x1.2deb8d0dae905p-70, 0x1.b69393c895b87p-66 },
+ { 0x1.f78b3aa5bebbep-71, 0x1.6df997f6bab1bp-66 },
+ { 0x1.a3dafb67a96cfp-71, 0x1.315ac58b7d6b7p-66 },
+ { 0x1.5e0885ebd9cc3p-71, 0x1.fd7d13f78002dp-67 },
+ { 0x1.23c981e88b022p-71, 0x1.a8fe21d205ebp-67 },
+ { 0x1.e66846a73c925p-72, 0x1.62777b62fde0cp-67 },
+ { 0x1.955ea2f392221p-72, 0x1.279bb2446baf4p-67 },
+ { 0x1.51cacbb42476ep-72, 0x1.ecfc5eb955129p-68 },
+ { 0x1.19722d0b598a4p-72, 0x1.9b06ad8cbcafbp-68 },
+ { 0x1.d4f0c5733dbc9p-73, 0x1.56a684fe99fcap-68 },
+ { 0x1.869f70ffc1fcbp-73, 0x1.1d9d500e92622p-68 },
+ { 0x1.45586a9e82938p-73, 0x1.dc163a555fefbp-69 },
+ { 0x1.0ef18dbc017ffp-73, 0x1.8cbe28ca7c426p-69 },
+ { 0x1.c338d2435fb4bp-74, 0x1.4a94f1540c9eap-69 },
+ { 0x1.77ae3cb88b469p-74, 0x1.136b93820fc76p-69 },
+ { 0x1.38bf7be87e681p-74, 0x1.cadeb8c3bba05p-70 },
+ { 0x1.0453702b9a5bbp-74, 0x1.7e356a2db5e15p-70 },
+ { 0x1.b154294e891dap-75, 0x1.3e50df3387f95p-70 },
+ { 0x1.689b85dc875b1p-75, 0x1.09125281c373ap-70 },
+ { 0x1.2c0dc90fab5bap-75, 0x1.b969aedac7779p-71 },
+ { 0x1.f346b0aa94647p-76, 0x1.6f7d0d10edd84p-71 },
+ { 0x1.9f5604d9610bp-76, 0x1.31e8350b95daep-71 },
+ { 0x1.597757e14e4e8p-76, 0x1.fd3a5c3ac18bbp-72 },
+ { 0x1.1f50b401397f7p-76, 0x1.a7ca8fa24018p-72 },
+ { 0x1.ddd8dcb76e388p-77, 0x1.60a5532471804p-72 },
+ { 0x1.8d50fcdd2a012p-77, 0x1.256887c26e498p-72 },
+ { 0x1.4a512f5483d32p-77, 0x1.e82efb884fa7p-73 },
+ { 0x1.129521372a709p-77, 0x1.961449f1f5f93p-73 },
+ { 0x1.c872d91eff745p-78, 0x1.51be080b9d49dp-73 },
+ { 0x1.7b56e9895b756p-78, 0x1.18df034ba2c47p-73 },
+ { 0x1.3b37e1b01d1bdp-78, 0x1.d31877f1753bap-74 },
+ { 0x1.05e763ef1c6e1p-78, 0x1.845928aac023dp-74 },
+ { 0x1.b3291e83a6ddap-79, 0x1.42d6673958cf7p-74 },
+ { 0x1.6978c8d7d61b8p-79, 0x1.0c58552d896bdp-74 },
+ { 0x1.2c3987ce2b431p-79, 0x1.be0be95f0126ep-75 },
+ { 0x1.f2a6593b4ee39p-80, 0x1.72aab5cc51918p-75 },
+ { 0x1.9e0f0cfd57ab4p-80, 0x1.33fd04413c4e8p-75 },
+ { 0x1.57c6a75ebbd36p-80, 0x1.ffc132424c87ap-76 },
+ { 0x1.1d636b1da2b46p-80, 0x1.a91d6af35687bp-76 },
+ { 0x1.d9c6f3705063cp-81, 0x1.6119a09e14fe5p-76 },
+ { 0x1.8936d384f421ap-81, 0x1.253fb5c838ba6p-76 },
+ { 0x1.464f8c7e074fcp-81, 0x1.e7068fdcaeb4ep-77 },
+ { 0x1.0ec1f5aebc21fp-81, 0x1.945fff2eb1b17p-77 },
+ { 0x1.c14515cb6f8fp-82, 0x1.4fb5a7146299ap-77 },
+ { 0x1.74b15b6eeceb1p-82, 0x1.16ab8334ccb0ap-77 },
+ { 0x1.352169fa33216p-82, 0x1.ce965139dad89p-78 },
+ { 0x1.0060a522d6818p-82, 0x1.7fe578074e0c8p-78 },
+ { 0x1.a933ad3e37ea3p-83, 0x1.3e8d828e807b4p-78 },
+ { 0x1.608e37fe916b7p-83, 0x1.084c9533fea9dp-78 },
+ { 0x1.24490f08ca22dp-83, 0x1.b68488148e38cp-79 },
+ { 0x1.e4940102c0a26p-84, 0x1.6bbe630bdc58cp-79 },
+ { 0x1.91a40479b1837p-84, 0x1.2daed7fd23569p-79 },
+ { 0x1.4cdb9a0d20ef7p-84, 0x1.f45c523b5ec4ep-80 },
+ { 0x1.13d21ec7ce7a5p-84, 0x1.9ee3b5d440d2p-80 },
+ { 0x1.c90f21d2d475fp-85, 0x1.57f9f997e1f52p-80 },
+ { 0x1.7aa5b8d4b4359p-85, 0x1.1d262b74c69e4p-80 },
+ { 0x1.39a647b21bed6p-85, 0x1.d8b50e711660ap-81 },
+ { 0x1.03c70a0dadb1dp-85, 0x1.87c4bc616ed3dp-81 },
+ { 0x1.ae43ba1c85bb1p-86, 0x1.44a615135e868p-81 },
+ { 0x1.6446b3db12c58p-86, 0x1.0cfed72363bb7p-81 },
+ { 0x1.26f997cdc041dp-86, 0x1.bdb5f7a82d0f4p-82 },
+ { 0x1.e86218ea3e6acp-87, 0x1.7136d3b897e11p-82 },
+ { 0x1.9440cec9f5e3ap-87, 0x1.31cf2729ac24dp-82 },
+ { 0x1.4e93295651e9bp-87, 0x1.fa860b2bf75f8p-83 },
+ { 0x1.14df714b2cc27p-87, 0x1.a36fa64c5b19fp-83 },
+ { 0x1.ca3058fde005fp-88, 0x1.5b478418ed951p-83 },
+ { 0x1.7b135dc219792p-88, 0x1.1f8035d726d41p-83 },
+ { 0x1.3995999427ba7p-88, 0x1.dbf75e60682c2p-84 },
+ { 0x1.03604de581436p-88, 0x1.89f0afa1deecap-84 },
+ { 0x1.ad067d36fa2c8p-89, 0x1.4602a49df0a52p-84 },
+ { 0x1.62c6642f5d4b9p-89, 0x1.0dc2db21eaf21p-84 },
+ { 0x1.2556d7a42568ap-89, 0x1.be61355e30a98p-85 },
+ { 0x1.e5068065139bep-90, 0x1.7145a7dd1cf8cp-85 },
+ { 0x1.90efd5cd13c3p-90, 0x1.31725e0702649p-85 },
+ { 0x1.4b62e9374c452p-90, 0x1.f93e90900fd6bp-86 },
+ { 0x1.11de133cc6916p-90, 0x1.a1d0c10ff74dfp-86 },
+ { 0x1.c49bf95c5f745p-91, 0x1.597928f3e0c7p-86 },
+ { 0x1.75f56ab48bd89p-91, 0x1.1d9f316556fccp-86 },
+ { 0x1.34f00cbd8ea42p-91, 0x1.d8389849eaf01p-87 },
+ { 0x1.fe61cbe17950dp-92, 0x1.8650e1db268ebp-87 },
+ { 0x1.a589caf82618cp-92, 0x1.4293ddcb013c1p-87 },
+ { 0x1.5c1e107375834p-92, 0x1.0a90025fd130cp-87 },
+ { 0x1.1f7319c565581p-92, 0x1.b87eb911fc5efp-88 },
+ { 0x1.daa6c6af5c17fp-93, 0x1.6bea387f6b0ap-88 },
+ { 0x1.87d63120a742cp-93, 0x1.2c9c915a28ddap-88 },
+ { 0x1.436e80df031fp-93, 0x1.f094496a5e827p-89 },
+ { 0x1.0aef9bffa708dp-93, 0x1.9a19446f657ccp-89 },
+ { 0x1.b890579385cdcp-94, 0x1.52a33b4b8094cp-89 },
+ { 0x1.6b84ffdb5d885p-94, 0x1.179841589cdp-89 },
+ { 0x1.2be9773700384p-94, 0x1.cda2d93f291abp-90 },
+ { 0x1.eecef0206652cp-95, 0x1.7d0e0e7cac5bp-90 },
+ { 0x1.9821029662ccfp-95, 0x1.3a804f20fd2f4p-90 },
+ { 0x1.5097c74b3d08ep-95, 0x1.038a34010e13fp-90 },
+ { 0x1.158fcf12f6c8ep-95, 0x1.ac508371be502p-91 },
+ { 0x1.c9b60c296975dp-96, 0x1.61608ea10db83p-91 },
+ { 0x1.7958bc88e6006p-96, 0x1.2383e3bce375p-91 },
+ { 0x1.370dfa8e149d1p-96, 0x1.e0e820ef7463p-92 },
+ { 0x1.0060a594f59c7p-96, 0x1.8c9f67fa9c048p-92 },
+ { 0x1.a6925bee98d74p-97, 0x1.471203b047e85p-92 },
+ { 0x1.5c351b499632p-97, 0x1.0dae92b93887p-92 },
+ { 0x1.1ee518d278c58p-97, 0x1.bcabf2ba981bfp-93 },
+ { 0x1.d8b2f8b0b2924p-98, 0x1.6e8f25135d13fp-93 },
+ { 0x1.855f0a34582a6p-98, 0x1.2e219acb023aep-93 },
+ { 0x1.40b1881e58e3p-98, 0x1.f1fe817902cebp-94 },
+ { 0x1.0818d80634105p-98, 0x1.9a5d5233d8e13p-94 },
+ { 0x1.b2ecbb2e8d76cp-99, 0x1.521d0766f8b85p-94 },
+ { 0x1.6614d9da549fbp-99, 0x1.168c985c93c95p-94 },
+ { 0x1.26c7736a63e7fp-99, 0x1.cae6809d7d445p-95 },
+ { 0x1.e546a107b57d5p-100, 0x1.79f71edd3cb51p-95 },
+ { 0x1.8f64020effd9cp-100, 0x1.37443c37e4835p-95 },
+ { 0x1.48aa64075b15p-100, 0x1.004e8297ce819p-95 },
+ { 0x1.0e6e891142764p-100, 0x1.a60ceba01346ap-96 },
+ { 0x1.bcfa525d16889p-101, 0x1.5b71dfbe662f9p-96 },
+ { 0x1.6e0be1ed4e4ccp-101, 0x1.1dfe04c5b884ap-96 },
+ { 0x1.2d14568fa3103p-101, 0x1.d6c299b6b03dep-97 },
+ { 0x1.ef39c9c67da7p-102, 0x1.8366f8264d161p-97 },
+ { 0x1.973b86e9a718fp-102, 0x1.3ec401194be5fp-97 },
+ { 0x1.4ed55e6d4d5dfp-102, 0x1.0641ea45be131p-97 },
+ { 0x1.1345b1de4a541p-102, 0x1.af7b06dd7c2fap-98 },
+ { 0x1.c48e8cf8e20edp-103, 0x1.62e7924beab28p-98 },
+ { 0x1.73f6cd7db5a56p-103, 0x1.23e2123cac1dcp-98 },
+ { 0x1.31afb2e91937bp-103, 0x1.e00be39adba8fp-99 },
+ { 0x1.f6600b76754fcp-104, 0x1.8ab4ee2717624p-99 },
+ { 0x1.9cc2881babafp-104, 0x1.447fa5b4e25fep-99 },
+ { 0x1.5316d5b010b17p-104, 0x1.0abf02c055867p-99 },
+ { 0x1.1688993cfebe3p-104, 0x1.b67d9f35f4de8p-100 },
+ { 0x1.c98758b0a4ebap-105, 0x1.685ccfe1e2ab5p-100 },
+ { 0x1.77baf72da4868p-105, 0x1.281e65593d67p-100 },
+ { 0x1.3484c1e2418cbp-105, 0x1.e698bd1000fd2p-101 },
+ { 0x1.fa991c211034p-106, 0x1.8fc0326c87b11p-101 },
+ { 0x1.9fe006460b912p-106, 0x1.485d5ed97243ep-101 },
+ { 0x1.555b844a27ecdp-106, 0x1.0db191585c5a2p-101 },
+ { 0x1.182875c9f3984p-106, 0x1.baf50ff65044dp-102 },
+ { 0x1.cbce2423a80acp-107, 0x1.6bb8ebe73c54ap-102 },
+ { 0x1.794741d4d28c6p-107, 0x1.2a9fd1221e357p-102 },
+ { 0x1.3586a18110b0ep-107, 0x1.ea4b746dbeae3p-103 },
+ { 0x1.fbd1c1dcb3991p-108, 0x1.9271dfe5687e7p-103 },
+ { 0x1.a085cf5d6c87ep-108, 0x1.4a4b9ae2c857dp-103 },
+ { 0x1.559911f8b7812p-108, 0x1.0f0c2d578f06ap-103 },
+ { 0x1.181ddd71c27fbp-108, 0x1.bccd0201398bap-104 },
+ { 0x1.cb5889458c00ep-109, 0x1.6cec95dfef21ap-104 },
+ { 0x1.789499da6bff1p-109, 0x1.2b5ae7721763fp-104 },
+ { 0x1.34b0b5ddf82c6p-109, 0x1.eb1327842cc63p-105 },
+ { 0x1.fa04646636ebep-110, 0x1.92bda7bca05b7p-105 },
+ { 0x1.9eb0ea42d451ep-110, 0x1.4a4186866270ap-105 },
+ { 0x1.53ce6234f7db7p-110, 0x1.0ec8a57831ec5p-105 },
+ { 0x1.1668fdbb007d5p-110, 0x1.bbfd05e1b64f3p-106 },
+ { 0x1.c8289c5fd0187p-111, 0x1.6bf24d893426cp-106 },
+ { 0x1.75a62b0407aefp-111, 0x1.2a4c4fb42b862p-106 },
+ { 0x1.3206cc37b0e4ap-111, 0x1.e8ec43d273fbap-107 },
+ { 0x1.f53937c26236ep-112, 0x1.90a22ee0d506ep-107 },
+ { 0x1.9a69ad7793258p-112, 0x1.483f4fee6553cp-107 },
+ { 0x1.50039cbf56e41p-112, 0x1.0ce82f0139653p-107 },
+ { 0x1.13119a81ee824p-112, 0x1.b888d3fea2a71p-108 },
+ { 0x1.c24cdc6a6909bp-113, 0x1.68ce8cbb7eaebp-108 },
+ { 0x1.7089487e1182ep-113, 0x1.2778e05f0f826p-108 },
+ { 0x1.2d94fe2dcd5a4p-113, 0x1.e3e0a1bcb7b9p-109 },
+ { 0x1.ed85fe218f015p-114, 0x1.8c29185861611p-109 },
+ { 0x1.93c37ffa2be3p-114, 0x1.444e2559eb861p-109 },
+ { 0x1.4a49efe08b764p-114, 0x1.09735c9244f77p-109 },
+ { 0x1.0e26d33274acdp-114, 0x1.b28030446d467p-110 },
+ { 0x1.b9dfc560135fp-115, 0x1.638fa554a9791p-110 },
+ { 0x1.6955081ac80b2p-115, 0x1.22ed7a20d2031p-110 },
+ { 0x1.276f565251c73p-115, 0x1.dc07399fb9ebdp-111 },
+ { 0x1.e30d639687648p-116, 0x1.8566bbf3afdccp-111 },
+ { 0x1.8adc46e842374p-116, 0x1.3e7fef514c8f7p-111 },
+ { 0x1.42bb0eedd3fb2p-116, 0x1.0479dd0162987p-111 },
+ { 0x1.07beb0edff1b8p-116, 0x1.a9fe7272a642bp-112 },
+ { 0x1.af070915be74ep-117, 0x1.5c4d5495043b3p-112 },
+ { 0x1.602994f04daa5p-117, 0x1.1cbea64272b5fp-112 },
+ { 0x1.1fb139d7ad13p-117, 0x1.d18375dee0b86p-113 },
+ { 0x1.d5fdfa65dd70dp-118, 0x1.7c798c690caf6p-113 },
+ { 0x1.7fdb85ec65bd4p-118, 0x1.36eec953c25e3p-113 },
+ { 0x1.39787263ebbcap-118, 0x1.fc2409fc1812ep-114 },
+ { 0x1.ffeb0495cc103p-119, 0x1.9f29b80329143p-114 },
+ { 0x1.a1f276c1aeb71p-119, 0x1.5328106ecc8f8p-114 },
+ { 0x1.552f40714fe54p-119, 0x1.1507fc4d2f4bap-114 },
+ { 0x1.167c9d827337cp-119, 0x1.c484291d11ffp-115 },
+ { 0x1.c690e28b6a9bfp-120, 0x1.7189333483e3bp-115 },
+ { 0x1.72f13b97db104p-120, 0x1.2dbc3e931f24dp-115 },
+ { 0x1.2eaa616a9b21cp-120, 0x1.ecb050b3055ap-116 },
+ { 0x1.edda16b7edc87p-121, 0x1.9231c8255bcdbp-116 },
+ { 0x1.92da9c960076ap-121, 0x1.4848161f4e509p-116 },
+ { 0x1.48955baf138afp-121, 0x1.0beb55467080ap-116 },
+ { 0x1.0bf90e157d9dap-121, 0x1.b542338309321p-117 },
+ { 0x1.b5082a5d8de09p-122, 0x1.64c56b8fb3cecp-117 },
+ { 0x1.6454856772fedp-122, 0x1.231052b5f7dd6p-117 },
+ { 0x1.227ecea87251dp-122, 0x1.dadb937ed07ebp-118 },
+ { 0x1.d99724acabf71p-123, 0x1.834eb55a1d18ep-118 },
+ { 0x1.81ff31715569ap-123, 0x1.3bdc43dd8955fp-118 },
+ { 0x1.3a90e48619574p-123, 0x1.018fd4cd15479p-118 },
+ { 0x1.005296113b586p-123, 0x1.a3fee5158c03fp-119 },
+ { 0x1.a1acf8c750894p-124, 0x1.5664a8518a142p-119 },
+ { 0x1.54421936100c1p-124, 0x1.171860917e7c8p-119 },
+ { 0x1.152813e135602p-124, 0x1.c6f152728fb8fp-120 },
+ { 0x1.c375a4cba7b23p-125, 0x1.72bf4ab4db677p-120 },
+ { 0x1.6fa5568fa20f3p-125, 0x1.2e18c95c4bfb1p-120 },
+ { 0x1.2b5b13ef0805cp-125, 0x1.ec41a3d4cf576p-121 },
+ { 0x1.e77117811a7d2p-126, 0x1.91022d83bf8f5p-121 },
+ { 0x1.8ccd934db2cbp-126, 0x1.46a292659269ep-121 },
+ { 0x1.42faa33070d2ap-126, 0x1.0a05da41d6048p-121 },
+ { 0x1.06db98d7f6125p-126, 0x1.b14375f322de2p-122 },
+ { 0x1.abcdbdfcc9f7cp-127, 0x1.60c75486158bp-122 },
+ { 0x1.5c15c23fbb403p-127, 0x1.1f35bc35fb59fp-122 },
+ { 0x1.1b2fdb7cab6dfp-127, 0x1.d39954e0a9d3dp-123 },
+ { 0x1.ccb8a64624f6cp-128, 0x1.7c98ab66270f5p-123 },
+ { 0x1.76bb52e82b59ap-128, 0x1.35be6eb898758p-123 },
+ { 0x1.30c117f001ac3p-128, 0x1.f819edd38db9cp-124 },
+ { 0x1.efa0e49e3feccp-129, 0x1.9a2821242ebdp-124 },
+ { 0x1.92fa046d58d4ep-129, 0x1.4dadd528d6ea9p-124 },
+ { 0x1.479ae4e865feep-129, 0x1.0f6d9e092345cp-124 },
+ { 0x1.0a4c603089f16p-129, 0x1.b987187720ae4p-125 },
+ { 0x1.b0e03e96a5485p-130, 0x1.6711ad9310ce1p-125 },
+ { 0x1.5fc89a9e03199p-130, 0x1.23f97aea9f29fp-125 },
+ { 0x1.1dd90a3522c75p-130, 0x1.dac6b554960ffp-126 },
+ { 0x1.d07c0b8b30398p-131, 0x1.81f77dc55f2bdp-126 },
+ { 0x1.795540ea5dda7p-131, 0x1.39bb36d1a51dap-126 },
+ { 0x1.327f191dd6247p-131, 0x1.fdf7c425dfb89p-127 },
+ { 0x1.f1db008e061d6p-132, 0x1.9e6c7f42ee3ap-127 },
+ { 0x1.944b7c8850269p-132, 0x1.50bd38f4b0e14p-127 },
+ { 0x1.4846e1e475567p-132, 0x1.11954fcd9d596p-127 },
+ { 0x1.0a8512d6deebp-132, 0x1.bc7d8a23288e1p-128 },
+ { 0x1.b0b57b848dfd5p-133, 0x1.69099571fea27p-128 },
+ { 0x1.5f385601a1095p-133, 0x1.25378a982372p-128 },
+ { 0x1.1d0aee3f21eaep-133, 0x1.dc36feecfa2bap-129 },
+ { 0x1.ce9ce0f1b56b8p-134, 0x1.82a9fb7ad076bp-129 },
+ { 0x1.775af322a6fb6p-134, 0x1.39ea243c7bf71p-129 },
+ { 0x1.3084e2fb958e5p-134, 0x1.fda4af81b306ap-130 },
+ { 0x1.ee0aaff5c7275p-135, 0x1.9da7a2c5ab52cp-130 },
+ { 0x1.90b5b261712acp-135, 0x1.4fb44aa933f5cp-130 },
+ { 0x1.44f853ca3d2a1p-135, 0x1.1068e39733d5fp-130 },
+ { 0x1.07839b24e2329p-135, 0x1.ba0b385a9673fp-131 },
+ { 0x1.ab4ef712ea53cp-136, 0x1.669cb88b98bb4p-131 },
+ { 0x1.5a6a27edc2aafp-136, 0x1.22e458ff074e2p-131 },
+ { 0x1.18ccfb2383c0dp-136, 0x1.d7dccacf16bdfp-132 },
+ { 0x1.c72c7d427b5c7p-137, 0x1.7ea9a57d9c3fdp-132 },
+ { 0x1.70debd3477d7cp-137, 0x1.364981b4fcaccp-132 },
+ { 0x1.2ae4c8505c4dcp-137, 0x1.f723b60a4c45ap-133 },
+ { 0x1.e45347f37826dp-138, 0x1.97e0b5db827a8p-133 },
+ { 0x1.8859d9d834871p-138, 0x1.4a9cae44d02aap-133 },
+ { 0x1.3dcdd6f53a761p-138, 0x1.0bf347561e06fp-133 },
+ { 0x1.0163c7a1b8ce3p-138, 0x1.b246ea577dcd5p-134 },
+ { 0x1.a0de9e4d0326ap-139, 0x1.5fe1a8f2ffd47p-134 },
+ { 0x1.518a7407eb90ep-139, 0x1.1d15869af1a46p-134 },
+ { 0x1.1146574533e59p-139, 0x1.cde08f63664fdp-135 },
+ { 0x1.ba6f77161f191p-140, 0x1.761ba88bf6eedp-135 },
+ { 0x1.661c59f17faep-140, 0x1.2efafc89163c3p-135 },
+ { 0x1.21d2894bdd4c7p-140, 0x1.eab12c8aa7e5p-136 },
+ { 0x1.d50e0eba3e44dp-141, 0x1.8d4d432dee077p-136 },
+ { 0x1.7b84a5753cf1fp-141, 0x1.41a589d11cb19p-136 },
+ { 0x1.33091416396dbp-141, 0x1.045db9ec2ba81p-136 },
+ { 0x1.f0bb3ff173143p-142, 0x1.a57861242277fp-137 },
+ { 0x1.91c3cacc75aaap-142, 0x1.551681b8d361p-137 },
+ { 0x1.44ea256a84bbp-142, 0x1.140098b38820cp-137 },
+ { 0x1.06bb841410434p-142, 0x1.be9e2feb561ep-138 },
+ { 0x1.a8d98b0d5771p-143, 0x1.694e9fdcb7be5p-138 },
+ { 0x1.57755a2313bdfp-143, 0x1.24419d9ce37ffp-138 },
+ { 0x1.15a03d39bca43p-143, 0x1.d8bf1578b3aacp-139 },
+ { 0x1.c0c4e9f387792p-144, 0x1.7e4dfe2cee6a2p-139 },
+ { 0x1.6aa9b63079411p-144, 0x1.3520b0bf08a51p-139 },
+ { 0x1.250ad98a67e4fp-144, 0x1.f3daa3dd37f3ap-140 },
+ { 0x1.d9842421f4af1p-145, 0x1.94140b3abb78ep-140 },
+ { 0x1.7e859d0226582p-145, 0x1.469d2facc66f7p-140 },
+ { 0x1.34f9e5d4c96d3p-145, 0x1.07f7c6b04c092p-140 },
+ { 0x1.f314a5f5af6d7p-146, 0x1.aa9f80ec12e52p-141 },
+ { 0x1.9306ca687d568p-146, 0x1.58b5e63278412p-141 },
+ { 0x1.456b681315dafp-146, 0x1.167dcc97a0fd3p-141 },
+ { 0x1.06b98180e66fp-146, 0x1.c1ee5bab4ede7p-142 },
+ { 0x1.a82a4c036e3f3p-147, 0x1.6b69077bfc3c7p-142 },
+ { 0x1.565cda5d05a6ap-147, 0x1.257dcc5bc2717p-142 },
+ { 0x1.144d77262f022p-147, 0x1.d9fdd2296338fp-143 },
+ { 0x1.bdec7b50a66cp-148, 0x1.7eb427b4ddd71p-143 },
+ { 0x1.67cb265d8483ap-148, 0x1.34f5aee91217p-143 },
+ { 0x1.224399b226996p-148, 0x1.f2ca4dc8ff69fp-144 },
+ { 0x1.d448f86c23d12p-149, 0x1.92943634830d2p-144 },
+ { 0x1.79b2a15ae0faap-149, 0x1.44e2d8e947442p-144 },
+ { 0x1.3098d833c2dap-149, 0x1.0627b1e47c261p-144 },
+ { 0x1.eb3aa595948f3p-150, 0x1.a705784809825p-145 },
+ { 0x1.8c0f08dff4e68p-150, 0x1.554226cd542efp-145 },
+ { 0x1.3f49a8880f6adp-150, 0x1.1343e7a202e9p-145 },
+ { 0x1.015dd1c62a082p-150, 0x1.bc0384ab3550dp-146 },
+ { 0x1.9edb80143a705p-151, 0x1.660fe966c4e28p-146 },
+ { 0x1.4e52056f2dec4p-151, 0x1.20b6b60dae611p-146 },
+ { 0x1.0d62a769875ep-151, 0x1.d1893fc15ba16p-147 },
+ { 0x1.b2128dd015485p-152, 0x1.7747e31ddd25cp-147 },
+ { 0x1.5dad6d3a16694p-152, 0x1.2e7c997078049p-147 },
+ { 0x1.19a81ef58dfc6p-152, 0x1.e790d89e8e564p-148 },
+ { 0x1.c5ae1b79c4ee8p-153, 0x1.88e545d12ba57p-148 },
+ { 0x1.6d56e11abc8a7p-153, 0x1.3c919aea9787p-148 },
+ { 0x1.262a204b39df1p-153, 0x1.fe13c6f07b6aep-149 },
+ { 0x1.d9a774b67b183p-154, 0x1.9ae2b16a9550ap-149 },
+ { 0x1.7d48e51f6d6edp-154, 0x1.4af14f857334ep-149 },
+ { 0x1.32e43016e50e4p-154, 0x1.0a8564eab8ff5p-149 },
+ { 0x1.edf747f9f14f1p-155, 0x1.ad3a33350402p-150 },
+ { 0x1.8d7d80e14b91p-155, 0x1.5996d7e13f467p-150 },
+ { 0x1.3fd1708b687cbp-155, 0x1.1636f3d76858ap-150 },
+ { 0x1.014ad3fec9ec4p-155, 0x1.bfe545fce7a55p-151 },
+ { 0x1.9dee40ecc2982p-156, 0x1.687ce08618977p-151 },
+ { 0x1.4ceca2b27454p-156, 0x1.221a377d62eb4p-151 },
+ { 0x1.0bbd071377b87p-156, 0x1.d2dcd30499eb7p-152 },
+ { 0x1.ae9438e9a5c0bp-157, 0x1.779da2df7a30cp-152 },
+ { 0x1.5a30285652adp-157, 0x1.2e2a7c1fe1c5fp-152 },
+ { 0x1.164daef1c2b15p-157, 0x1.e61933d473856p-153 },
+ { 0x1.bf6806876a635p-158, 0x1.86f2e6e7e582ap-153 },
+ { 0x1.67960688424efp-158, 0x1.3a62b4892ce6ep-153 },
+ { 0x1.20f7f47f404a7p-158, 0x1.f99234ed0089ep-154 },
+ { 0x1.d061d530972c5p-159, 0x1.9676058974913p-154 },
+ { 0x1.7517e8c57f622p-159, 0x1.46bd7c1e28efp-154 },
+ { 0x1.2bb6ba79809edp-159, 0x1.069f8cb02119fp-154 },
+ { 0x1.e17962871247p-160, 0x1.a61febb6d574dp-155 },
+ { 0x1.82af24bbe81ddp-160, 0x1.53351984f5d61p-155 },
+ { 0x1.3684a09debb18p-160, 0x1.108b4faaa8971p-155 },
+ { 0x1.f2a603a977e7cp-161, 0x1.b5e91e3ee196dp-156 },
+ { 0x1.9054beadf5a51p-161, 0x1.5fc381e001854p-156 },
+ { 0x1.415c074fc9065p-161, 0x1.1a8782bc000bep-156 },
+ { 0x1.01ef55a0092e3p-161, 0x1.c5c9be5ba37d4p-157 },
+ { 0x1.9e016e74801cbp-162, 0x1.6c625c9dd5c05p-157 },
+ { 0x1.4c3713bae315dp-162, 0x1.248f08aa2a9f5p-157 },
+ { 0x1.0a8cf82738469p-162, 0x1.d5b98efc2e8d5p-158 },
+ { 0x1.abada51b7b47ep-163, 0x1.790b07dcc17ddp-158 },
+ { 0x1.570fb47030aa8p-163, 0x1.2e9c8b4dec3dep-158 },
+ { 0x1.13270ae279a57p-163, 0x1.e5affac730013p-159 },
+ { 0x1.b951931589ad6p-164, 0x1.85b69d604d483p-159 },
+ { 0x1.61dfa678e3296p-164, 0x1.38aa7fa8655e3p-159 },
+ { 0x1.1bb88966006c4p-164, 0x1.f5a41ad29abd6p-160 },
+ { 0x1.c6e52f00f28e6p-165, 0x1.925df815332e1p-160 },
+ { 0x1.6ca07adb2cabep-165, 0x1.42b32a68b6433p-160 },
+ { 0x1.243c4de072741p-165, 0x1.02c65f05a223cp-160 },
+ { 0x1.d4603cf73627ep-166, 0x1.9ef9ba1f58105p-161 },
+ { 0x1.774b9c8b0652p-166, 0x1.4cb0a4ddc2264p-161 },
+ { 0x1.2cad15ed5f00dp-166, 0x1.0ab038a2ddd17p-161 },
+ { 0x1.e1ba565f2f2dap-167, 0x1.ab82536c08c11p-162 },
+ { 0x1.81da56c03901cp-167, 0x1.569ce24f30cadp-162 },
+ { 0x1.350587b61e2e7p-167, 0x1.128ac3f80b9acp-162 },
+ { 0x1.eeeaf2386ba73p-168, 0x1.b7f008c184953p-163 },
+ { 0x1.8c45dba9ebaffp-168, 0x1.6071b5b7d5f0bp-163 },
+ { 0x1.3d40375ab2fc9p-168, 0x1.1a5112ad78884p-163 },
+ { 0x1.fbe96dd52dd2ap-169, 0x1.c43afb43abf3ap-164 },
+ { 0x1.96874b77050b3p-169, 0x1.6a28d7dab475p-164 },
+ { 0x1.4557ac9b8a4ffp-169, 0x1.21fe234726979p-164 },
+ { 0x1.04568afbad70bp-169, 0x1.d05b30647f5b6p-165 },
+ { 0x1.a097bba9c5bbap-170, 0x1.73bbedaae952fp-165 },
+ { 0x1.4d4668bc3c638p-170, 0x1.298ce64edbc52p-165 },
+ { 0x1.0a969821c25d4p-170, 0x1.dc489a35fd89p-166 },
+ { 0x1.aa703eac27071p-171, 0x1.7d248efdebaf1p-166 },
+ { 0x1.5506ec96ce1d8p-171, 0x1.30f843b6c62b7p-166 },
+ { 0x1.10b0827e1c59fp-171, 0x1.e7fb2011e1175p-167 },
+ { 0x1.b409eb99c2287p-172, 0x1.865c4d7ebd336p-167 },
+ { 0x1.5c93bed6568e9p-172, 0x1.383b206d0bb99p-167 },
+ { 0x1.169ff47b694c6p-172, 0x1.f36aa78ac249dp-168 },
+ { 0x1.bd5de633517f7p-173, 0x1.8f5cbbd7e3bd9p-168 },
+ { 0x1.63e7724f64774p-173, 0x1.3f5064180659dp-168 },
+ { 0x1.1c60a3dd2224ep-173, 0x1.fe8f1d993bb19p-169 },
+ { 0x1.c66566ef40333p-174, 0x1.981f750955121p-169 },
+ { 0x1.6afcac6c09d1ap-174, 0x1.4632fef2669ecp-169 },
+ { 0x1.21ee56dbc8c6ap-174, 0x1.04b03ffb7174ap-169 },
+ { 0x1.cf19c31a391acp-175, 0x1.a09e23dee12dbp-170 },
+ { 0x1.71ce2ba111a68p-175, 0x1.4cddefbe00daep-170 },
+ { 0x1.2744e94597dfp-175, 0x1.09eb734c1a314p-170 },
+ { 0x1.d77474fa3c96fp-176, 0x1.a8d28a7b21f9ep-171 },
+ { 0x1.7856cde19858bp-176, 0x1.534c49c3a48ap-171 },
+ { 0x1.2c60519b06073p-176, 0x1.0ef5469afe541p-171 },
+ { 0x1.df6f23e67822ep-177, 0x1.b0b689ea896fp-172 },
+ { 0x1.7e9197060941ap-177, 0x1.59793ad60d8abp-172 },
+ { 0x1.313ca61e59763p-177, 0x1.13c9ee6b2a529p-172 },
+ { 0x1.e703ac45eb1a5p-178, 0x1.b84429b1d33d8p-173 },
+ { 0x1.8479b71b66ff2p-178, 0x1.5f60114dc317ap-173 },
+ { 0x1.35d621cd7892fp-178, 0x1.1865baa279b03p-173 },
+ { 0x1.ee2c2766d39aep-179, 0x1.bf759f4ae6481p-174 },
+ { 0x1.8a0a908fbee34p-179, 0x1.64fc41f392bcdp-174 },
+ { 0x1.3a29293d26666p-179, 0x1.1cc51b3533d1bp-174 },
+ { 0x1.f4e2f320ed2f5p-180, 0x1.c645558315ad7p-175 },
+ { 0x1.8f3fbe30bc1d8p-180, 0x1.6a496dcf4682p-175 },
+ { 0x1.3e324f4cf0981p-180, 0x1.20e4a4b8e031ep-175 },
+ { 0x1.fb22b934b993p-181, 0x1.ccadf3adb1afp-176 },
+ { 0x1.941518f17ca26p-181, 0x1.6f4367d03dbd8p-176 },
+ { 0x1.41ee59ab3f625p-181, 0x1.24c114d62226p-176 },
+ { 0x1.00733b2d2d2a7p-181, 0x1.d2aa649df6e65p-177 },
+ { 0x1.9886bd6d1085bp-182, 0x1.73e63a45afd4dp-177 },
+ { 0x1.455a452136a6p-182, 0x1.285756918be22p-177 },
+ { 0x1.0314c07978175p-182, 0x1.d835dd5ba6335p-178 },
+ { 0x1.9c91111b6c15fp-183, 0x1.782e2c1c97a81p-178 },
+ { 0x1.4873499e69a71p-183, 0x1.2ba486638ab1ep-178 },
+ { 0x1.0573c7a800f18p-183, 0x1.dd4be385e972p-179 },
+ { 0x1.a030c72f0cf33p-184, 0x1.7c17c5d99552cp-179 },
+ { 0x1.4b36ddfcc8743p-184, 0x1.2ea5f617d321fp-179 },
+ { 0x1.078e5ec28bafdp-184, 0x1.e1e853589fe15p-180 },
+ { 0x1.a362e51221b9fp-185, 0x1.7f9fd64579e1ap-180 },
+ { 0x1.4da2bb75a5c65p-185, 0x1.3159306d0abdp-180 },
+ { 0x1.0962c95c3eb5p-185, 0x1.e6076548c0765p-181 },
+ { 0x1.a624c67aa97dfp-186, 0x1.82c376c3acddfp-181 },
+ { 0x1.4fb4e0c13d49p-186, 0x1.33bbfc6dd55a6p-181 },
+ { 0x1.0aef82f484486p-186, 0x1.e9a5b32d2ef52p-182 },
+ { 0x1.a874210dbadcfp-187, 0x1.85800f4a2d262p-182 },
+ { 0x1.516b94dabb86dp-187, 0x1.35cc607ce4fd8p-182 },
+ { 0x1.0c33410fd4c56p-187, 0x1.ecc03cea2935dp-183 },
+ { 0x1.aa4f078af0321p-188, 0x1.87d359f39448ep-183 },
+ { 0x1.52c5696370c9dp-188, 0x1.3788a50e33e44p-183 },
+ { 0x1.0d2cf5025ba2dp-188, 0x1.ef546c9652b0ap-184 },
+ { 0x1.abb3ec79d594dp-189, 0x1.89bb66243bfd5p-184 },
+ { 0x1.53c13ca08d951p-189, 0x1.38ef570827673p-184 },
+ { 0x1.0ddbcd68fc943p-189, 0x1.f1601a115b514p-185 },
+ { 0x1.aca1a45423b35p-190, 0x1.8b369b3c6ec4fp-185 },
+ { 0x1.545e3b0f8838ap-190, 0x1.39ff49c7fe5e8p-185 },
+ { 0x1.0e3f374dd9d68p-190, 0x1.f2e18e05495b4p-186 },
+ { 0x1.ad1767288e013p-191, 0x1.8c43bad265564p-186 },
+ { 0x1.549be08e15927p-191, 0x1.3ab798c59d4c2p-186 },
+ { 0x1.0e56def61fbc4p-191, 0x1.f3d7844c8a592p-187 },
+ { 0x1.ad14d1b2f0b5fp-192, 0x1.8ce1e26fb8214p-187 },
+ { 0x1.5479f9137160bp-192, 0x1.3b17a8d383f04p-187 },
+ { 0x1.0e22b05782284p-192, 0x1.f4412db819edfp-188 },
+ { 0x1.ac99e5e7b9269p-193, 0x1.8d108ccedcd75p-188 },
+ { 0x1.53f8a0f98a8b8p-193, 0x1.3b1f28f8795cap-188 },
+ { 0x1.0da2d734853ffp-193, 0x1.f41e3132440dap-189 },
+ { 0x1.aba70af1767bp-194, 0x1.8ccf9296410aep-189 },
+ { 0x1.531844d58365ep-194, 0x1.3ace12e143377p-189 },
+ { 0x1.0cd7bedf59779p-194, 0x1.f36eac3bc78c2p-190 },
+ { 0x1.aa3d0ca096eedp-195, 0x1.8c1f2a8f92477p-190 },
+ { 0x1.51d9a0dfd2e93p-195, 0x1.3a24aae988ae7p-190 },
+ { 0x1.0bc211a3c2859p-195, 0x1.f23332c263066p-191 },
+ { 0x1.a85d1a4e6bedcp-196, 0x1.8affe95ac6f2ap-191 },
+ { 0x1.503dbfed30324p-196, 0x1.39237fbbcfa18p-191 },
+ { 0x1.0a62b7d92f095p-196, 0x1.f06cce511da3ep-192 },
+ { 0x1.a608c535a2ba1p-197, 0x1.8972c09d7f45cp-192 },
+ { 0x1.4e45f9fa4adffp-197, 0x1.37cb698950bdap-192 },
+ { 0x1.08bad69ed20a4p-197, 0x1.ee1cfc9be3df9p-193 },
+ { 0x1.a341fe436d2d7p-198, 0x1.8778fdb058321p-193 },
+ { 0x1.4bf3f24d273a5p-198, 0x1.361d88db2b95bp-193 },
+ { 0x1.06cbce44363ecp-198, 0x1.eb45ad695330ap-194 },
+ { 0x1.a00b13659be7cp-199, 0x1.851447ccc879bp-194 },
+ { 0x1.4949952fc2371p-199, 0x1.341b44ff4c3c6p-194 },
+ { 0x1.0497386163a39p-199, 0x1.e7e93fdecaep-195 },
+ { 0x1.9c66ac5ae65b3p-200, 0x1.82469dbf1833ep-195 },
+ { 0x1.464915486577bp-200, 0x1.31c64a141680ep-195 },
+ { 0x1.021ee5a248c7fp-200, 0x1.e40a7f340982ap-196 },
+ { 0x1.9857c70b8b2bcp-201, 0x1.7f125320f1e94p-196 },
+ { 0x1.42f4e894cc71ap-201, 0x1.2f2086b6a5cf4p-196 },
+ { 0x1.fec9b69351b7p-202, 0x1.dfac9ed4c27cep-197 },
+ { 0x1.93e1b371520a1p-202, 0x1.7b7a0d21f0262p-197 },
+ { 0x1.3f4fc50de840ap-202, 0x1.2c2c295822108p-197 },
+ { 0x1.f8d6a0e0a9508p-203, 0x1.dad335f7aacdbp-198 },
+ { 0x1.8f080f16c57cp-203, 0x1.7780bee4609a1p-198 },
+ { 0x1.3b5c9cfaada16p-203, 0x1.28eb9d3f5000ap-198 },
+ { 0x1.f269560bdbf92p-204, 0x1.d5823ab37d92ep-199 },
+ { 0x1.89cec0363502dp-204, 0x1.7329a5753ca24p-199 },
+ { 0x1.371e9af8e6ccfp-204, 0x1.2561873c1cc7ap-199 },
+ { 0x1.eb86f931c309dp-205, 0x1.cfbdfc9b64d6ep-200 },
+ { 0x1.8439f081b525ap-205, 0x1.6e7843670c8d2p-200 },
+ { 0x1.32991dc38028ep-205, 0x1.2190c2136fc76p-200 },
+ { 0x1.e434fdd743954p-206, 0x1.c98b1eed08258p-201 },
+ { 0x1.7e4e079de1a2ep-206, 0x1.69705c180d6c1p-201 },
+ { 0x1.2dcfb3be31ebdp-206, 0x1.1d7c5aaa0949p-201 },
+ { 0x1.dc7920bafc5dcp-207, 0x1.c2ee925b3e3f6p-202 },
+ { 0x1.780fa5599d558p-207, 0x1.6415eeac7f744p-202 },
+ { 0x1.28c6164ec1235p-207, 0x1.19278bf59ff34p-202 },
+ { 0x1.d459605b63623p-208, 0x1.bbed8e8100752p-203 },
+ { 0x1.71839bad6a45bp-208, 0x1.5e6d30c67b96bp-203 },
+ { 0x1.2380250c57526p-208, 0x1.1495babbc8d8ep-203 },
+ { 0x1.cbdbf53eed588p-209, 0x1.b48d8b08c37b5p-204 },
+ { 0x1.6aaee88d3a5e6p-209, 0x1.587a8905112ebp-204 },
+ { 0x1.1e01e0cda0c0ep-209, 0x1.0fca71267dd26p-204 },
+ { 0x1.c3074a0c1c67dp-210, 0x1.acd43894c1f06p-205 },
+ { 0x1.6396af97c5f7fp-210, 0x1.52428954b7c2fp-205 },
+ { 0x1.184f669e7e645p-210, 0x1.0ac95a364b406p-205 },
+ { 0x1.b9e1f37f768c9p-211, 0x1.a4c779750fb77p-206 },
+ { 0x1.5c4033ae88d94p-211, 0x1.4bc9e91b546a8p-206 },
+ { 0x1.126ceaa621095p-211, 0x1.05963d1a5105bp-206 },
+ { 0x1.b072a84d6770bp-212, 0x1.9c6d5a387a6d7p-207 },
+ { 0x1.54b0d08180ac6p-212, 0x1.45157f4a2e598p-207 },
+ { 0x1.0c5eb30658611p-212, 0x1.0034f87652744p-207 },
+ { 0x1.a6c038fdf5aedp-213, 0x1.93cc0a254a9f5p-208 },
+ { 0x1.4cedf419a9b38p-213, 0x1.3e2a3c60327aap-208 },
+ { 0x1.062912bcc23f9p-213, 0x1.f552fb3e1c70bp-209 },
+ { 0x1.9cd187cff951cp-214, 0x1.8ae9d3a6eb66fp-209 },
+ { 0x1.44fd186d008c2p-214, 0x1.370d2466d3327p-209 },
+ { 0x1.ffa0c91caab55p-215, 0x1.e9ef97aa04b46p-210 },
+ { 0x1.92ad80b12a09bp-215, 0x1.81cd14bd535bbp-210 },
+ { 0x1.3ce3bd0683046p-215, 0x1.2fc348f3a8121p-210 },
+ { 0x1.f2b20c0b002abp-216, 0x1.de47d70b3398cp-211 },
+ { 0x1.885b1157e885cp-216, 0x1.787c377ac34cdp-211 },
+ { 0x1.34a760cc47acap-216, 0x1.2851c338b22e4p-211 },
+ { 0x1.e58ea51580badp-217, 0x1.d263d33512bb6p-212 },
+ { 0x1.7de1218b19542p-217, 0x1.6efdaa9c0e45ep-212 },
+ { 0x1.2c4d7bed4d522p-217, 0x1.20bdae2cd61c6p-212 },
+ { 0x1.d83f3d3e6d15p-218, 0x1.c64ba5bdb46dep-213 },
+ { 0x1.73468ba3c29b8p-218, 0x1.6557da47246f7p-213 },
+ { 0x1.23db7a001a935p-218, 0x1.190c20d5b5808p-213 },
+ { 0x1.cacc668087b83p-219, 0x1.ba075f0192b6p-214 },
+ { 0x1.689215536317fp-219, 0x1.5b9128fb09361p-214 },
+ { 0x1.1b56b45aac06fp-219, 0x1.114228bb99133p-214 },
+ { 0x1.bd3e92f58e3aep-220, 0x1.ad9efd6e7e35p-215 },
+ { 0x1.5dca68b92a62fp-220, 0x1.51afe8bbb6b6cp-215 },
+ { 0x1.12c46cab86e91p-220, 0x1.0964c48f92b05p-215 },
+ { 0x1.af9e0c680145ap-221, 0x1.a11a652260dp-216 },
+ { 0x1.52f60dcf5b39p-221, 0x1.47ba5483b6e8fp-216 },
+ { 0x1.0a29c7db10f7p-221, 0x1.0178df0b67157p-216 },
+ { 0x1.a1f2ec5b27de2p-222, 0x1.948157e97fbd7p-217 },
+ { 0x1.481b643932becp-222, 0x1.3db68a0470a4fp-217 },
+ { 0x1.018bc93b8e2e5p-222, 0x1.f306942454ae6p-218 },
+ { 0x1.9445149305037p-223, 0x1.87db6da6dd3cap-218 },
+ { 0x1.3d409d78b6819p-223, 0x1.33aa83bd4deabp-218 },
+ { 0x1.f1de9c1ab95aap-224, 0x1.e311742f9561bp-219 },
+ { 0x1.869c2824b4b6bp-224, 0x1.7b300d303ed2cp-219 },
+ { 0x1.326bb792c8c5bp-224, 0x1.299c1370fc2d1p-219 },
+ { 0x1.e0b212b870715p-225, 0x1.d31b83aa1a53bp-220 },
+ { 0x1.78ff85165ac91p-225, 0x1.6e8665a634affp-220 },
+ { 0x1.27a27826da7a5p-225, 0x1.1f90dcff1976ep-220 },
+ { 0x1.cf9b0072f8176p-226, 0x1.c32d9c998168ap-221 },
+ { 0x1.6b763e947db08p-226, 0x1.61e5684f4d137p-221 },
+ { 0x1.1cea67fe8699cp-226, 0x1.158e51a7ac97ep-221 },
+ { 0x1.bea20cad09b1fp-227, 0x1.b350464c51c99p-222 },
+ { 0x1.5e0717c155a1cp-227, 0x1.5553c2fc66728p-222 },
+ { 0x1.1248cf18568a2p-227, 0x1.0b99abbccdbb1p-222 },
+ { 0x1.adcf760300963p-228, 0x1.a38baebfb68e4p-223 },
+ { 0x1.50b87f214792dp-228, 0x1.48d7dafad7ffep-223 },
+ { 0x1.07c2b12fe4dbap-228, 0x1.01b7eac5ea688p-223 },
+ { 0x1.9d2b0d0c4a0b1p-229, 0x1.93e7a4bb0743p-224 },
+ { 0x1.43908aa677d25p-229, 0x1.3c77c897ed254p-224 },
+ { 0x1.fab995891c153p-230, 0x1.efdba02e2ceffp-225 },
+ { 0x1.8cbc2fe600108p-230, 0x1.846b92a47c343p-225 },
+ { 0x1.3694f45c1b92fp-230, 0x1.30395337f89bbp-225 },
+ { 0x1.e6371d3dc0233p-231, 0x1.dc7fb7bbca8adp-226 },
+ { 0x1.7c89c6867890ep-231, 0x1.751e7a10e8264p-226 },
+ { 0x1.29cb17b0f706bp-231, 0x1.2421ee0211f87p-226 },
+ { 0x1.d20647a807a0cp-232, 0x1.c9649548abac7p-227 },
+ { 0x1.6c9a3fd812077p-232, 0x1.6606f00ed6d5dp-227 },
+ { 0x1.1d37ef5f490cdp-232, 0x1.1836b52067807p-227 },
+ { 0x1.be2ec88ae1479p-233, 0x1.b6922692e74d4p-228 },
+ { 0x1.5cf38f9818abfp-233, 0x1.572b1a2c0293ap-228 },
+ { 0x1.10e013ef486f7p-233, 0x1.0c7c6b93f06a1p-228 },
+ { 0x1.aab7b734b99f6p-234, 0x1.a40fcadcdd133p-229 },
+ { 0x1.4d9b2cf546b09p-234, 0x1.4890ac32b69b5p-229 },
+ { 0x1.04c7bad04b57cp-234, 0x1.00f779993bbc1p-229 },
+ { 0x1.97a78d5f1c6dbp-235, 0x1.91e450ac30542p-230 },
+ { 0x1.3e9611e8218p-235, 0x1.3a3ce69b6a143p-230 },
+ { 0x1.f1e56c0773bb7p-236, 0x1.eb57d7362f984p-231 },
+ { 0x1.850426f2df55dp-236, 0x1.8015f467ddd4p-231 },
+ { 0x1.2fe8bb3e4f4d8p-236, 0x1.2c3495adab7d8p-231 },
+ { 0x1.dac8e8a813f1fp-237, 0x1.d53ae35dbfa26p-232 },
+ { 0x1.72d2c2a7422abp-237, 0x1.6eaa5fce4af3ap-232 },
+ { 0x1.21972950f570dp-237, 0x1.1e7c114a57a33p-232 },
+ { 0x1.c44004226dc17p-238, 0x1.bf9ebf2ac34cfp-233 },
+ { 0x1.6118037139874p-238, 0x1.5da6aa3adb7a3p-233 },
+ { 0x1.13a4e15d42467p-238, 0x1.11173d5813f4dp-233 },
+ { 0x1.ae501496e23f2p-239, 0x1.aa895a750e0f6p-234 },
+ { 0x1.4fd7f2b705e64p-239, 0x1.4d0f59b16ac32p-234 },
+ { 0x1.0614ef7575b09p-239, 0x1.04098aca1b898p-234 },
+ { 0x1.98fdb1084fd1cp-240, 0x1.95ffef5a788b3p-235 },
+ { 0x1.3f16033b4da17p-240, 0x1.3ce864a4f75bbp-235 },
+ { 0x1.f1d3d20014dd3p-241, 0x1.eeabf27142ccbp-236 },
+ { 0x1.844cb59a101a9p-241, 0x1.82070510e6e91p-236 },
+ { 0x1.2ed514b22b68bp-241, 0x1.2d35346de60f3p-236 },
+ { 0x1.d84bdf7421499p-242, 0x1.d5fe3202b4d44p-237 },
+ { 0x1.7040489842ad7p-242, 0x1.6ea2738b3dbebp-237 },
+ { 0x1.1f1777f205012p-242, 0x1.1df8a8637ba9cp-237 },
+ { 0x1.bf956a62adf73p-243, 0x1.be0e1bcc5bf2bp-238 },
+ { 0x1.5cdae0381ff94p-243, 0x1.5bd567e120a1cp-238 },
+ { 0x1.0fdef3b187063p-243, 0x1.0f35198b8b7f7p-238 },
+ { 0x1.a7b2fd5556b6ap-244, 0x1.a6df243f2c6f4p-239 },
+ { 0x1.4a1e48fd99b8ep-244, 0x1.49a26968a8fd1p-239 },
+ { 0x1.012cc9c3d142ap-244, 0x1.00ec5ed2dbe3ep-239 },
+ { 0x1.90a652d08b6ecp-245, 0x1.9073f3afbdfebp-240 },
+ { 0x1.380bacb3471d9p-245, 0x1.380b5f70c487dp-240 },
+ { 0x1.e603798765b0ap-246, 0x1.e63fa380d130bp-241 },
+ { 0x1.7a705e88ab4c8p-246, 0x1.7ace6e086aab7p-241 },
+ { 0x1.26a399e180e7cp-246, 0x1.2711978a97cf7p-241 },
+ { 0x1.cabc2c3d98d7cp-247, 0x1.cba0a72ae9c08p-242 },
+ { 0x1.651157275ac6fp-247, 0x1.65efbb20adf2dp-242 },
+ { 0x1.15e60bb1a2bacp-247, 0x1.16b5cc5019368p-242 },
+ { 0x1.b08358e30e1b1p-248, 0x1.b1fca598944c3p-243 },
+ { 0x1.5088c08941b89p-248, 0x1.51d84fa353951p-243 },
+ { 0x1.05d2722aa0abep-248, 0x1.06f82c9619b9p-243 },
+ { 0x1.9757d44a0d5d1p-249, 0x1.9953a1cf16aadp-244 },
+ { 0x1.3cd5765cc7b51p-249, 0x1.3e87f66d27bbp-244 },
+ { 0x1.eccf7568ff3afp-250, 0x1.efb0c5f0312cdp-245 },
+ { 0x1.7f37a88128933p-250, 0x1.81a4d1085cfd1p-245 },
+ { 0x1.29f5b70afae6ep-250, 0x1.2bfdda4e2b20cp-245 },
+ { 0x1.cf48b1a182cb9p-251, 0x1.d2ab3b59164a6p-246 },
+ { 0x1.682022c0d8296p-251, 0x1.6aeea740e7e26p-246 },
+ { 0x1.17e72ed48d1c2p-251, 0x1.1a389017ca93cp-246 },
+ { 0x1.b30c9decefa86p-252, 0x1.b6dd2d215fccfp-247 },
+ { 0x1.520de188c8ff4p-252, 0x1.552ee415230cdp-247 },
+ { 0x1.06a7030db71fbp-252, 0x1.093620e33d9f9p-247 },
+ { 0x1.98166f02e00aap-253, 0x1.9c4336b720df7p-248 },
+ { 0x1.3cfce2d301755p-253, 0x1.40629fd47fda6p-248 },
+ { 0x1.ec63bac9af50ap-254, 0x1.f1e828f7f1e6ep-249 },
+ { 0x1.7e609b497d4bfp-254, 0x1.82d92bd0fbc5bp-249 },
+ { 0x1.28e89244647b5p-254, 0x1.2c8658b1c7fabp-249 },
+ { 0x1.cd07ee41894f6p-255, 0x1.d2def7b6139fbp-250 },
+ { 0x1.65e4eca3c47cep-255, 0x1.6a9a29142865ap-250 },
+ { 0x1.15cbd7439af48p-255, 0x1.1995fff959855p-250 },
+ { 0x1.af324889fe32ep-256, 0x1.b549f742691f7p-251 },
+ { 0x1.4e9c920d5db05p-256, 0x1.5380a4af4c2e9p-251 },
+ { 0x1.03a122e1077b7p-256, 0x1.078d07375b0bp-251 },
+ { 0x1.92d9bd168c63p-257, 0x1.9921acfd99f39p-252 },
+ { 0x1.388030ea8589cp-257, 0x1.3d867ecfb60a5p-252 },
+ { 0x1.e4c4faf832008p-258, 0x1.ecccda72dba49p-253 },
+ { 0x1.77f4a046c515ep-258, 0x1.7e5deef2de87bp-253 },
+ { 0x1.2387f5f4b712ep-258, 0x1.28a511d87ce7dp-253 },
+ { 0x1.c413282821079p-259, 0x1.cc3995b1e2c4p-254 },
+ { 0x1.5e78bc56d0fbbp-259, 0x1.64f5f80200f46p-254 },
+ { 0x1.0faba5af01355p-259, 0x1.14d5424501d7ep-254 },
+ { 0x1.a51f8a6830159p-260, 0x1.ad54bef9112dp-255 },
+ { 0x1.465b65a83bdbbp-260, 0x1.4ce07b8d50856p-255 },
+ { 0x1.f9c5589e7201fp-261, 0x1.020f8e226943ep-255 },
+ { 0x1.87dc5ad8af9ecp-261, 0x1.90123a8271991p-256 },
+ { 0x1.2f918e4d3f95cp-261, 0x1.3613b89391a8fp-256 },
+ { 0x1.d6485a170413ap-262, 0x1.e098381b76cd3p-257 },
+ { 0x1.6c3b66970be3dp-262, 0x1.7465697a54c64p-257 },
+ { 0x1.1a0fd8c3a4e6fp-262, 0x1.20858c20a1795p-257 },
+ { 0x1.b4ce217bd5e55p-263, 0x1.bf05934cfa1ccp-258 },
+ { 0x1.522e259c7017ap-263, 0x1.5a41409f84e49p-258 },
+ { 0x1.05caa9cf257c4p-263, 0x1.0c2b83023243dp-258 },
+ { 0x1.954427a430b11p-264, 0x1.9f5672cf62a4fp-259 },
+ { 0x1.39a5d07601e71p-264, 0x1.41985de8f7a14p-259 },
+ { 0x1.e56c72cc01fccp-265, 0x1.f1f5d5615d783p-260 },
+ { 0x1.7797a6e64ddc9p-265, 0x1.8179bfb69c631p-260 },
+ { 0x1.229374c83806p-265, 0x1.2a5d1d1f1ae5cp-260 },
+ { 0x1.c18d454a503aep-266, 0x1.cdd1c2bddbb9ep-261 },
+ { 0x1.5bb5b3e414ad3p-266, 0x1.655e203c78adp-261 },
+ { 0x1.0ce808921de57p-266, 0x1.1481ab5a1469ap-261 },
+ { 0x1.9fdfe587f056ap-267, 0x1.abd4ca4bd8884p-262 },
+ { 0x1.418b54bd6a895p-267, 0x1.4af20f59f283dp-262 },
+ { 0x1.f128f851039d9p-268, 0x1.fff032b2dbde7p-263 },
+ { 0x1.804c6e03f60cbp-268, 0x1.8be8c488684b4p-263 },
+ { 0x1.290596a08a94fp-268, 0x1.3223f2e5be0fp-263 },
+ { 0x1.cb1395c8187f6p-269, 0x1.d964d959533d1p-264 },
+ { 0x1.62bb1316ec5fcp-269, 0x1.6df780d5ecc43p-264 },
+ { 0x1.1211a1b47d3aep-269, 0x1.1ae2302fd4bcdp-264 },
+ { 0x1.a772150026811p-270, 0x1.b5455f4e2ce45p-265 },
+ { 0x1.47143aa78b5fep-270, 0x1.51eade2a24279p-265 },
+ { 0x1.f93996ba5e93dp-271, 0x1.051b3f15282e5p-265 },
+ { 0x1.8626f2553e204p-271, 0x1.93760037df87ap-266 },
+ { 0x1.2d4091cd12adcp-271, 0x1.37ace1ccc1a8dp-266 },
+ { 0x1.d1294db79df79p-272, 0x1.e17b7713cf17fp-267 },
+ { 0x1.6715149108678p-272, 0x1.73db39c4b278bp-267 },
+ { 0x1.1529206516167p-272, 0x1.1f27cc2724f9p-267 },
+ { 0x1.abce28a1f17f2p-273, 0x1.bb70eb3792a1cp-268 },
+ { 0x1.4a1fe3e55f964p-273, 0x1.5659e4463ddd1p-268 },
+ { 0x1.fd6eb54be7326p-274, 0x1.08462ba9624dbp-268 },
+ { 0x1.89049c51b8388p-274, 0x1.97f4ffe1284a1p-269 },
+ { 0x1.2f2b5e6789756p-274, 0x1.3ad748e88c53fp-269 },
+ { 0x1.d3aa617478594p-275, 0x1.e5e5db98318a5p-270 },
+ { 0x1.68a9e9f7b2f9ap-275, 0x1.76e6798f53e9ap-270 },
+ { 0x1.161c2a1de488ep-275, 0x1.21393590da64bp-270 },
+ { 0x1.acda38e82463bp-276, 0x1.be32dc731f12cp-271 },
+ { 0x1.4a9c33e05809ap-276, 0x1.5824d30f3fce1p-271 },
+ { 0x1.fdaf4969fc45p-277, 0x1.09660e736b8bdp-271 },
+ { 0x1.88d45a53c41c5p-277, 0x1.994b0856743cbp-272 },
+ { 0x1.2eba8f55fe897p-277, 0x1.3b9051c5e7679p-272 },
+ { 0x1.d287e1e77c85ap-278, 0x1.e689bae600601p-273 },
+ { 0x1.6770239fc87e6p-278, 0x1.77071c1633b26p-273 },
+ { 0x1.14e513c1b20dcp-278, 0x1.210a174166fcdp-273 },
+ { 0x1.aa90041143186p-279, 0x1.bd7abebe480e6p-274 },
+ { 0x1.488642c71cfa6p-279, 0x1.5740f6d4ed277p-274 },
+ { 0x1.f9f9ce5a157bbp-280, 0x1.0874302ee34fdp-274 },
+ { 0x1.85974997b931fp-280, 0x1.97701e51a6bfep-275 },
+ { 0x1.2bf0c37efc00bp-280, 0x1.39d3aac239fe2p-275 },
+ { 0x1.cdc89092e43c3p-281, 0x1.e36341a88ea0cp-276 },
+ { 0x1.636f0e2785c54p-281, 0x1.743c5e4db43f9p-276 },
+ { 0x1.118b19def65f8p-281, 0x1.1e9b8ad36fd99p-276 },
+ { 0x1.a4fd2c459c71p-282, 0x1.b94cde5e4fc3p-277 },
+ { 0x1.43ea7a73d5cfp-282, 0x1.53b3a109a94aep-277 },
+ { 0x1.f26454740b953p-283, 0x1.057635a1ed1dfp-277 },
+ { 0x1.7f60ab495565cp-283, 0x1.926f55b776f91p-278 },
+ { 0x1.26de8be09d876p-283, 0x1.35abb1f1cadefp-278 },
+ { 0x1.c5889cb51dbb9p-284, 0x1.dc853b381e5ap-279 },
+ { 0x1.5cbe6a335189cp-284, 0x1.6e96e5d005f5dp-279 },
+ { 0x1.0c22190c33c65p-284, 0x1.19fc0dba0e848p-279 },
+ { 0x1.9c42b0a7816acp-285, 0x1.b1c21d6e11086p-280 },
+ { 0x1.3ce41b9a97542p-285, 0x1.4d91f3701143cp-280 },
+ { 0x1.e71ba6efe048bp-286, 0x1.007de792cfd6ep-280 },
+ { 0x1.76552635a3b27p-286, 0x1.8a6663a0ececbp-281 },
+ { 0x1.1fa1c7f04e719p-286, 0x1.2f310e41037d6p-281 },
+ { 0x1.b9f88d1e59fb3p-287, 0x1.d2185735c5ad9p-282 },
+ { 0x1.538582347c59ep-287, 0x1.66381bdd98a02p-282 },
+ { 0x1.04c9ca3c242adp-287, 0x1.1346f1ba5a69ap-282 },
+ { 0x1.9093a8968bba5p-288, 0x1.a706fd9470fb8p-283 },
+ { 0x1.339c31e0d51b7p-288, 0x1.45000f1eec014p-283 },
+ { 0x1.d8619415342d3p-289, 0x1.f3510620184eap-284 },
+ { 0x1.6aa95f63dd017p-289, 0x1.7f84791f6fdbbp-284 },
+ { 0x1.16648113f6ec6p-289, 0x1.2689bc620188bp-284 },
+ { 0x1.ab5b65b277be7p-290, 0x1.c45998d7521aep-285 },
+ { 0x1.47f9aad3382fep-290, 0x1.5b50e4b7d6356p-285 },
+ { 0x1.f7591b1b1c875p-291, 0x1.0aa3508d5dbp-285 },
+ { 0x1.82335294ba26p-291, 0x1.9959eb6f64db6p-286 },
+ { 0x1.2848053b7dfb1p-291, 0x1.3a2fb2a16d1ccp-286 },
+ { 0x1.c68a6f5a8ef62p-292, 0x1.e23b370697cbbp-287 },
+ { 0x1.5c9ffcce7e5fdp-292, 0x1.720876851d9fbp-287 },
+ { 0x1.0b5b54d487d35p-292, 0x1.1be79c992aff6p-287 },
+ { 0x1.9a0421e5c5d71p-293, 0x1.b3980569c43a5p-288 },
+ { 0x1.3a5c4268d4e27p-293, 0x1.4e1fc4f822568p-288 },
+ { 0x1.e1fba80d34a41p-294, 0x1.0042910b94342p-288 },
+ { 0x1.7172912ec21f8p-294, 0x1.8908e30f7a1b3p-289 },
+ { 0x1.1b271db151968p-294, 0x1.2d5e5a1b8288ep-289 },
+ { 0x1.b1f9ef2d6b135p-295, 0x1.ce1b3b9ea6267p-290 },
+ { 0x1.4c872d1af92bcp-295, 0x1.623e8fb994f23p-290 },
+ { 0x1.fd87064e02a6fp-296, 0x1.0f8695160ca38p-290 },
+ { 0x1.8652a61cdcd3bp-296, 0x1.a031b186be289p-291 },
+ { 0x1.2af84a660968dp-296, 0x1.3eee8e04dc3ap-291 },
+ { 0x1.c9f07af149226p-297, 0x1.e8bd23cc416fp-292 },
+ { 0x1.5eacf76fffc0cp-297, 0x1.766e8d5583265p-292 },
+ { 0x1.0c80f3efbbf3fp-297, 0x1.1ed2fab014c43p-292 },
+ { 0x1.9b1f8ffd8f3c8p-298, 0x1.b76010ebb6c6ap-293 },
+ { 0x1.3ab5d5023fe4ap-298, 0x1.507d813502ab7p-293 },
+ { 0x1.e1c174ea2aaa6p-299, 0x1.01aa61c90eaccp-293 },
+ { 0x1.70b05029068dap-299, 0x1.8a90544ab274dp-294 },
+ { 0x1.1a1fba21de5fp-299, 0x1.2e0fb0911dd84p-294 },
+ { 0x1.afb70654af059p-300, 0x1.ce6f24739f7c7p-295 },
+ { 0x1.4a458b53b2a84p-300, 0x1.61eefc532711fp-295 },
+ { 0x1.f944d95c81983p-301, 0x1.0edb77098a96p-295 },
+ { 0x1.8272ab43f7156p-301, 0x1.9e82e04d9025fp-296 },
+ { 0x1.278886c5a4d73p-301, 0x1.3d237a2e0f859p-296 },
+ { 0x1.c3f57b512a1f2p-302, 0x1.e5385c7d0efep-297 },
+ { 0x1.598c52c5d1746p-302, 0x1.73258d0b919ebp-297 },
+ { 0x1.0828ad1da0983p-302, 0x1.1bdb57d01ceccp-297 },
+ { 0x1.93d4935512f54p-303, 0x1.b223e5e67d24ap-298 },
+ { 0x1.34a3670d3cd59p-303, 0x1.4bf43098a2ef1p-298 },
+ { 0x1.d7b67cefff216p-304, 0x1.fb93db1e39a21p-299 },
+ { 0x1.686e7356020d2p-304, 0x1.8402d3eada60ap-299 },
+ { 0x1.135e695d6d4f8p-304, 0x1.2892e3159736p-299 },
+ { 0x1.a4b6028e1ae52p-305, 0x1.c5502f868f04bp-300 },
+ { 0x1.415808da66669p-305, 0x1.5a670a5d83e0ep-300 },
+ { 0x1.ead51e60a821dp-306, 0x1.08ac71830fd4ep-300 },
+ { 0x1.76cfe88ffbfa7p-306, 0x1.9467d9d3bce7dp-301 },
+ { 0x1.1e2e61d740a91p-306, 0x1.34ea92731d6fp-301 },
+ { 0x1.b4f6c22875415p-307, 0x1.d7e402cf49a21p-302 },
+ { 0x1.4d8e03e448998p-307, 0x1.6860e96265ba8p-302 },
+ { 0x1.fd2c6816f010bp-308, 0x1.132f279000564p-302 },
+ { 0x1.8494b75728df1p-308, 0x1.a4356bd52863ep-303 },
+ { 0x1.28836b62851b4p-308, 0x1.40cac092d16a6p-303 },
+ { 0x1.c476ceb4ce0a6p-309, 0x1.e9bb8c8c45eaap-304 },
+ { 0x1.592d26553a529p-309, 0x1.75c6ad9777c96p-304 },
+ { 0x1.074be65f60432p-309, 0x1.1d3d889242361p-304 },
+ { 0x1.91a14719373e5p-310, 0x1.b34c7bf3e0108p-305 },
+ { 0x1.3248b33f78dd9p-310, 0x1.4c1bf325b5886p-305 },
+ { 0x1.d316bfa6ecf07p-311, 0x1.fab351a6d7271p-306 },
+ { 0x1.641dc398561efp-311, 0x1.827d8b273a859p-306 },
+ { 0x1.0f79d08c027e2p-311, 0x1.26c35a8453a6ep-306 },
+ { 0x1.9ddabce45ff88p-312, 0x1.c18e854f7a653p-307 },
+ { 0x1.3b6a0443345f1p-312, 0x1.56c727238c10ep-307 },
+ { 0x1.e0b830517633fp-313, 0x1.05545196af9e3p-307 },
+ { 0x1.6e4903f595976p-313, 0x1.8e6b62ae03487p-308 },
+ { 0x1.170eca4e7a4cap-313, 0x1.2facf384d3a3bp-308 },
+ { 0x1.a92756c27d93ap-314, 0x1.ceddf1e753b81p-309 },
+ { 0x1.43d40bf74392dp-314, 0x1.60b61e0028436p-309 },
+ { 0x1.ed3e286c4c0dep-315, 0x1.0cbd09b1e5e1p-309 },
+ { 0x1.77993389df313p-315, 0x1.997719e8b73a8p-310 },
+ { 0x1.1dfa945eaae99p-315, 0x1.37e77cf85ca37p-310 },
+ { 0x1.b36ec5aa0588p-316, 0x1.db1e802a6c81fp-311 },
+ { 0x1.4b749e64b35f5p-316, 0x1.69d3aa6fccfd9p-311 },
+ { 0x1.f88d823260c9ep-317, 0x1.1383f4dd09079p-311 },
+ { 0x1.7ffa0f1fabb65p-317, 0x1.a388f33976b7bp-312 },
+ { 0x1.242e12375b352p-317, 0x1.3f613589599c6p-312 },
+ { 0x1.bc9a844ffd2b5p-318, 0x1.e635a66e3ebe7p-313 },
+ { 0x1.523af73f84783p-318, 0x1.720bfb4a981d7p-313 },
+ { 0x1.0146a610e0588p-318, 0x1.199a49bcc51p-313 },
+ { 0x1.87590d6d36008p-319, 0x1.ac8ae259e160cp-314 },
+ { 0x1.299b80ea6bb7fp-319, 0x1.4609b0c4183cap-314 },
+ { 0x1.c496292aa266bp-320, 0x1.f00af26520f9dp-315 },
+ { 0x1.5817f72c95e4cp-320, 0x1.794ce31e24c7bp-315 },
+ { 0x1.059392396d038p-320, 0x1.1ef2877dbfcadp-315 },
+ { 0x1.8da5a346cbb3fp-321, 0x1.b468dc95cb829p-316 },
+ { 0x1.2e36a9eb80d32p-321, 0x1.4bd213115ac94p-316 },
+ { 0x1.cb4fb203e18ap-322, 0x1.f88862b544527p-317 },
+ { 0x1.5cfe5be9615c7p-322, 0x1.7f861b04cbe3ap-317 },
+ { 0x1.0923c6394f695p-322, 0x1.2380a7a548a2fp-317 },
+ { 0x1.92d18166ccd51p-323, 0x1.bb1122f6e5762p-318 },
+ { 0x1.31f510cb3f507p-323, 0x1.50ad48dd9b3a6p-318 },
+ { 0x1.d0b7c794af438p-324, 0x1.ff9ab8e5d6631p-319 },
+ { 0x1.60e2f23228dedp-324, 0x1.84a97f6b3e853p-319 },
+ { 0x1.0bef1906dac58p-324, 0x1.273a4b16ba84fp-319 },
+ { 0x1.96d0ca88e4fcp-325, 0x1.c07484e1da469p-320 },
+ { 0x1.34ce1af3c1b6p-325, 0x1.549037ceef1fep-320 },
+ { 0x1.d4c1f7c67dd18p-326, 0x1.0298e0fc06037p-320 },
+ { 0x1.63bcc0600e3b1p-326, 0x1.88ab45875f419p-321 },
+ { 0x1.0def17046c37ep-326, 0x1.2a16e161fa35fp-321 },
+ { 0x1.999a40ba75f42p-327, 0x1.c48699c75f345p-322 },
+ { 0x1.36bb3093bcf7fp-327, 0x1.5771e906a9978p-322 },
+ { 0x1.d764e5657aa2p-328, 0x1.04a04a1699caap-322 },
+ { 0x1.658528dc53bd5p-328, 0x1.8b822865b44e6p-323 },
+ { 0x1.0f1f1acd583cp-328, 0x1.2c0fc98ac934cp-323 },
+ { 0x1.9b2768ee2e28p-329, 0x1.c73df0b6d4334p-324 },
+ { 0x1.37b7d60833afbp-329, 0x1.594bab8ddacb1p-324 },
+ { 0x1.d89a6c43f4c1p-330, 0x1.05dee05833b3cp-324 },
+ { 0x1.663803afd90e2p-330, 0x1.8d278c9cbfc58p-325 },
+ { 0x1.0f7c5f2e4265p-330, 0x1.2d206b997c2ccp-325 },
+ { 0x1.9b74a41343d69p-331, 0x1.c89434d36542fp-326 },
+ { 0x1.37c1bd3bb9cfep-331, 0x1.5a192e33cf627p-326 },
+ { 0x1.d85fb90bdf218p-332, 0x1.0651bc0c61b2p-326 },
+ { 0x1.65d3aea4b609ep-332, 0x1.8d9799e5f2521p-327 },
+ { 0x1.0f0609e7aa674p-332, 0x1.2d464a6b30dc2p-327 },
+ { 0x1.9a813d2878f74p-333, 0x1.c88645e6c88eep-328 },
+ { 0x1.36d8ce9d2217bp-333, 0x1.59d89052b0525p-328 },
+ { 0x1.d6b5543d3c94p-334, 0x1.05f7d07f3fb02p-328 },
+ { 0x1.645913a262a36p-334, 0x1.8cd14a1185c8dp-329 },
+ { 0x1.0dbd2f003b6a5p-334, 0x1.2c810d60e767ep-329 },
+ { 0x1.984f6bfe6778p-335, 0x1.c714448c370a6p-330 },
+ { 0x1.34ff297cd534dp-335, 0x1.588a691f2cd1fp-330 },
+ { 0x1.d39f201da2255p-336, 0x1.04d1f01416963p-330 },
+ { 0x1.61cba521cabb4p-336, 0x1.8ad66d03eba59p-331 },
+ { 0x1.0ba4cc94c45b3p-336, 0x1.2ad281b8cc2ap-331 },
+ { 0x1.94e44c9a075e7p-337, 0x1.c44191b160ec2p-332 },
+ { 0x1.32391bcecdc03p-337, 0x1.5631c55b5d22cp-332 },
+ { 0x1.cf2449a3fda4bp-338, 0x1.02e2c911c7929p-332 },
+ { 0x1.5e3150cc8eda4p-338, 0x1.87aba1a7120bfp-333 },
+ { 0x1.08c1bf3c985fap-338, 0x1.283e938a586f7p-333 },
+ { 0x1.9047cb663bb8cp-339, 0x1.c014c17012593p-334 },
+ { 0x1.2e8d117dfdd44p-339, 0x1.52d41b7968429p-334 },
+ { 0x1.c94f2cb2815a8p-340, 0x1.002edb3674f27p-334 },
+ { 0x1.599268900e7bcp-340, 0x1.835843f5f0b0cp-335 },
+ { 0x1.051aaf415041dp-340, 0x1.24cb3e8b7d756p-335 },
+ { 0x1.8a84869fc8267p-341, 0x1.ba9781881c8a9p-336 },
+ { 0x1.2a037bab743e1p-341, 0x1.4e79366e7a47p-336 },
+ { 0x1.c22d2c350e306p-342, 0x1.f978cc962d426p-337 },
+ { 0x1.53f982a03a248p-342, 0x1.7de65083f0e21p-337 },
+ { 0x1.00b7f70f68972p-342, 0x1.208076f18ea3p-337 },
+ { 0x1.83a7a5a0b9d4dp-343, 0x1.b3d6740403453p-338 },
+ { 0x1.24a6b05eb3edap-343, 0x1.492b17a8d9ad4p-338 },
+ { 0x1.b9ce7efad864cp-344, 0x1.f126a42ab2a64p-339 },
+ { 0x1.4d7351162fad8p-344, 0x1.77623e1a3ca2fp-339 },
+ { 0x1.f74706d1f613cp-345, 0x1.1b680aeae0c3cp-339 },
+ { 0x1.7bc0a6e57fbc5p-345, 0x1.abe0fed214bcap-340 },
+ { 0x1.1e82c35430e3dp-345, 0x1.42f5d0cb0afebp-340 },
+ { 0x1.b045f25c98b4bp-346, 0x1.e77a20528f8f5p-341 },
+ { 0x1.460e7202036c7p-346, 0x1.6fdace394b03cp-341 },
+ { 0x1.ebd15c07c2acdp-347, 0x1.158d7d54f1681p-341 },
+ { 0x1.72e125d540295p-347, 0x1.a2c9115542385p-342 },
+ { 0x1.17a558b9c184fp-347, 0x1.3be755f8b210cp-342 },
+ { 0x1.a5a8a3f3de092p-348, 0x1.dc88f077bd369p-343 },
+ { 0x1.3ddb38ecb5b52p-348, 0x1.6760d57bb9982p-343 },
+ { 0x1.df2826b036578p-349, 0x1.0efdda755dbb3p-343 },
+ { 0x1.691c997f37f0ep-349, 0x1.98a2e123c782ep-344 },
+ { 0x1.101d72c627ff7p-349, 0x1.340f49a72211p-344 },
+ { 0x1.9a0db3d2b8dacp-350, 0x1.d06b3f65f6fdp-345 },
+ { 0x1.34eb72e63e592p-350, 0x1.5e06fcff790f4p-345 },
+ { 0x1.d166c8f34fca4p-351, 0x1.07c787991a68p-345 },
+ { 0x1.5e880d9f1fe43p-351, 0x1.8d849f54265f7p-346 },
+ { 0x1.07fb3b2ff1602p-351, 0x1.2b7ec30262d2bp-346 },
+ { 0x1.8d8df0cbffd52p-352, 0x1.c33b5a8ad639fp-347 },
+ { 0x1.2b52265317648p-352, 0x1.53e17e1a8afadp-347 },
+ { 0x1.c2aa6bd34f17bp-353, 0x1.fff41d2913dabp-348 },
+ { 0x1.5339d751ff2a1p-353, 0x1.818627da2e9e4p-348 },
+ { 0x1.fe9f93308c405p-354, 0x1.2248100f21115p-348 },
+ { 0x1.80438073219dep-354, 0x1.b515531d535ebp-349 },
+ { 0x1.21234fbc4a127p-354, 0x1.4905d9b84e0cbp-349 },
+ { 0x1.b31198aa5f8abp-355, 0x1.ef4bcc5f71a72p-350 },
+ { 0x1.474946f304456p-355, 0x1.74c0ac8d03b2bp-350 },
+ { 0x1.ec59d00f3fe38p-356, 0x1.187e74c209a91p-350 },
+ { 0x1.7249848679fa9p-356, 0x1.a6169b09c4411p-351 },
+ { 0x1.16739cec78bd4p-356, 0x1.3d8a8ccb26cd9p-351 },
+ { 0x1.a2bbd0795adeep-357, 0x1.ddb87127c2076p-352 },
+ { 0x1.3ace589cd3352p-357, 0x1.674e5d7be735cp-352 },
+ { 0x1.d949ad392f075p-358, 0x1.0e35e84d33d3fp-352 },
+ { 0x1.63bbbf78651ccp-358, 0x1.965d9f895d99cp-353 },
+ { 0x1.0b5827a3ba382p-358, 0x1.3186c3440696p-353 },
+ { 0x1.91c922f9ee4cp-359, 0x1.cb5d51a48d7d4p-354 },
+ { 0x1.2de164c74e725p-359, 0x1.594a1039f0199p-354 },
+ { 0x1.c5941f108d9d1p-360, 0x1.0382d1e479246p-354 },
+ { 0x1.54b639c219649p-360, 0x1.8609634a384ccp-355 },
+ { 0x1.ffcc62473097ap-361, 0x1.25120afe02122p-355 },
+ { 0x1.8059c757355aep-361, 0x1.b85e31314f4b4p-356 },
+ { 0x1.209ad26ca18d9p-361, 0x1.4acee7c0fcbafp-356 },
+ { 0x1.b15e18d0d2d12p-362, 0x1.f0f38c6449ad9p-357 },
+ { 0x1.4554e9983b016p-362, 0x1.753919ff4b182p-357 },
+ { 0x1.e865bf893f8f4p-363, 0x1.1844080030d76p-357 },
+ { 0x1.6e8db855aac9ap-363, 0x1.a4dede3a3eb93p-358 },
+ { 0x1.1312cc0ae5d04p-363, 0x1.3bf7fe7aa33ap-358 },
+ { 0x1.9ccc1bfbf7ecbp-364, 0x1.da5e8d4d639edp-359 },
+ { 0x1.35b35e7d0088ep-364, 0x1.640bc7176cda7p-359 },
+ { 0x1.d0a5ff60b92cfp-365, 0x1.0b342b640cc13p-359 },
+ { 0x1.5c84558f35d95p-365, 0x1.9102c47629cb9p-360 },
+ { 0x1.0560f8bafb2c7p-365, 0x1.2ce013e375d0fp-360 },
+ { 0x1.8801ce509ea26p-366, 0x1.c36f07720a932p-361 },
+ { 0x1.25ec7207b3c64p-366, 0x1.529fe13854ed9p-361 },
+ { 0x1.b8b58f7c67c36p-367, 0x1.fbf2dc269c35dp-362 },
+ { 0x1.4a5c0b3b7424dp-367, 0x1.7cec854a40ddcp-362 },
+ { 0x1.ef3874e46141bp-368, 0x1.1da13f1aaaee6p-362 },
+ { 0x1.732197e24d857p-368, 0x1.ac4c46230c45cp-363 },
+ { 0x1.1619ff0ea7ec6p-368, 0x1.4112fbeff8a1fp-363 },
+ { 0x1.a0bb46a0a2c53p-369, 0x1.e15420dda8758p-364 },
+ { 0x1.383201c8ba71ap-369, 0x1.68bd97eb5b05dp-364 },
+ { 0x1.d3b4e4b894768p-370, 0x1.0e54a78756b6bp-364 },
+ { 0x1.5e4c4aaef013p-370, 0x1.951c14f527745p-365 },
+ { 0x1.0654a030d3e7p-370, 0x1.2f8178dd14a04p-365 },
+ { 0x1.88dc03d1ca801p-371, 0x1.c6b6bf9361ee4p-366 },
+ { 0x1.2621d65152a67p-371, 0x1.5495f2949c65ep-366 },
+ { 0x1.b860981f4834ap-372, 0x1.fe24891c8ca0cp-367 },
+ { 0x1.49a0d4c97c281p-372, 0x1.7e02609a87253p-367 },
+ { 0x1.ed66ed1143993p-373, 0x1.1e064158c947bp-367 },
+ { 0x1.713a5a10cc9bp-373, 0x1.ac4304f253262p-368 },
+ { 0x1.14455cbbff469p-373, 0x1.4093bdea6e36fp-368 },
+ { 0x1.9d62205df47a6p-374, 0x1.dfe14a435c3c2p-369 },
+ { 0x1.353bfdeb15aa4p-374, 0x1.6720e3d624fdcp-369 },
+ { 0x1.ce97f23783a55p-375, 0x1.0cba8970a9d66p-369 },
+ { 0x1.59f649793ea9ap-375, 0x1.921e961b81171p-370 },
+ { 0x1.02b46c188f22dp-375, 0x1.2cd3135c626d1p-370 },
+ { 0x1.82dcfdba2d59cp-376, 0x1.c2097f7f7c953p-371 },
+ { 0x1.213830f44d648p-376, 0x1.5096e15b063dbp-371 },
+ { 0x1.b0639acae41c7p-377, 0x1.f76b39886a20dp-372 },
+ { 0x1.432d063e4cc5ap-377, 0x1.786c2636e4e2ap-372 },
+ { 0x1.e3096b161ade1p-378, 0x1.196dc712e8651p-372 },
+ { 0x1.68f1646f450ccp-378, 0x1.a4c39680abb0bp-373 },
+ { 0x1.0dad51a121c5fp-378, 0x1.3a80eb1934625p-373 },
+ { 0x1.92ed52465cf13p-379, 0x1.d6196b3830612p-374 },
+ { 0x1.2cf8cdb32b26dp-379, 0x1.5f4b3b930a91ap-374 },
+ { 0x1.c1934bb7035c1p-380, 0x1.067b3db09279ep-374 },
+ { 0x1.4fbc11c19c0b7p-380, 0x1.8832413bcb6f5p-375 },
+ { 0x1.f5613cdc1ad52p-381, 0x1.24f8b72bbd6eep-375 },
+ { 0x1.76547ab0f816ap-381, 0x1.b5a5bcacf14ddp-376 },
+ { 0x1.1770c93ef3136p-381, 0x1.46d8046ba690cp-376 },
+ { 0x1.a128a30d837ebp-382, 0x1.e8209bd7c6d4dp-377 },
+ { 0x1.375630e92b79p-382, 0x1.6c744b66f6406p-377 },
+ { 0x1.d0a93cd8add1ep-383, 0x1.1015024fefc8dp-377 },
+ { 0x1.5ab4549d6cf15p-383, 0x1.9631ba1694964p-378 },
+ { 0x1.02a8fed4a1944p-383, 0x1.2f2b3b1ae197dp-378 },
+ { 0x1.81e6d5efc2ecep-384, 0x1.c47e5b8f9de0cp-379 },
+ { 0x1.1fd54f3e20bfcp-384, 0x1.51a481761d265p-379 },
+ { 0x1.ad523512d80aep-385, 0x1.f7d2ff106229cp-380 },
+ { 0x1.4023f854f9c86p-385, 0x1.77da522f79ec5p-380 },
+ { 0x1.dd649c8fad0d5p-386, 0x1.185a192bd02b4p-380 },
+ { 0x1.63e684c4d4572p-386, 0x1.a22ed5ef67f83p-381 },
+ { 0x1.094b5ecc6e29p-386, 0x1.37d9a85948033p-381 },
+ { 0x1.8b7643330549ep-387, 0x1.d10da89b8212ap-382 },
+ { 0x1.26b65f14cd4dap-387, 0x1.5ab7d4224f7e2p-382 },
+ { 0x1.b734f53e57228p-388, 0x1.0276587fa1c2p-382 },
+ { 0x1.473b9d1931175p-388, 0x1.814bdb918424dp-383 },
+ { 0x1.e78d8c6e84fddp-389, 0x1.1f2684f2af658p-383 },
+ { 0x1.6b2a2c93cd65ap-389, 0x1.abf540fb4e1a1p-384 },
+ { 0x1.0e7a7b055d281p-389, 0x1.3eddfeeed0dd2p-384 },
+ { 0x1.92d87cacce695p-390, 0x1.db1c82f79707dp-385 },
+ { 0x1.2bf57b6e0d98dp-390, 0x1.61ea0b7eb4c3cp-385 },
+ { 0x1.bea4f9488e121p-391, 0x1.0799f1fb897d8p-385 },
+ { 0x1.4c7d8bf7bdc41p-391, 0x1.889f21fdb1d69p-386 },
+ { 0x1.eef6b8bfa9225p-392, 0x1.245c20ba28a39p-386 },
+ { 0x1.705ed2bbfd521p-392, 0x1.b3598a0d5984p-387 },
+ { 0x1.121f1b69882ebp-392, 0x1.4418fde75923ep-387 },
+ { 0x1.97ec608197c79p-393, 0x1.e27e05b6c31f9p-388 },
+ { 0x1.2f7b0edc74f1cp-393, 0x1.671af7f5d8858p-388 },
+ { 0x1.c380c41f7503p-394, 0x1.0b3d4442eda68p-388 },
+ { 0x1.4fd20f15083b3p-394, 0x1.8db341e4d4306p-389 },
+ { 0x1.f37ea8d01e9c5p-395, 0x1.27e37e3bc73c9p-389 },
+ { 0x1.736cebb19a201p-395, 0x1.b83a639f29a8p-390 },
+ { 0x1.1428c012e2c57p-395, 0x1.47730acf38edcp-390 },
+ { 0x1.9a9ae80c06018p-396, 0x1.e710d5155d028p-391 },
+ { 0x1.31371c2b63b8p-396, 0x1.6a331ab64b688p-391 },
+ { 0x1.c5b240b14f4d6p-397, 0x1.0d4fd25f7f52ep-391 },
+ { 0x1.5129ffd17a136p-397, 0x1.90712f4e38e37p-392 },
+ { 0x1.f510ba62354a5p-398, 0x1.29ac951c1e60bp-392 },
+ { 0x1.74468acd1611cp-398, 0x1.ba819d5f14678p-393 },
+ { 0x1.148e1d96c299ep-398, 0x1.48dce2dc3ecd5p-393 },
+ { 0x1.9ad7d58aaba44p-399, 0x1.e8c0193d16d55p-394 },
+ { 0x1.3121b71d77179p-399, 0x1.6b2456938b866p-394 },
+ { 0x1.c52f68dd90e64p-400, 0x1.0dc826696c76cp-394 },
+ { 0x1.507f397188496p-400, 0x1.90cc63cdbf2a2p-395 },
+ { 0x1.f3a5bdf92c388p-401, 0x1.29af3c144f8cp-395 },
+ { 0x1.72e7cbdbb95dbp-401, 0x1.ba24cc0f4c8e2p-396 },
+ { 0x1.134d638b07143p-401, 0x1.48500e815d897p-396 },
+ { 0x1.98a2111174d79p-402, 0x1.e7841c45926dp-397 },
+ { 0x1.2f3b409e1b7b6p-402, 0x1.69ea5b1b71301p-397 },
+ { 0x1.c1fa91a869695p-403, 0x1.0ca4195cda6d3p-397 },
+ { 0x1.4dd4c7d7ec9fap-403, 0x1.8ec33daf13649p-398 },
+ { 0x1.ef442d8796795p-404, 0x1.27eb66fea5e85p-398 },
+ { 0x1.6f56f0c0f22b9p-404, 0x1.b72598c77c448p-399 },
+ { 0x1.106c4a594a047p-404, 0x1.45cf12a60cb9ap-399 },
+ { 0x1.9403b0e4bd1b9p-405, 0x1.e36284e81b5ffp-400 },
+ { 0x1.2b8c63e7468c1p-405, 0x1.668ac570f2fc8p-400 },
+ { 0x1.bc22598793379p-406, 0x1.09e8e37ef2488p-400 },
+ { 0x1.4936d06178106p-406, 0x1.8a5f0c63b5c24p-401 },
+ { 0x1.e7fffb3b16a7dp-407, 0x1.2469273320bdap-401 },
+ { 0x1.69a431ed205ap-407, 0x1.b191b44e70edfp-402 },
+ { 0x1.0bf7e7cce4d07p-407, 0x1.41655d7606103p-402 },
+ { 0x1.8d11ace4d8996p-408, 0x1.dc6e2b76185d5p-403 },
+ { 0x1.2625d4b960a47p-408, 0x1.6114f58eab906p-403 },
+ { 0x1.b3c139841a735p-409, 0x1.05a2f4a403a4dp-403 },
+ { 0x1.42ba35d81be5cp-409, 0x1.83b3c9af7ee45p-404 },
+ { 0x1.ddf9fa6fc513ap-410, 0x1.1f386e3013e68p-404 },
+ { 0x1.61e943a26f542p-410, 0x1.a9826f127d04dp-405 },
+ { 0x1.06044c28d2704p-410, 0x1.3b26ef9596f74p-405 },
+ { 0x1.83eb403668f94p-411, 0x1.d2c68adc24dd3p-406 },
+ { 0x1.1f1fd15ed30fep-411, 0x1.59a199b7c8167p-406 },
+ { 0x1.a8fcbdc7eab51p-412, 0x1.ffcb2bfa5b8dap-407 },
+ { 0x1.3a7bfb4be9962p-412, 0x1.7adf828472cfdp-407 },
+ { 0x1.d15ee90987618p-413, 0x1.1870951a86a79p-407 },
+ { 0x1.584895194492p-413, 0x1.9f1bfa110cbbap-408 },
+ { 0x1.fd57d7b45b3cap-414, 0x1.332fc55367264p-408 },
+ { 0x1.78b8ffae32bfp-414, 0x1.c696d39db75f3p-409 },
+ { 0x1.16996dab0cd1ep-414, 0x1.5051f4ea04fdfp-409 },
+ { 0x1.9c046dcaa75a4p-415, 0x1.f194b2a4cb97p-410 },
+ { 0x1.30a06c462f23ep-415, 0x1.700975cbb46aap-410 },
+ { 0x1.c2662350ce7fap-416, 0x1.102fae0ec7794p-410 },
+ { 0x1.4cec5169fb931p-416, 0x1.928c588cfb6d9p-411 },
+ { 0x1.ec1db7d8e44b5p-417, 0x1.29a3060c44f3ap-411 },
+ { 0x1.6babae8929706p-417, 0x1.b814aa869e0e4p-412 },
+ { 0x1.0cb7ae5506e7ep-417, 0x1.454ee7edd0063p-412 },
+ { 0x1.8d106f7f4047ep-418, 0x1.e0e0b72e6ef2ep-413 },
+ { 0x1.255213192c405p-418, 0x1.6360f251c2f1fp-413 },
+ { 0x1.b1500fc71b69ap-419, 0x1.0699a6631f93fp-413 },
+ { 0x1.40052c8ba04b4p-419, 0x1.840a0d97bb129p-414 },
+ { 0x1.d8a3d24511c07p-420, 0x1.1eaa023d58a69p-414 },
+ { 0x1.5cfadd7b9716p-420, 0x1.a77ea01d8b821p-415 },
+ { 0x1.01a47ddad3ea8p-420, 0x1.38c7c7057a652p-415 },
+ { 0x1.7c5ff3799c35bp-421, 0x1.cdf6c504a93e5p-416 },
+ { 0x1.18c087e86a1f3p-421, 0x1.551bff88c1175p-416 },
+ { 0x1.9e64530b957f4p-422, 0x1.f7ae8590bb8p-417 },
+ { 0x1.31c908986e1a8p-422, 0x1.73d293026bc2ap-417 },
+ { 0x1.c33b25da2082ep-423, 0x1.12730a9790f69p-417 },
+ { 0x1.4ce362055227ep-423, 0x1.951a7082f394ap-418 },
+ { 0x1.eb1b0ae0a386ap-424, 0x1.2af1081b22794p-418 },
+ { 0x1.6a3779e1ff3bp-424, 0x1.b925bc48353ep-419 },
+ { 0x1.0b1f245435eeap-424, 0x1.4575deb5305a2p-419 },
+ { 0x1.89efddb97fd18p-425, 0x1.e029ff0fc8645p-420 },
+ { 0x1.227180cb0a8cap-425, 0x1.6228a92a17423p-420 },
+ { 0x1.ac39e8a7de062p-426, 0x1.05302bb5e3a1ap-420 },
+ { 0x1.3ba5b5279aa24p-426, 0x1.81331d3a2cc81p-421 },
+ { 0x1.d145ea8ff6403p-427, 0x1.1c02d69097c72p-421 },
+ { 0x1.56df011e743b9p-427, 0x1.a2c1b0ae83a64p-422 },
+ { 0x1.f94750d0f9308p-428, 0x1.34ad734ae6135p-422 },
+ { 0x1.7442e7172840ap-428, 0x1.c703bfdc748cdp-423 },
+ { 0x1.123a683e9b9d5p-428, 0x1.4f5290291de6ep-423 },
+ { 0x1.93f94a8e393e5p-429, 0x1.ee2bb5a2a447p-424 },
+ { 0x1.298449094a08p-429, 0x1.6c16f34d9525ep-424 },
+ { 0x1.b62c8f87855a8p-430, 0x1.0c379a70923bcp-424 },
+ { 0x1.42a02f59d51efp-430, 0x1.8b21b8919710fp-425 },
+ { 0x1.db09bb0ffb21fp-431, 0x1.2303a1b68b2dep-425 },
+ { 0x1.5daee76f997a8p-431, 0x1.ac9c706a79cfcp-426 },
+ { 0x1.01604a662bf4cp-431, 0x1.3b983b3f72fb5p-426 },
+ { 0x1.7ad33d50dacdp-432, 0x1.d0b33fd9b6e85p-427 },
+ { 0x1.16c1e4c8c451ap-432, 0x1.5615904c6373ap-427 },
+ { 0x1.9a32159dea0d8p-433, 0x1.f7950165d693dp-428 },
+ { 0x1.2dc48781056c9p-433, 0x1.729dc070c926ap-428 },
+ { 0x1.bbf2871addffbp-434, 0x1.10b9b38c6e833p-428 },
+ { 0x1.4684a4152d4ep-434, 0x1.9154f9f73ee5fp-429 },
+ { 0x1.e03df4eb2c204p-435, 0x1.27418ebfd96bep-429 },
+ { 0x1.6120558a89b12p-435, 0x1.b26192fa2f36ep-430 },
+ { 0x1.03a014bcb5352p-435, 0x1.3f7df7d25b3e6p-430 },
+ { 0x1.7db773a6f6623p-436, 0x1.d5ec232ba3385p-431 },
+ { 0x1.1893b9023690dp-436, 0x1.598c75ff21ea4p-431 },
+ { 0x1.9c6ba6a49465ap-437, 0x1.fc1f9e46a53e2p-432 },
+ { 0x1.2f125d64e7642p-437, 0x1.758c452444076p-432 },
+ { 0x1.bd607b51aff83p-438, 0x1.1294b791c6529p-432 },
+ { 0x1.4735d5e25dd32p-438, 0x1.939e692035be7p-433 },
+ { 0x1.e0bb7795ebab2p-439, 0x1.289cc9b3b4107p-433 },
+ { 0x1.611962fb4b008p-439, 0x1.b3e5c199dc217p-434 },
+ { 0x1.035217aa6e0adp-439, 0x1.40415be2c6028p-434 },
+ { 0x1.7cd9c096da3b3p-440, 0x1.d6871e2c76342p-435 },
+ { 0x1.17a22cd2a508fp-440, 0x1.599d2a64857abp-435 },
+ { 0x1.9a95351e8c9f1p-441, 0x1.fba952efabe51p-436 },
+ { 0x1.2d63f329a8bcbp-441, 0x1.74cc660d4897ap-436 },
+ { 0x1.ba6ba0cb47e2bp-442, 0x1.11baa6a990cd8p-436 },
+ { 0x1.44ae89d144108p-442, 0x1.91ecc31adec4ep-437 },
+ { 0x1.dc7e8d1b8f556p-443, 0x1.270b14a1f9816p-437 },
+ { 0x1.5d9a42222275cp-443, 0x1.b11d883fd3ec1p-438 },
+ { 0x1.00789e350bd1ap-443, 0x1.3ddca348b8e79p-438 },
+ { 0x1.7840aaba80c98p-444, 0x1.d27f9dd765764p-439 },
+ { 0x1.13f45ccd8c935p-444, 0x1.56472f42babf3p-439 },
+ { 0x1.94bc9a9955f26p-445, 0x1.f6359d3980ea5p-440 },
+ { 0x1.28c5f3eaf8eddp-445, 0x1.7063ccd1b83c6p-440 },
+ { 0x1.b32a3c3e46a35p-446, 0x1.0e31f012ad2b3p-440 },
+ { 0x1.3f01c91fe7f47p-446, 0x1.8c4cd2c02ec2dp-441 },
+ { 0x1.d3a718c61d154p-447, 0x1.2298481c2ca0dp-441 },
+ { 0x1.56bd3dd5a05c1p-447, 0x1.aa1de55237abcp-442 },
+ { 0x1.f65222fadfcp-448, 0x1.3861db33230bp-442 },
+ { 0x1.700eb717cfb77p-448, 0x1.c9f401331dbf6p-443 },
+ { 0x1.0da5e12700c8dp-448, 0x1.4fa3a533642f6p-443 },
+ { 0x1.8b0da54d3c71fp-449, 0x1.ebed8656f1a7bp-444 },
+ { 0x1.215aeed941b43p-449, 0x1.6873a105b43c2p-444 },
+ { 0x1.a7d28bd609e5p-450, 0x1.081521636047p-444 },
+ { 0x1.3659f3261d19p-450, 0x1.82e8d038330cap-445 },
+ { 0x1.c6770887b13f6p-451, 0x1.1b65bea6b7e6ap-445 },
+ { 0x1.4cb570f463d9dp-451, 0x1.9f1b427ce89a2p-446 },
+ { 0x1.e715dafe5cd6p-452, 0x1.2ff9fffd4f5f9p-446 },
+ { 0x1.6480ba9b1723cp-452, 0x1.bd241d06b6757p-447 },
+ { 0x1.04e575dd6f2ebp-452, 0x1.45e411382662bp-447 },
+ { 0x1.7dcff6d521467p-453, 0x1.dd1da1bc7ec85p-448 },
+ { 0x1.1759a98201ff3p-453, 0x1.5d36e9f7af39cp-448 },
+ { 0x1.98b82586ccf2dp-454, 0x1.ff233639de02ap-449 },
+ { 0x1.2af6afc0ce651p-454, 0x1.7606528b3cf28p-449 },
+ { 0x1.b54f244df93dfp-455, 0x1.11a8b54a30c34p-449 },
+ { 0x1.3fcc4e4385b18p-455, 0x1.9066e8a3084adp-450 },
+ { 0x1.d3abb2d5b9282p-456, 0x1.24e2ffedd9f78p-450 },
+ { 0x1.55eaec016b2b5p-456, 0x1.ac6e23cde6ac9p-451 },
+ { 0x1.f3e576e5bfb2cp-457, 0x1.394ff72563c26p-451 },
+ { 0x1.6d6394041cb01p-457, 0x1.ca3259bb8013ep-452 },
+ { 0x1.0b0a8012d71fbp-457, 0x1.4effb58fcce2p-452 },
+ { 0x1.8647f7f3a91dep-458, 0x1.e9cac23b8427ep-453 },
+ { 0x1.1d29e5c60946bp-458, 0x1.6602f707600f3p-453 },
+ { 0x1.a0aa72640fd47p-459, 0x1.05a7bd790a4bcp-453 },
+ { 0x1.305e23384e58ap-459, 0x1.7e6b1b23c38f4p-454 },
+ { 0x1.bc9e08de1532fp-460, 0x1.176cc55ca9b8p-454 },
+ { 0x1.44b4e89c6a35fp-460, 0x1.984a277e8539ap-455 },
+ { 0x1.da366d9d2b975p-461, 0x1.2a417253e014bp-455 },
+ { 0x1.5a3c60cb2c6b1p-461, 0x1.b3b2c9b4277c6p-456 },
+ { 0x1.f98800fc076dbp-462, 0x1.3e333559670c8p-456 },
+ { 0x1.71033226bf0afp-462, 0x1.d0b8591b88278p-457 },
+ { 0x1.0d53e944a7e18p-462, 0x1.534ff7f271b4dp-457 },
+ { 0x1.89187f3d75a14p-463, 0x1.ef6ed82d51675p-458 },
+ { 0x1.1ed5d0deddfb7p-463, 0x1.69a61d0edc9d2p-458 },
+ { 0x1.a28be72757b85p-464, 0x1.07f57aca805f1p-458 },
+ { 0x1.3154ef266983dp-464, 0x1.814481a9f253cp-459 },
+ { 0x1.bd6d859990532p-465, 0x1.1921067277b5dp-459 },
+ { 0x1.44dcd404b4fcdp-465, 0x1.9a3a7d2712f82p-460 },
+ { 0x1.d9cdf2aadd6a6p-466, 0x1.2b45137355f77p-460 },
+ { 0x1.5979672b76b96p-466, 0x1.b497e1657b91bp-461 },
+ { 0x1.f7be424410479p-467, 0x1.3e6cfcc06ed27p-461 },
+ { 0x1.6f36e7903ba4fp-467, 0x1.d06cfa865bc4ep-462 },
+ { 0x1.0ba8019bd4e86p-467, 0x1.52a47395ed2aep-462 },
+ { 0x1.8621eaa755f34p-468, 0x1.edca8e605e67ap-463 },
+ { 0x1.1c4a9efdce654p-468, 0x1.67f77ef705254p-463 },
+ { 0x1.9e475b5aaea97p-469, 0x1.0660edcde1e02p-463 },
+ { 0x1.2dd03980220acp-469, 0x1.7e727aec99554p-464 },
+ { 0x1.b7b478b8fda1cp-470, 0x1.16b24c391593bp-464 },
+ { 0x1.40424c4fd21f7p-470, 0x1.96221780dfe95p-465 },
+ { 0x1.d276d459f43c7p-471, 0x1.27e2788696d86p-465 },
+ { 0x1.53aa8c500f5dp-471, 0x1.af1357749947cp-466 },
+ { 0x1.ee9c5073f397ep-472, 0x1.39fac2bf7a531p-466 },
+ { 0x1.6812e6a2e8fcp-472, 0x1.c9538eaa71fbp-467 },
+ { 0x1.06198ecffc0ep-472, 0x1.4d04b3a802aeep-467 },
+ { 0x1.7d857ef6fe55ap-473, 0x1.e4f0604536408p-468 },
+ { 0x1.15a4dc243cc5fp-473, 0x1.610a0b4ec8401p-468 },
+ { 0x1.940cad97ee071p-474, 0x1.00fbde3ac71c6p-468 },
+ { 0x1.25f772e00c70ap-474, 0x1.7614bf61d6bfap-469 },
+ { 0x1.abb2fd3f529efp-475, 0x1.103beefa0765p-469 },
+ { 0x1.3718d87e8a0afp-475, 0x1.8c2ef94786008p-470 },
+ { 0x1.c48328a4346ebp-476, 0x1.203fa39242793p-470 },
+ { 0x1.4910b37b4de72p-476, 0x1.a36313f8e64ecp-471 },
+ { 0x1.de8817c6f33b9p-477, 0x1.310e5f6fbfd44p-471 },
+ { 0x1.5be6c950a7e6fp-477, 0x1.bbbb999bb060ap-472 },
+ { 0x1.f9ccdcf7c94fep-478, 0x1.42afa66f9fdc1p-472 },
+ { 0x1.6fa2fc442a9d3p-478, 0x1.d54340d9c375dp-473 },
+ { 0x1.0b2e58cb15f5cp-478, 0x1.552b1ae6aeaa2p-473 },
+ { 0x1.844d490056942p-479, 0x1.f004e9f45a94bp-474 },
+ { 0x1.1a217943b9ac7p-479, 0x1.68887b7750462p-474 },
+ { 0x1.99edc3fa555f4p-480, 0x1.0605cdc8a1e5ep-474 },
+ { 0x1.29c58e31af831p-480, 0x1.7ccfa0b55e3f7p-475 },
+ { 0x1.b08c96a2d341cp-481, 0x1.14b13fa04509fp-475 },
+ { 0x1.3a2063aa9bfc9p-481, 0x1.92087a96ea8f4p-476 },
+ { 0x1.c831fc61280f7p-482, 0x1.240a6edc95f53p-476 },
+ { 0x1.4b37d15842e1dp-482, 0x1.a83b0db0fa5b6p-477 },
+ { 0x1.e0e63f582488bp-483, 0x1.34170d65d2fe5p-477 },
+ { 0x1.5d11b81c3fea7p-483, 0x1.bf6f703f6c8b1p-478 },
+ { 0x1.fab1b4f400c2ep-484, 0x1.44dcd884a52dcp-478 },
+ { 0x1.6fb3ff8ccf41cp-484, 0x1.d7adc6f76430fp-479 },
+ { 0x1.0ace5d20891a2p-484, 0x1.5661968fc8c68p-479 },
+ { 0x1.8324934a763f4p-485, 0x1.f0fe41a3b588bp-480 },
+ { 0x1.18d7d8058e531p-485, 0x1.68ab147365bffp-480 },
+ { 0x1.9769602e7d2c4p-486, 0x1.05b48bc57ed71p-480 },
+ { 0x1.27797b62a04a4p-486, 0x1.7bbf2311e9661p-481 },
+ { 0x1.ac8851524d431p-487, 0x1.137b41cf9c9a4p-481 },
+ { 0x1.36b7751d5da7fp-487, 0x1.8fa3947e525d9p-482 },
+ { 0x1.c2874cefea298p-488, 0x1.21d7603b6e2ccp-482 },
+ { 0x1.4695ee8470b66p-488, 0x1.a45e3910021acp-483 },
+ { 0x1.d96c311be3eb3p-489, 0x1.30cd0207d04edp-483 },
+ { 0x1.571909f179506p-489, 0x1.b9f4dc504a668p-484 },
+ { 0x1.f13cd05945d89p-490, 0x1.40603dadb780ap-484 },
+ { 0x1.6844e0504f766p-490, 0x1.d06d41c212c13p-485 },
+ { 0x1.04ff770417c7ep-490, 0x1.509522cc01f2fp-485 },
+ { 0x1.7a1d7e8c27e5p-491, 0x1.e7cd2184183ebp-486 },
+ { 0x1.11dc1d57f7df8p-491, 0x1.616fb7b910c11p-486 },
+ { 0x1.8ca6e2e342651p-492, 0x1.000d1267395e3p-486 },
+ { 0x1.1f372812d1e14p-492, 0x1.72f3f6faafe57p-487 },
+ { 0x1.9fe4fa21e8c98p-493, 0x1.0cacf12619fe1p-487 },
+ { 0x1.2d1356c845fd1p-493, 0x1.8525cca4f244dp-488 },
+ { 0x1.b3db9cc5a58f3p-494, 0x1.19c8ed29100e2p-488 },
+ { 0x1.3b7359a6b9391p-494, 0x1.980913a0c5f1ep-489 },
+ { 0x1.c88e8c09b9bb2p-495, 0x1.2763b979d57b5p-489 },
+ { 0x1.4a59cf5958098p-495, 0x1.aba192db244fdp-490 },
+ { 0x1.de016eddfacadp-496, 0x1.357ff9fbc97f4p-490 },
+ { 0x1.59c942db45eaep-496, 0x1.bff2fa5de1e9dp-491 },
+ { 0x1.f437cec9632b8p-497, 0x1.44204156d00fcp-491 },
+ { 0x1.69c4293cefa3fp-497, 0x1.d500e0534289dp-492 },
+ { 0x1.059a8a5ce0ce7p-497, 0x1.53470ed39dd97p-492 },
+ { 0x1.7a4cdf5c8de47p-498, 0x1.eacebdf5973c2p-493 },
+ { 0x1.117e42e10afc5p-498, 0x1.62f6cc2a62dbdp-493 },
+ { 0x1.8b65a792fe14p-499, 0x1.00aff63626acfp-493 },
+ { 0x1.1dc89fe4a5f8ap-499, 0x1.7331cb44dd6ecp-494 },
+ { 0x1.9d10a7562f377p-500, 0x1.0c5bd0cbfba3p-494 },
+ { 0x1.2a7b1b1593291p-500, 0x1.83fa43f4f73d5p-495 },
+ { 0x1.af4fe4d278bf9p-501, 0x1.186c76677c8f7p-495 },
+ { 0x1.37971726a776ep-501, 0x1.955251a12574cp-496 },
+ { 0x1.c225447c48b85p-502, 0x1.24e359c6528bbp-496 },
+ { 0x1.451dde15504ecp-502, 0x1.a73bf0e7dcf7bp-497 },
+ { 0x1.d592869bae136p-503, 0x1.31c1d70a5a26cp-497 },
+ { 0x1.53109f6b70a02p-503, 0x1.b9b8fd3b82acep-498 },
+ { 0x1.e99944d35a898p-504, 0x1.3f09320694d4p-498 },
+ { 0x1.61706e7ea0b42p-504, 0x1.cccb2e7856e93p-499 },
+ { 0x1.fe3aefa4cdaa2p-505, 0x1.4cba948866255p-499 },
+ { 0x1.703e40ae0b133p-505, 0x1.e0741675f15a5p-500 },
+ { 0x1.09bc65f9b8064p-505, 0x1.5ad70c9e433d4p-500 },
+ { 0x1.7f7aeba02f7efp-506, 0x1.f4b51e95f89d5p-501 },
+ { 0x1.14a9f8443d058p-506, 0x1.695f8add0a062p-501 },
+ { 0x1.8f272381e3222p-507, 0x1.04c7c2a8ead79p-501 },
+ { 0x1.1fe6a1ccca721p-507, 0x1.7854e0a5444cfp-502 },
+ { 0x1.9f437947f2743p-508, 0x1.0f822de49bc54p-502 },
+ { 0x1.2b72bc2a1bb29p-508, 0x1.87b7be69a8c26p-503 },
+ { 0x1.afd058f4d5cb9p-509, 0x1.1a8a41a9a734p-503 },
+ { 0x1.374e8637e822fp-509, 0x1.9788b1f83908ep-504 },
+ { 0x1.c0ce07e3f5247p-510, 0x1.25e0558a5c077p-504 },
+ { 0x1.437a22e46ffc9p-510, 0x1.a7c824c7683f1p-505 },
+ { 0x1.d23ca31c0220cp-511, 0x1.3184a6ce13b46p-505 },
+ { 0x1.4ff5980398e02p-511, 0x1.b8765a48c0cf1p-506 },
+ { 0x1.e41c1da9f8a5fp-512, 0x1.3d775743f06aep-506 },
+ { 0x1.5cc0cd28b81e5p-512, 0x1.c9936e428a9d9p-507 },
+ { 0x1.f66c3f065ea05p-513, 0x1.49b86c1b194cep-507 },
+ { 0x1.69db8a882e29p-513, 0x1.db1f5331fbe71p-508 },
+ { 0x1.049650c331274p-513, 0x1.5647ccc18e717p-508 },
+ { 0x1.774577e1faf4fp-514, 0x1.ed19d0b78718cp-509 },
+ { 0x1.0e2e586d3df5cp-514, 0x1.632541cab3acp-509 },
+ { 0x1.84fe1b767669bp-515, 0x1.ff82820edeaabp-510 },
+ { 0x1.17fdd44e1dc6cp-515, 0x1.705073deb552ap-510 },
+ { 0x1.9304d9065a4b9p-516, 0x1.092c6a4a26abfp-510 },
+ { 0x1.220449767742ap-516, 0x1.7dc8eab3ed87ap-511 },
+ { 0x1.a158f0df4c356p-517, 0x1.12ce032c827cep-511 },
+ { 0x1.2c4123936432bp-517, 0x1.8b8e0c1372c25p-512 },
+ { 0x1.aff97ef6163edp-518, 0x1.1ca5926404568p-512 },
+ { 0x1.36b3b4511d82bp-518, 0x1.999f1ae9f978bp-513 },
+ { 0x1.bee57a0fbbbdcp-519, 0x1.26b285aeabdbep-513 },
+ { 0x1.415b32c89327cp-519, 0x1.a7fb366632c72p-514 },
+ { 0x1.ce1bb2fa9523ep-520, 0x1.30f431387ee69p-514 },
+ { 0x1.4c36baf8c2285p-520, 0x1.b6a15925d0c25p-515 },
+ { 0x1.dd9ad3d89a4a5p-521, 0x1.3b69cf0bd5608p-515 },
+ { 0x1.57454d4c97f21p-521, 0x1.c590587256b75p-516 },
+ { 0x1.ed615f7bfd7d2p-522, 0x1.46127e8d37ba7p-516 },
+ { 0x1.6285ce2e2e29bp-522, 0x1.d4c6e38ed7f06p-517 },
+ { 0x1.fd6db0d73348ep-523, 0x1.50ed44039bd53p-517 },
+ { 0x1.6df705a8252f7p-523, 0x1.e4438317c2a1ep-518 },
+ { 0x1.06defd40bdb09p-523, 0x1.5bf9082dc8412p-518 },
+ { 0x1.79979f15ddb0dp-524, 0x1.f4049875ce63p-519 },
+ { 0x1.0f2823287afb6p-524, 0x1.673497e5a0d03p-519 },
+ { 0x1.856628e34ac2cp-525, 0x1.02042eb28efefp-519 },
+ { 0x1.17913a85a33a7p-525, 0x1.729ea3d219a53p-520 },
+ { 0x1.9161145d0e326p-526, 0x1.0a2671c8cdbeep-520 },
+ { 0x1.20191f16dc709p-526, 0x1.7e35c0288722ep-521 },
+ { 0x1.9d86b59187f4ep-527, 0x1.12680a24c58f5p-521 },
+ { 0x1.28be97e6e9065p-527, 0x1.89f8647df9662p-522 },
+ { 0x1.a9d5434377e7bp-528, 0x1.1ac7d823a316cp-522 },
+ { 0x1.31805749922c3p-528, 0x1.95e4eba9494cap-523 },
+ { 0x1.b64ad6eec66d3p-529, 0x1.2344a7c981006p-523 },
+ { 0x1.3a5cfae5998ecp-529, 0x1.a1f993b67371dp-524 },
+ { 0x1.c2e56cdffce02p-530, 0x1.2bdd30bebc795p-524 },
+ { 0x1.43530bcc0ee3ap-530, 0x1.ae347debd307p-525 },
+ { 0x1.cfa2e45eea63dp-531, 0x1.3490165a1de5p-525 },
+ { 0x1.4c60fe9d5cbc1p-531, 0x1.ba93aee1c301fp-526 },
+ { 0x1.dc80ffece4451p-532, 0x1.3d5be7b8309a9p-526 },
+ { 0x1.558533bc564e3p-532, 0x1.c7150ead1fd0ep-527 },
+ { 0x1.e97d659702f92p-533, 0x1.463f1fe01b7dap-527 },
+ { 0x1.5ebdf78f85a03p-533, 0x1.d3b6691d169e3p-528 },
+ { 0x1.f6959f5cadd73p-534, 0x1.4f3825f642bp-528 },
+ { 0x1.680982d0eea8ap-534, 0x1.e0756e0ca137bp-529 },
+ { 0x1.01e38dd55bfc7p-534, 0x1.58454d7cf072p-529 },
+ { 0x1.7165faec70a1p-535, 0x1.ed4fb1c7fef16p-530 },
+ { 0x1.088796f5a026p-535, 0x1.6164d6a338985p-530 },
+ { 0x1.7ad1726ce2f3cp-536, 0x1.fa42ad866b6p-531 },
+ { 0x1.0f3587953aeb5p-536, 0x1.6a94eea23ecd2p-531 },
+ { 0x1.8449e977fef01p-537, 0x1.03a5dffc21d0dp-531 },
+ { 0x1.15ebef6827c9dp-537, 0x1.73d3b028fc2cfp-532 },
+ { 0x1.8dcd4e591ac76p-538, 0x1.0a3416f4dd0f1p-532 },
+ { 0x1.1ca951b79a938p-538, 0x1.7d1f23d694b62p-533 },
+ { 0x1.97597e1aad586p-539, 0x1.10ca917d13a59p-533 },
+ { 0x1.236c25d3c18a2p-539, 0x1.867540c340902p-534 },
+ { 0x1.a0ec452e85047p-540, 0x1.1767d933fa0f7p-534 },
+ { 0x1.2a32d78fe110fp-540, 0x1.8fd3ed17c059fp-535 },
+ { 0x1.aa8360248e3edp-541, 0x1.1e0a6bf884441p-535 },
+ { 0x1.30fbc7c8ab284p-541, 0x1.9938feb3469d1p-536 },
+ { 0x1.b41c7c6ff8cc6p-542, 0x1.24b0bc63cac6bp-536 },
+ { 0x1.37c54cf4ab1fcp-542, 0x1.a2a23bdfb3241p-537 },
+ { 0x1.bdb5393a7ccd2p-543, 0x1.2b59324d7fd9bp-537 },
+ { 0x1.3e8db3be9418cp-543, 0x1.ac0d5c13ef72ap-538 },
+ { 0x1.c74b284572b4cp-544, 0x1.32022b5a4d882p-538 },
+ { 0x1.45533fa93710cp-544, 0x1.b57808c42df0bp-539 },
+ { 0x1.d0dbced86364cp-545, 0x1.38a9fb93eb86p-539 },
+ { 0x1.4c142bbcdb51bp-545, 0x1.bedfde3fbf9f1p-540 },
+ { 0x1.da64a6bca7adp-546, 0x1.3f4eee0ab230dp-540 },
+ { 0x1.52ceab3daa53bp-546, 0x1.c8426c9c266d4p-541 },
+ { 0x1.e3e31f45a0a96p-547, 0x1.45ef458066425p-541 },
+ { 0x1.5980ea6ad6692p-547, 0x1.d19d38acfc932p-542 },
+ { 0x1.ed549e6504cf2p-548, 0x1.4c893d1bef1fep-542 },
+ { 0x1.60290f4619f98p-548, 0x1.daedbd083bb8ep-543 },
+ { 0x1.f6b681cab013bp-549, 0x1.531b0925a021ep-543 },
+ { 0x1.66c53a6323b06p-549, 0x1.e4316b16614afp-544 },
+ { 0x1.00031007ac3e3p-549, 0x1.59a2d7cbb3c39p-544 },
+ { 0x1.6d5387be7adf6p-550, 0x1.ed65ac2de0264p-545 },
+ { 0x1.04a064f4bdd38p-550, 0x1.601ed1ee8e719p-545 },
+ { 0x1.73d20f9b5e73bp-551, 0x1.f687e2b942e41p-546 },
+ { 0x1.0931e5b5e6c43p-551, 0x1.668d1bf455ad8p-546 },
+ { 0x1.7a3ee7681856fp-552, 0x1.ff956b675583bp-547 },
+ { 0x1.0db636a632668p-552, 0x1.6cebd6a35f863p-547 },
+ { 0x1.809822a836e1fp-553, 0x1.0445cf3250898p-547 },
+ { 0x1.122bfb19eafe7p-553, 0x1.73392002f5fc2p-548 },
+ { 0x1.86dbd3e416493p-554, 0x1.08b3e84ebc2b9p-548 },
+ { 0x1.1691d609b1ec9p-554, 0x1.79731441e1e21p-549 },
+ { 0x1.8d080d9d1c96dp-555, 0x1.0d13aa83e4b01p-549 },
+ { 0x1.1ae66ac0b0b6ap-555, 0x1.7f97cea22928bp-550 },
+ { 0x1.931ae34603f62p-556, 0x1.1163bef9eebc1p-550 },
+ { 0x1.1f285d8d6c817p-556, 0x1.85a56a6965552p-551 },
+ { 0x1.99126a3e88ca5p-557, 0x1.15a2cf3193875p-551 },
+ { 0x1.23565474c154ep-557, 0x1.8b9a03d510324p-552 },
+ { 0x1.9eecbad1cb519p-558, 0x1.19cf85b21a11fp-552 },
+ { 0x1.276ef7e686addp-558, 0x1.9173b9121e9f7p-553 },
+ { 0x1.a4a7f136af77ep-559, 0x1.1de88eb969b39p-553 },
+ { 0x1.2b70f3735b79fp-559, 0x1.9730ab373bc61p-554 },
+ { 0x1.aa422e918100dp-560, 0x1.21ec98edb9593p-554 },
+ { 0x1.2f5af68314ac2p-560, 0x1.9cceff40f1fb1p-555 },
+ { 0x1.afb999f61e5d4p-561, 0x1.25da56105b758p-555 },
+ { 0x1.332bb50b471fbp-561, 0x1.a24cdf0f0a2e7p-556 },
+ { 0x1.b50c6169e961bp-562, 0x1.29b07bb123c75p-556 },
+ { 0x1.36e1e845638bbp-562, 0x1.a7a87a6267113p-557 },
+ { 0x1.ba38bae4baa67p-563, 0x1.2d6dc3e1e1b47p-557 },
+ { 0x1.3a7c4f63d9d53p-563, 0x1.ace007da9e0c8p-558 },
+ { 0x1.bf3ce55012ad1p-564, 0x1.3110ede9680cep-558 },
+ { 0x1.3df9b045b81fcp-564, 0x1.b1f1c5f28dcc9p-559 },
+ { 0x1.c4172983c2f7ep-565, 0x1.3498bef599a58p-559 },
+ { 0x1.4158d828399aep-565, 0x1.b6dbfbfb30836p-560 },
+ { 0x1.c8c5db3f49157p-566, 0x1.380402cbf1542p-560 },
+ { 0x1.44989c55b9312p-566, 0x1.bb9cfb13e7262p-561 },
+ { 0x1.cd475a1f163eep-567, 0x1.3b518c77fb7d2p-561 },
+ { 0x1.47b7dad17cf31p-567, 0x1.c0331f1f7ac71p-562 },
+ { 0x1.d19a128cff8a4p-568, 0x1.3e8036f737914p-562 },
+ { 0x1.4ab57affd05a9p-568, 0x1.c49ccfb511d2cp-563 },
+ { 0x1.d5bc7eab14dfbp-569, 0x1.418ee5e1d890ep-563 },
+ { 0x1.4d906e49e5535p-569, 0x1.c8d8810c585d4p-564 },
+ { 0x1.d9ad27381fd3dp-570, 0x1.447c860fdcf2cp-564 },
+ { 0x1.5047b0bcf6527p-570, 0x1.cce4b4e41cdcap-565 },
+ { 0x1.dd6aa46d0f45cp-571, 0x1.47480e39f8181p-565 },
+ { 0x1.52da49a426b16p-571, 0x1.d0bffb62a59f5p-566 },
+ { 0x1.e0f39ed2991f9p-572, 0x1.49f07f95c9d66p-566 },
+ { 0x1.55474c1ca1f2bp-572, 0x1.d468f3ef07049p-567 },
+ { 0x1.e446d00e60d84p-573, 0x1.4c74e66ce3841p-567 },
+ { 0x1.578dd7a37e92bp-573, 0x1.d7de4e02c6f6fp-568 },
+ { 0x1.e76303a6f7572p-574, 0x1.4ed45aae1d60cp-568 },
+ { 0x1.59ad189ced845p-574, 0x1.db1ec9f31f5e1p-569 },
+ { 0x1.ea4717be0f8c8p-575, 0x1.510e0078c325ep-569 },
+ { 0x1.5ba448d444792p-575, 0x1.de2939b1372f7p-570 },
+ { 0x1.ecf1fdc04a7dbp-576, 0x1.532108a122ff3p-570 },
+ { 0x1.5d72aff4768dap-576, 0x1.e0fc8180b06b8p-571 },
+ { 0x1.ef62bb0a0594ap-577, 0x1.550cb12e0f1dbp-571 },
+ { 0x1.5f17a3f894e1dp-577, 0x1.e39798a3f0a89p-572 },
+ { 0x1.f19869809eb8ap-578, 0x1.56d045cee7811p-572 },
+ { 0x1.60928993f7077p-578, 0x1.e5f989fd91cadp-573 },
+ { 0x1.f392381fab056p-579, 0x1.586b2049c7737p-573 },
+ { 0x1.61e2d491b1f68p-579, 0x1.e82174a67122fp-574 },
+ { 0x1.f54f6b79a6d5fp-580, 0x1.59dca8e17880fp-574 },
+ { 0x1.6308082b0b65cp-580, 0x1.ea0e8c77dc629p-575 },
+ { 0x1.f6cf5e2bb03dcp-581, 0x1.5b2456b2d3672p-575 },
+ { 0x1.6401b7549eebbp-581, 0x1.ebc01a8965943p-576 },
+ { 0x1.f8118143e7ebp-582, 0x1.5c41b0093e8e9p-576 },
+ { 0x1.64cf8501f223bp-582, 0x1.ed357da1f18bap-577 },
+ { 0x1.f9155c9a1fbd1p-583, 0x1.5d344aaa010f1p-577 },
+ { 0x1.6571245f3d39ap-583, 0x1.ee6e2a9b9efdp-578 },
+ { 0x1.f9da8f1a8a0ccp-584, 0x1.5dfbcc1628fd2p-578 },
+ { 0x1.65e6590135ap-584, 0x1.ef69acba2f951p-579 },
+ { 0x1.fa60cf0228aadp-585, 0x1.5e97e9c2cbc7fp-579 },
+ { 0x1.662ef70ab154bp-585, 0x1.f027a5f3a7f56p-580 },
+ { 0x1.faa7ea0cc6ecbp-586, 0x1.5f0869476fb64p-580 },
+ { 0x1.664ae34801e0ep-586, 0x1.f0a7cf2ae7563p-581 },
+ { 0x1.faafc59456a8cp-587, 0x1.5f4d2082760f5p-581 },
+ { 0x1.663a133fef35p-587, 0x1.f0e9f85c03b41p-582 },
+ { 0x1.fa785ea194bf2p-588, 0x1.5f65f5b366281p-582 },
+ { 0x1.65fc8d3a43882p-588, 0x1.f0ee08ba43cd5p-583 },
+ { 0x1.fa01c9ede6a16p-589, 0x1.5f52df8b025d3p-583 },
+ { 0x1.6592683be2829p-589, 0x1.f0b3febf9cbcdp-584 },
+ { 0x1.f94c33d66f35bp-590, 0x1.5f13e53118eaap-584 },
+ { 0x1.64fbcbf86f1abp-590, 0x1.f03bf02da5a7ap-585 },
+ { 0x1.f857e040665ap-591, 0x1.5ea91e400b8afp-585 },
+ { 0x1.6438f0b98cabp-591, 0x1.ef860a0000a7ap-586 },
+ { 0x1.f7252a6ecb2bbp-592, 0x1.5e12b2b611c72p-586 },
+ { 0x1.634a1f3bd0d7ep-592, 0x1.ee92905044d53p-587 },
+ { 0x1.f5b484c995f72p-593, 0x1.5d50dadc42d9dp-587 },
+ { 0x1.622fb08184d56p-593, 0x1.ed61de2b81fc4p-588 },
+ { 0x1.f40678969b4f4p-594, 0x1.5c63df237cf4dp-588 },
+ { 0x1.60ea0d9b5d711p-594, 0x1.ebf4655983167p-589 },
+ { 0x1.f21ba5a45e2afp-595, 0x1.5b4c17f7488b1p-589 },
+ { 0x1.5f79af6759efdp-595, 0x1.ea4aae160108ap-590 },
+ { 0x1.eff4c1e71b057p-596, 0x1.5a09ed86def16p-590 },
+ { 0x1.5ddf1e460242cp-596, 0x1.e86556bc034fep-591 },
+ { 0x1.ed92990861c73p-597, 0x1.589dd784842fp-591 },
+ { 0x1.5c1af1c6454bep-597, 0x1.e6451363b8311p-592 },
+ { 0x1.eaf60be99fa59p-598, 0x1.57085cdb6c23ep-592 },
+ { 0x1.5a2dd0483fd76p-598, 0x1.e3eaad7319948p-593 },
+ { 0x1.e820101a05296p-599, 0x1.554a135c6b3d2p-593 },
+ { 0x1.58186e973c8cbp-599, 0x1.e1570321beee3p-594 },
+ { 0x1.e511af403f0e1p-600, 0x1.53639f61bab8bp-594 },
+ { 0x1.55db8f7b445c6p-600, 0x1.de8b06f0475d8p-595 },
+ { 0x1.e1cc067882b19p-601, 0x1.5155b36a1ff17p-595 },
+ { 0x1.537803429dd3dp-601, 0x1.db87bf13d1856p-596 },
+ { 0x1.de5045a77840fp-602, 0x1.4f210fabcd4fep-596 },
+ { 0x1.50eea743a03bp-602, 0x1.d84e44d6006fdp-597 },
+ { 0x1.da9faec295ac1p-603, 0x1.4cc6819f5a3a9p-597 },
+ { 0x1.4e406557456e3p-603, 0x1.d4dfc3ea1615fp-598 },
+ { 0x1.d6bb950e85a76p-604, 0x1.4a46e38335bf7p-598 },
+ { 0x1.4b6e334ceafc3p-604, 0x1.d13d79b7b4d75p-599 },
+ { 0x1.d2a55c543d97bp-605, 0x1.47a31bd7fd98ap-599 },
+ { 0x1.48791257b832ep-605, 0x1.cd68b49be13bdp-600 },
+ { 0x1.ce5e780d6c294p-606, 0x1.44dc1cd628aecp-600 },
+ { 0x1.45620e7623619p-606, 0x1.c962d320e4c77p-601 },
+ { 0x1.c9e86a88f07ffp-607, 0x1.41f2e3dd79383p-601 },
+ { 0x1.422a3dd414b5ep-607, 0x1.c52d432db963cp-602 },
+ { 0x1.c544c4080f626p-608, 0x1.3ee878deaf1c1p-602 },
+ { 0x1.3ed2c02828af5p-608, 0x1.c0c9812daaed1p-603 },
+ { 0x1.c07521d52071ep-609, 0x1.3bbdedbff743p-603 },
+ { 0x1.3b5cbe0c97302p-609, 0x1.bc391730e1bf4p-604 },
+ { 0x1.bb7b2d547171ap-610, 0x1.38745dbc97fd1p-604 },
+ { 0x1.37c9685446b6bp-610, 0x1.b77d9c068db21p-605 },
+ { 0x1.b6589b1020c3ep-611, 0x1.350cecc05d9cfp-605 },
+ { 0x1.3419f75c953bcp-611, 0x1.b298b2516cc35p-606 },
+ { 0x1.b10f29bfb2a68p-612, 0x1.3188c6bf4cd49p-606 },
+ { 0x1.304faa5c619afp-612, 0x1.ad8c07976bbcp-607 },
+ { 0x1.aba0a14c264ccp-613, 0x1.2de91f0a22435p-607 },
+ { 0x1.2c6bc6b0e1424p-613, 0x1.a859534d21642p-608 },
+ { 0x1.a60ed1d150c44p-614, 0x1.2a2f2fa027fc3p-608 },
+ { 0x1.286f9728ce321p-614, 0x1.a30255dde65bep-609 },
+ { 0x1.a05b929d439abp-615, 0x1.265c387eea954p-609 },
+ { 0x1.245c6b4e79163p-615, 0x1.9d88d7b14c6d3p-610 },
+ { 0x1.9a88c12e847c2p-616, 0x1.22717ef05792fp-610 },
+ { 0x1.203396b14a77p-616, 0x1.97eea82eb8229p-611 },
+ { 0x1.94984031d9858p-617, 0x1.1e704cd7ceb7cp-611 },
+ { 0x1.1bf6702f3caf4p-617, 0x1.92359cbfdea74p-612 },
+ { 0x1.8e8bf6806bcabp-618, 0x1.1a59effeaeef1p-612 },
+ { 0x1.17a6513ed67fap-618, 0x1.8c5f8fd2e86f6p-613 },
+ { 0x1.8865ce1efe9b6p-619, 0x1.162fb960e6361p-613 },
+ { 0x1.1344953a2bc16p-619, 0x1.866e5fdcf6e5cp-614 },
+ { 0x1.8227b33ef66f4p-620, 0x1.11f2fc7a0a0a9p-614 },
+ { 0x1.0ed298ab66e97p-620, 0x1.8063ee5dc8676p-615 },
+ { 0x1.7bd39341e60d2p-621, 0x1.0da50e937b941p-615 },
+ { 0x1.0a51b89b5ac38p-621, 0x1.7a421ee53231bp-616 },
+ { 0x1.756b5bc0538cfp-622, 0x1.0947461417eb2p-616 },
+ { 0x1.05c351e298147p-622, 0x1.740ad61b23997p-617 },
+ { 0x1.6ef0f9946142ep-623, 0x1.04daf9d1f19dp-617 },
+ { 0x1.0128c07d7eac9p-623, 0x1.6dbff8cae0f32p-618 },
+ { 0x1.686657e900799p-624, 0x1.006180668cd93p-618 },
+ { 0x1.f906bdc779cfcp-625, 0x1.67636af21f0cbp-619 },
+ { 0x1.61cd5f4e4d33cp-625, 0x1.f7b85f0c272bbp-620 },
+ { 0x1.efa90ac757637p-626, 0x1.60f70ed4a200ep-620 },
+ { 0x1.5b27f4d3aafafp-626, 0x1.ee98b6b3e4f34p-621 },
+ { 0x1.e63b1303dfbfbp-627, 0x1.5a7cc414fb8aap-621 },
+ { 0x1.5477f92833195p-627, 0x1.e566abbe94f87p-622 },
+ { 0x1.dcbf7abb88524p-628, 0x1.53f666d2fde17p-622 },
+ { 0x1.4dbf47c1fc8ap-628, 0x1.dc24dc933bf6dp-623 },
+ { 0x1.d338de3492428p-629, 0x1.4d65ced070949p-623 },
+ { 0x1.46ffb60cbd76p-629, 0x1.d2d5e0d43505p-624 },
+ { 0x1.c9a9d09a6515fp-630, 0x1.46ccce9c8cdf5p-624 },
+ { 0x1.403b12a03d499p-630, 0x1.c97c4837b573ep-625 },
+ { 0x1.c014dae645fc3p-631, 0x1.402d32c6be96dp-625 },
+ { 0x1.3973247f05596p-631, 0x1.c01a996aebdb3p-626 },
+ { 0x1.b67c7ad400b86p-632, 0x1.3988c1191e211p-626 },
+ { 0x1.32a9aa5db4bb3p-632, 0x1.b6b3510058b7ap-627 },
+ { 0x1.ace321e309c7bp-633, 0x1.32e137db0ef23p-627 },
+ { 0x1.2be059f3526f7p-633, 0x1.ad48e069f2207p-628 },
+ { 0x1.a34b346493cc3p-634, 0x1.2c384d1c64d5bp-628 },
+ { 0x1.2518df52ef492p-634, 0x1.a3ddacff96f65p-629 },
+ { 0x1.99b70897047dcp-635, 0x1.258fae0968e74p-629 },
+ { 0x1.1e54dc4edf3a3p-635, 0x1.9a740f1248851p-630 },
+ { 0x1.9028e5cf277c7p-636, 0x1.1ee8fe480d92cp-630 },
+ { 0x1.1795e7e5c7ccap-636, 0x1.910e510c93fe1p-631 },
+ { 0x1.86a303af6f699p-637, 0x1.1845d75e974c6p-631 },
+ { 0x1.10dd8db9b7b2p-637, 0x1.87aeaea087811p-632 },
+ { 0x1.7d27896d87b8ep-638, 0x1.11a7c823f5ff5p-632 },
+ { 0x1.0a2d4d917179ap-638, 0x1.7e57540380a9p-633 },
+ { 0x1.73b88d266bc5ap-639, 0x1.0b10543a01766p-633 },
+ { 0x1.03869ae409b27p-639, 0x1.750a5d3814d59p-634 },
+ { 0x1.6a58134129f18p-640, 0x1.0480f391c14fcp-634 },
+ { 0x1.f9d5b8ddde221p-641, 0x1.6bc9d56645be6p-635 },
+ { 0x1.61080de06bfbp-641, 0x1.fbf623f3bedbap-636 },
+ { 0x1.ecb6d7acd34f7p-642, 0x1.6297b642274f2p-636 },
+ { 0x1.57ca5c62d05ddp-642, 0x1.ef001d6eb49dfp-637 },
+ { 0x1.dfb32aa129cc6p-643, 0x1.5975e7810e7p-637 },
+ { 0x1.4ea0caf213789p-643, 0x1.e222785106b16p-638 },
+ { 0x1.d2cd2eb59de4cp-644, 0x1.50663e5d53392p-638 },
+ { 0x1.458d1220fa79dp-644, 0x1.d55fbee497ep-639 },
+ { 0x1.c60744f31e198p-645, 0x1.476a7d28a437bp-639 },
+ { 0x1.3c90d697e5b5dp-645, 0x1.c8ba606fb6833p-640 },
+ { 0x1.b963b20518321p-646, 0x1.3e8452ecdbe84p-640 },
+ { 0x1.33ada8cfe418fp-646, 0x1.bc34b0b8bbc6p-641 },
+ { 0x1.ace49de2283aep-647, 0x1.35b55b1b3d652p-641 },
+ { 0x1.2ae504dc15f24p-647, 0x1.afd0e79df00ebp-642 },
+ { 0x1.a08c1388db34fp-648, 0x1.2cff1d49f192cp-642 },
+ { 0x1.223852412258p-648, 0x1.a39120c175c51p-643 },
+ { 0x1.945c00d028182p-649, 0x1.24630cff92d39p-643 },
+ { 0x1.19a8e3da77fbep-649, 0x1.97775b48ec1aap-644 },
+ { 0x1.8856364b336c5p-650, 0x1.1be2898c8a8a4p-644 },
+ { 0x1.1137f7cd08642p-650, 0x1.8b8579b06ca2cp-645 },
+ { 0x1.7c7c673fe436ep-651, 0x1.137eddf1f97aep-645 },
+ { 0x1.08e6b787233bap-651, 0x1.7fbd41b078795p-646 },
+ { 0x1.70d029afc4472p-652, 0x1.0b3940d5da6fcp-646 },
+ { 0x1.00b637cd0ec0bp-652, 0x1.74205c365c73ep-647 },
+ { 0x1.6552f6729a259p-653, 0x1.0312d48405757p-647 },
+ { 0x1.f14ef1a3e4ac2p-654, 0x1.68b0556e87723p-648 },
+ { 0x1.5a06296220023p-654, 0x1.f6194df7630e5p-649 },
+ { 0x1.e176ccb941b53p-655, 0x1.5d6e9ce0425a7p-649 },
+ { 0x1.4eeb0196310cdp-655, 0x1.e64f64121563ep-650 },
+ { 0x1.d1e5afef936dap-656, 0x1.525c859a2ea9ap-650 },
+ { 0x1.4402a1b0bd9dfp-656, 0x1.d6c9b6d4d6fc5p-651 },
+ { 0x1.c29d225a230e3p-657, 0x1.477b466ee6cc1p-651 },
+ { 0x1.394e1038ce88ep-657, 0x1.c789ea0183d02p-652 },
+ { 0x1.b39e83951bdaap-658, 0x1.3ccbfa4112a58p-652 },
+ { 0x1.2ece3803d8d68p-658, 0x1.b8917a154498bp-653 },
+ { 0x1.a4eb0c6436cf4p-659, 0x1.324fa05e3adc4p-653 },
+ { 0x1.2483e8ac9d061p-659, 0x1.a9e1bcd30af1fp-654 },
+ { 0x1.9683cf6400112p-660, 0x1.28071ce79e917p-654 },
+ { 0x1.1a6fd716c7c18p-660, 0x1.9b7be1e1550cbp-655 },
+ { 0x1.8869b9cc95345p-661, 0x1.1df33948493fap-655 },
+ { 0x1.10929dfe85b79p-661, 0x1.8d60f37a227b9p-656 },
+ { 0x1.7a9d9444b613ep-662, 0x1.1414a4b7a1729p-656 },
+ { 0x1.06ecbe9338febp-662, 0x1.7f91d72bfd333p-657 },
+ { 0x1.6d2003c3fdf54p-663, 0x1.0a6bf4c7a4f95p-657 },
+ { 0x1.fafd4238f8063p-664, 0x1.720f4eaaf4bbbp-658 },
+ { 0x1.5ff18a8317f0ap-664, 0x1.00f9a5fe04069p-658 },
+ { 0x1.e8912b5139031p-665, 0x1.64d9f8b065b73p-659 },
+ { 0x1.531288f8c01c7p-665, 0x1.ef7c38ee94e41p-660 },
+ { 0x1.d695a98770e4bp-666, 0x1.57f251e86550ep-660 },
+ { 0x1.46833ee262b1p-666, 0x1.dd73492689d2p-661 },
+ { 0x1.c50b006d4e015p-667, 0x1.4b58b5eba6cc7p-661 },
+ { 0x1.3a43cc572b3d3p-667, 0x1.cbd8e7539eac7p-662 },
+ { 0x1.b3f14799b1616p-668, 0x1.3f0d6044b145dp-662 },
+ { 0x1.2e5432e458097p-668, 0x1.baad518e7426ep-663 },
+ { 0x1.a3486c40b74f1p-669, 0x1.33106d7f3cac9p-663 },
+ { 0x1.22b456b1a8db7p-669, 0x1.a9f09adee91e3p-664 },
+ { 0x1.931032d667261p-670, 0x1.2761dc408f1efp-664 },
+ { 0x1.1763ffacc46acp-670, 0x1.99a2acce5bd7fp-665 },
+ { 0x1.834838ba6fe3dp-671, 0x1.1c018e67b6eaep-665 },
+ { 0x1.0c62daba74e7cp-671, 0x1.89c349043d67ep-666 },
+ { 0x1.73eff5eb5eca5p-672, 0x1.10ef4a3481a29p-666 },
+ { 0x1.01b07aeca1f42p-672, 0x1.7a520aeb63faep-667 },
+ { 0x1.6506bebfc67bdp-673, 0x1.062abb7415c63p-667 },
+ { 0x1.ee98b577ea7cap-674, 0x1.6b4e695e9099fp-668 },
+ { 0x1.568bc5a3d72eep-674, 0x1.f766e96435041p-669 },
+ { 0x1.da6bba883d22ap-675, 0x1.5cb7b85aa6067p-669 },
+ { 0x1.487e1cd9f3e43p-675, 0x1.e311e0dabf963p-670 },
+ { 0x1.c6d89f0368fc1p-676, 0x1.4e8d2ab5187d6p-670 },
+ { 0x1.3adcb83cdccc3p-676, 0x1.cf55249e0172ap-671 },
+ { 0x1.b3ddd3216f86ep-677, 0x1.40cdd3d52967cp-671 },
+ { 0x1.2da66f0214306p-677, 0x1.bc2f50c60488ep-672 },
+ { 0x1.a1799fd5925f4p-678, 0x1.3378a96e8e29ap-672 },
+ { 0x1.20d9fd7b31257p-678, 0x1.a99ed8a2f2e6bp-673 },
+ { 0x1.8faa294857a39p-679, 0x1.268c853c2e48dp-673 },
+ { 0x1.147606d4e1ee3p-679, 0x1.97a2092e9b19dp-674 },
+ { 0x1.7e6d714d6fce7p-680, 0x1.1a0826b9b2f1ep-674 },
+ { 0x1.087916d26f37cp-680, 0x1.86370b7b69b46p-675 },
+ { 0x1.6dc159d3dbce3p-681, 0x1.0dea34dab05c3p-675 },
+ { 0x1.f9c3470942341p-682, 0x1.755be71f29feap-676 },
+ { 0x1.5da3a74ec8bc7p-682, 0x1.02313fbe40a01p-676 },
+ { 0x1.e35c1df5edf07p-683, 0x1.650e8497f58cdp-677 },
+ { 0x1.4e120315adc06p-683, 0x1.edb784bbee452p-678 },
+ { 0x1.cdb951dc67cbfp-684, 0x1.554cafa9d0c34p-678 },
+ { 0x1.3f09fdba5037ep-684, 0x1.d7d0486e476ccp-679 },
+ { 0x1.b8d760c6a3faap-685, 0x1.461419b3892c2p-679 },
+ { 0x1.308911536a23dp-685, 0x1.c2a975dad9bep-680 },
+ { 0x1.a4b2aa8c000cap-686, 0x1.37625bf981bdbp-680 },
+ { 0x1.228ca3bac6e07p-686, 0x1.ae3f97cbb25cep-681 },
+ { 0x1.914773f3bbbacp-687, 0x1.2934f9e530badp-681 },
+ { 0x1.151208bdc254ep-687, 0x1.9a8f1bb2e0d78p-682 },
+ { 0x1.7e91e9c37a26bp-688, 0x1.1b8963382a86p-682 },
+ { 0x1.0816843f2edd8p-688, 0x1.879454bd5bf1ap-683 },
+ { 0x1.6c8e23b87885fp-689, 0x1.0e5cf631ac83bp-683 },
+ { 0x1.f72e98937c4f8p-690, 0x1.754b7ed21d736p-684 },
+ { 0x1.5b38276a48eap-690, 0x1.01ad01a5b2ddp-684 },
+ { 0x1.df23162441e8bp-691, 0x1.63b0c17c2afp-685 },
+ { 0x1.4a8beb16012edp-691, 0x1.eaed8e09770edp-686 },
+ { 0x1.c804c1d0522ebp-692, 0x1.52c032be62aabp-686 },
+ { 0x1.3a855850eeeeap-692, 0x1.d36ef8a6e08fap-687 },
+ { 0x1.b1cdcc2ca0214p-693, 0x1.4275d9d00481dp-687 },
+ { 0x1.2b204ea20186ep-693, 0x1.bcd89c2310d59p-688 },
+ { 0x1.9c78595e362cep-694, 0x1.32cdb1c10f0eep-688 },
+ { 0x1.1c58a6013aaeep-694, 0x1.a724c21e93002p-689 },
+ { 0x1.87fe848fd6bffp-695, 0x1.23c3ac05a8c19p-689 },
+ { 0x1.0e2a313c94bb5p-695, 0x1.924da8624908p-690 },
+ { 0x1.745a6341bd9d3p-696, 0x1.1553b2e7eba16p-690 },
+ { 0x1.0090c041eb55fp-696, 0x1.7e4d844204d5fp-691 },
+ { 0x1.61860872f36c7p-697, 0x1.0779abdf88654p-691 },
+ { 0x1.e710449b20327p-698, 0x1.6b1e85d9cfdc3p-692 },
+ { 0x1.4f7b87a3ccd22p-698, 0x1.f462f39da55f5p-693 },
+ { 0x1.ce184ffaa0275p-699, 0x1.58badb2559681p-693 },
+ { 0x1.3e34f7b15484dp-699, 0x1.daedfe49c8a9fp-694 },
+ { 0x1.b6314a8f93441p-700, 0x1.471cb2f12adecp-694 },
+ { 0x1.2dac75898461p-700, 0x1.c28c3fc94131bp-695 },
+ { 0x1.9f52e6b0168fbp-701, 0x1.363e3fa56683p-695 },
+ { 0x1.1ddc26b854422p-701, 0x1.ab358720f461fp-696 },
+ { 0x1.8974e49b18481p-702, 0x1.2619b9e9f9276p-696 },
+ { 0x1.0ebe3bcdc6652p-702, 0x1.94e1adf5ef17ap-697 },
+ { 0x1.748f15c14a99p-703, 0x1.16a96324493c1p-697 },
+ { 0x1.004cf29d383afp-703, 0x1.7f889bf8109c7p-698 },
+ { 0x1.60995fd7916b4p-704, 0x1.07e787ce8decbp-698 },
+ { 0x1.e50530acb7a2bp-705, 0x1.6b224a16aa4ep-699 },
+ { 0x1.4d8bbfb38c98p-705, 0x1.f39d03522ee6ep-700 },
+ { 0x1.cab316f0b29dep-706, 0x1.57a6c57f8fed2p-700 },
+ { 0x1.3b5e4bf3051bbp-706, 0x1.d8b1738bdcb74p-701 },
+ { 0x1.b1987b3f62cd2p-707, 0x1.450e32693ba8dp-701 },
+ { 0x1.2a09376f26716p-707, 0x1.bf0154de94403p-702 },
+ { 0x1.99aa6a5f22416p-708, 0x1.3350cea8cd61ap-702 },
+ { 0x1.1984d37c8d151p-708, 0x1.a681c1d2f0b94p-703 },
+ { 0x1.82de1daeb9c47p-709, 0x1.2266f414ce57bp-703 },
+ { 0x1.09c991f950457p-709, 0x1.8f27fe21c9591p-704 },
+ { 0x1.6d28fdea9871ap-710, 0x1.12491ab5c17d9p-704 },
+ { 0x1.f5a00e548f085p-711, 0x1.78e979aa0c9bep-705 },
+ { 0x1.5880a5ae03598p-711, 0x1.02efdac5a4ff4p-705 },
+ { 0x1.d921d6d1c821bp-712, 0x1.63bbd32217718p-706 },
+ { 0x1.44dae3b23367bp-712, 0x1.e8a7dcff4677cp-707 },
+ { 0x1.be0a394617721p-713, 0x1.4f94da865b2a3p-707 },
+ { 0x1.322dbccd73cabp-713, 0x1.ccdc67829105bp-708 },
+ { 0x1.a44b3f5ce9c8bp-714, 0x1.3c6a934743c05p-708 },
+ { 0x1.206f6db46b93p-714, 0x1.b26f5afd4ebc9p-709 },
+ { 0x1.8bd742e227a38p-715, 0x1.2a3336386b4d7p-709 },
+ { 0x1.0f966c7fd2396p-715, 0x1.99530a15ce61ap-710 },
+ { 0x1.74a0efc06d36ep-716, 0x1.18e533433f227p-710 },
+ { 0x1.ff32d3f1c0a49p-717, 0x1.817a166d90dbdp-711 },
+ { 0x1.5e9b45aff1bep-717, 0x1.087732df4f3abp-711 },
+ { 0x1.e0dea55db81c4p-718, 0x1.6ad7728d6db01p-712 },
+ { 0x1.49b9999981d6cp-718, 0x1.f1c02ea5235f3p-713 },
+ { 0x1.c41e9fb058b1ep-719, 0x1.555e63841a093p-713 },
+ { 0x1.35ef96b0fe655p-719, 0x1.d42dfb77e321ep-714 },
+ { 0x1.a8e19002cb47fp-720, 0x1.4102823a6a0a2p-714 },
+ { 0x1.23313f4adb099p-720, 0x1.b8267dd51660dp-715 },
+ { 0x1.8f16bf19917acp-721, 0x1.2db7bc80b123ep-715 },
+ { 0x1.1172ed701cd4p-721, 0x1.9d98e007ff597p-716 },
+ { 0x1.76adf2095d808p-722, 0x1.1b7255d8af1cep-716 },
+ { 0x1.00a953345bce4p-722, 0x1.8474c5f89cf1fp-717 },
+ { 0x1.5f976a86ba7a3p-723, 0x1.0a26e7ff7c8ap-717 },
+ { 0x1.e192f5a290a0dp-724, 0x1.6caa4dc34bcc6p-718 },
+ { 0x1.49c3e6e576cf8p-724, 0x1.f394c675d5da1p-719 },
+ { 0x1.c3918d16606afp-725, 0x1.562a0ffd36fefp-719 },
+ { 0x1.3524a1ccb90cep-725, 0x1.d4a41cdb95576p-720 },
+ { 0x1.a739e0c3f00b3p-726, 0x1.40e51faa74ee4p-720 },
+ { 0x1.21ab51a49a64p-726, 0x1.b7670ded07be7p-721 },
+ { 0x1.8c781323e2b8bp-727, 0x1.2ccd09eaa341p-721 },
+ { 0x1.0f4a27c210b83p-727, 0x1.9bc980b6cd88bp-722 },
+ { 0x1.7338f3cfd4b18p-728, 0x1.19d3d560c7458p-722 },
+ { 0x1.fbe79eabbab8bp-729, 0x1.81b807901b2ddp-723 },
+ { 0x1.5b69fdd784131p-729, 0x1.07ec015b26bbfp-723 },
+ { 0x1.db36d8463b3e1p-730, 0x1.691fdebe382bep-724 },
+ { 0x1.44f955c9776f6p-730, 0x1.ee11097f70374p-725 },
+ { 0x1.bc693203fe92cp-731, 0x1.51eeeac7320bep-725 },
+ { 0x1.2fd5c7756dd24p-731, 0x1.ce39998362bf9p-726 },
+ { 0x1.9f66cc65fb2cbp-732, 0x1.3c13b67a17ff2p-726 },
+ { 0x1.1beec36eb8502p-732, 0x1.b03976c943068p-727 },
+ { 0x1.8418af0dd65edp-733, 0x1.277d70b2ebc6fp-727 },
+ { 0x1.09345c546e7cdp-733, 0x1.93f94ba2c6b6ap-728 },
+ { 0x1.6a68c4bfd764bp-734, 0x1.141be9e049453p-728 },
+ { 0x1.ef2e87ca7b717p-735, 0x1.7962a50231832p-729 },
+ { 0x1.5241d71eb6e19p-735, 0x1.01df915097b64p-729 },
+ { 0x1.ce118fc8beeeap-736, 0x1.605fee84767fp-730 },
+ { 0x1.3b8f8a28fd848p-736, 0x1.e172e498cd2fcp-731 },
+ { 0x1.aef59daa19c93p-737, 0x1.48dc6e3757e71p-731 },
+ { 0x1.263e577f574dp-737, 0x1.c1366206ca036p-732 },
+ { 0x1.91bfa9231de5cp-738, 0x1.32c440230ef3ap-732 },
+ { 0x1.123b897af1af4p-738, 0x1.a2ee0ea25a216p-733 },
+ { 0x1.7655cd85a2773p-739, 0x1.1e04519eb8f87p-733 },
+ { 0x1.feea6c3554149p-740, 0x1.867f82bdccb8fp-734 },
+ { 0x1.5c9f427a491a4p-740, 0x1.0a8a5c7678dffp-734 },
+ { 0x1.dbb4739afff2ep-741, 0x1.6bd1744d1513ep-735 },
+ { 0x1.4484548d479a3p-741, 0x1.f089c3d3d8b6fp-736 },
+ { 0x1.bab46440d8e4bp-742, 0x1.52cbafb8bc99fp-736 },
+ { 0x1.2dee5d96e696ep-742, 0x1.ce464b1286c0dp-737 },
+ { 0x1.9bcaf0aad775cp-743, 0x1.3b571085ef9dbp-737 },
+ { 0x1.18c7bd07b007fp-743, 0x1.ae2a4fedee59cp-738 },
+ { 0x1.7eda37d26ae66p-744, 0x1.255d79dbe3905p-738 },
+ { 0x1.04fbd01fd3b9ap-744, 0x1.9017432798e26p-739 },
+ { 0x1.63c5ba199716fp-745, 0x1.10c9ceee61d28p-739 },
+ { 0x1.e4edd431a7a4p-746, 0x1.73effa34f57abp-740 },
+ { 0x1.4a724e2f6eadep-746, 0x1.fb0fd6a99ec28p-741 },
+ { 0x1.c24c9890314cdp-747, 0x1.5998a4600495bp-741 },
+ { 0x1.32c615eef6a3dp-747, 0x1.d70936a92f04ap-742 },
+ { 0x1.a1f03c81340fdp-748, 0x1.40f6bfdad1f14p-742 },
+ { 0x1.1ca87340e1c39p-748, 0x1.b55b284add8c1p-743 },
+ { 0x1.83b6cbf2ba29fp-749, 0x1.29f10ece9036ep-743 },
+ { 0x1.0801fd07f7284p-749, 0x1.95e2d86ae92c8p-744 },
+ { 0x1.677ffffc31b92p-750, 0x1.146f8c6e8dc57p-744 },
+ { 0x1.e978e83ebd95dp-751, 0x1.787f26e598ebbp-745 },
+ { 0x1.4d2d2f5dd4096p-751, 0x1.005b6216a17eap-745 },
+ { 0x1.c58570e2f641dp-752, 0x1.5d10973fbab06p-746 },
+ { 0x1.34a13f272cdfap-752, 0x1.db3db8f832a58p-747 },
+ { 0x1.a4017c5ace0dep-753, 0x1.4379416dfac63p-747 },
+ { 0x1.1dc0938cfb932p-753, 0x1.b84ac1ef46255p-748 },
+ { 0x1.84c7064147f81p-754, 0x1.2b9cc2c3d6738p-748 },
+ { 0x1.087100f5e6429p-754, 0x1.97b6c5dc3637ap-749 },
+ { 0x1.67b20873fc995p-755, 0x1.15602f1227af8p-749 },
+ { 0x1.e9337a8979dap-756, 0x1.795cb2bb480b6p-750 },
+ { 0x1.4ca0667456eb8p-756, 0x1.00aa01fc8a73ep-750 },
+ { 0x1.c446a2ccade1cp-757, 0x1.5d196927cdaccp-751 },
+ { 0x1.3371d92c55c69p-757, 0x1.dac421184af19p-752 },
+ { 0x1.a1ef1650d3562p-758, 0x1.42cba823b93cbp-752 },
+ { 0x1.1c07db1df4cf6p-758, 0x1.b6e2f60b615c1p-753 },
+ { 0x1.8202debc2593cp-759, 0x1.2a53f94211ba9p-753 },
+ { 0x1.064595037ce7bp-759, 0x1.95853e0fd75adp-754 },
+ { 0x1.645a58ac6913cp-760, 0x1.13949d3b2fbd2p-754 },
+ { 0x1.e41f95cc492cep-761, 0x1.768213ee2ba9cp-755 },
+ { 0x1.48d0194e5b153p-761, 0x1.fce2f1e195a7ap-756 },
+ { 0x1.be99935f38c42p-762, 0x1.59b2d772c1b04p-756 },
+ { 0x1.2f40d4a5d287p-762, 0x1.d5a005ce1b15dp-757 },
+ { 0x1.9bc8aa74c3805p-763, 0x1.3ef3138f8ae58p-757 },
+ { 0x1.178b448b82b16p-763, 0x1.b12e626e3c8a1p-758 },
+ { 0x1.7b7f2dc7fa066p-764, 0x1.2620652c3102cp-758 },
+ { 0x1.0190106456396p-764, 0x1.8f5ecffd9c995p-759 },
+ { 0x1.5d92194746ef2p-765, 0x1.0f1a62a97a48ep-759 },
+ { 0x1.da636b2add63ap-766, 0x1.7004d0a0dd3fcp-760 },
+ { 0x1.41d8f14e2d235p-766, 0x1.f38508375a815p-761 },
+ { 0x1.b4a8e16df3a2ep-767, 0x1.52f67f4a45dbdp-761 },
+ { 0x1.282da2ee06e9fp-767, 0x1.cbf8187da97p-762 },
+ { 0x1.91bc4f0e82a1p-768, 0x1.380c6fa6ddd1bp-762 },
+ { 0x1.106c65473611bp-768, 0x1.a757e44dde4fbp-763 },
+ { 0x1.716ca73d3a1dcp-769, 0x1.1f218f165083cp-763 },
+ { 0x1.f4e737e667fe6p-770, 0x1.8571975a9ba0cp-764 },
+ { 0x1.538bdbc88035p-770, 0x1.081306aee058bp-764 },
+ { 0x1.cc4774fe05a13p-771, 0x1.661571375ee31p-765 },
+ { 0x1.37eeb586702afp-771, 0x1.e5803c9b677cp-766 },
+ { 0x1.a6be51e94d2c3p-772, 0x1.49169d29f057fp-766 },
+ { 0x1.1e6cae3cc5ce4p-772, 0x1.be144165bfdadp-767 },
+ { 0x1.841452e30c6ecp-773, 0x1.2e4b0b7596d86p-767 },
+ { 0x1.06dfcc0330324p-773, 0x1.99a8814f82396p-768 },
+ { 0x1.64157d8dbcaa1p-774, 0x1.158b4c1d7aa61p-768 },
+ { 0x1.e248fc3725278p-775, 0x1.7806fe5adc0dep-769 },
+ { 0x1.4691284199248p-775, 0x1.fd64d63539ac4p-770 },
+ { 0x1.ba32f675bcca1p-776, 0x1.58fd2560c98e3p-770 },
+ { 0x1.2b59cb5fcd07p-776, 0x1.d33b9c01b8858p-771 },
+ { 0x1.953f4278d9771p-777, 0x1.3c5b9e7be019ep-771 },
+ { 0x1.1244d4a198783p-777, 0x1.ac5a261b57bd2p-772 },
+ { 0x1.7333ac721d353p-778, 0x1.21f61f6e6a3a5p-772 },
+ { 0x1.f654f8b2c9938p-779, 0x1.8883e334bf813p-773 },
+ { 0x1.53d9d5f4e3889p-779, 0x1.09a33ffab8174p-773 },
+ { 0x1.cbcb3935e8707p-780, 0x1.678037d69a88ap-774 },
+ { 0x1.36fefd85e37f7p-780, 0x1.e678a0474dd4dp-775 },
+ { 0x1.a4a7147e53789p-781, 0x1.491a44a8cc267p-775 },
+ { 0x1.1c73c8c2f3143p-781, 0x1.bd3a60953bab8p-776 },
+ { 0x1.80a7df6e9e4abp-782, 0x1.2d20af56e98e4p-776 },
+ { 0x1.040c111171b21p-782, 0x1.9748563f2a02cp-777 },
+ { 0x1.5f9153468350dp-783, 0x1.13656dff66048p-777 },
+ { 0x1.db3d65827b6f1p-784, 0x1.7463a2ae57157p-778 },
+ { 0x1.412b4a3b0b6bbp-784, 0x1.f77b2a384d071p-779 },
+ { 0x1.b20abd232bd72p-785, 0x1.5451ae34b02aep-779 },
+ { 0x1.25417f5fe18aap-785, 0x1.cc024fa52d21ep-780 },
+ { 0x1.8c38db09c3d68p-786, 0x1.36dbe645ba702p-780 },
+ { 0x1.0ba351c6b2c44p-786, 0x1.a415d531b6e85p-781 },
+ { 0x1.69856de02317p-787, 0x1.1bcf7eeeba2f5p-781 },
+ { 0x1.e847157246bfcp-788, 0x1.7f70703ac5558p-782 },
+ { 0x1.49b2d16422141p-788, 0x1.02fd377359b1p-782 },
+ { 0x1.bd304de355d85p-789, 0x1.5dd1b0bb84b26p-783 },
+ { 0x1.2c87c2ff697dcp-789, 0x1.d87243e77ecadp-784 },
+ { 0x1.95b4456f24a66p-790, 0x1.3efdb3b369292p-784 },
+ { 0x1.11cf1a60f1d84p-790, 0x1.aeb4dc01a4631p-785 },
+ { 0x1.718a9184a8678p-791, 0x1.22bcd99dbdb06p-785 },
+ { 0x1.f2af0be1fde49p-792, 0x1.88766c06b0833p-786 },
+ { 0x1.507007917e3d9p-792, 0x1.08db80d427d79p-786 },
+ { 0x1.c5e695f15072bp-793, 0x1.65709eb54bf5ep-787 },
+ { 0x1.32266540e08c2p-793, 0x1.e253876b38acep-788 },
+ { 0x1.9cf012acb820bp-794, 0x1.45623a2f6a451p-788 },
+ { 0x1.1673fda512b46p-794, 0x1.b6f674d703273p-789 },
+ { 0x1.777d05328bd26p-795, 0x1.280eca736b4b1p-789 },
+ { 0x1.fa46d62b8e57dp-796, 0x1.8f4d804e3ad6fp-790 },
+ { 0x1.5544c8bc23e1cp-796, 0x1.0d3e50a2eecdcp-790 },
+ { 0x1.cc068b1dc8ab2p-797, 0x1.6b0c7763ce52bp-791 },
+ { 0x1.36042b906571p-797, 0x1.e979edc5b3767p-792 },
+ { 0x1.a1cbbab815b4cp-798, 0x1.49ecd657d5dd6p-792 },
+ { 0x1.197d0fe71564cp-798, 0x1.bcb59141dc715p-793 },
+ { 0x1.7b41f3bcb1869p-799, 0x1.2bad65a82bb23p-793 },
+ { 0x1.feec24eca8006p-800, 0x1.93d6de18ac6bfp-794 },
+ { 0x1.581b387627669p-800, 0x1.1011dd6dfecf6p-794 },
+ { 0x1.cf746ccaba032p-801, 0x1.6e8be31f2fe24p-795 },
+ { 0x1.380f8b864e1acp-801, 0x1.edc51c8649aaap-796 },
+ { 0x1.a4312cc2f816ap-802, 0x1.4c88f43732a1p-796 },
+ { 0x1.1adc83c96accfp-802, 0x1.bfd81ed74f1cdp-797 },
+ { 0x1.7cc835281bbf3p-803, 0x1.2d883a292df3bp-797 },
+ { 0x1.0044e6f2b903fp-803, 0x1.95fde403b5724p-798 },
+ { 0x1.58e66674c0f82p-804, 0x1.11494966870b7p-798 },
+ { 0x1.d0209514d613dp-805, 0x1.6fdef1ca550b3p-799 },
+ { 0x1.383f2f4495aedp-805, 0x1.ef217eb67d36dp-800 },
+ { 0x1.a41575f0363d6p-806, 0x1.4d2aaa5b8e28ap-800 },
+ { 0x1.1a8c12a0cae91p-806, 0x1.c04fcbf1fddd8p-801 },
+ { 0x1.7c08d08f2ccbbp-807, 0x1.2d96cdd2a30b8p-801 },
+ { 0x1.ff186c5b90604p-808, 0x1.95b8ba50a2687p-802 },
+ { 0x1.57a2b0b1c4c86p-808, 0x1.10df03cd711e3p-802 },
+ { 0x1.ce07ef98af2aep-809, 0x1.6eff939f51c8fp-803 },
+ { 0x1.36923c5eb270bp-809, 0x1.ed88d96607fb4p-804 },
+ { 0x1.a1791489717bfp-810, 0x1.4bcf1445c1d61p-804 },
+ { 0x1.188d2c2d680a3p-810, 0x1.be1a747b458c8p-805 },
+ { 0x1.7907312c7e255p-811, 0x1.2bd8dde16ba8ap-805 },
+ { 0x1.fa9e995f4c414p-812, 0x1.93089dc23e417p-806 },
+ { 0x1.5455df149c7b5p-812, 0x1.0ed4f34d6e965p-806 },
+ { 0x1.c93410e8142f8p-813, 0x1.6bf1c754a3325p-807 },
+ { 0x1.33105a5b594f7p-813, 0x1.e9027b1c5a4abp-808 },
+ { 0x1.9c67f441e11b3p-814, 0x1.487c687197597p-808 },
+ { 0x1.14e8ebae7496ep-814, 0x1.b942323a72767p-809 },
+ { 0x1.73d10c597b774p-815, 0x1.285660efb3e9ap-809 },
+ { 0x1.f330b99c7f9e7p-816, 0x1.8df9d62fb9c5ep-810 },
+ { 0x1.4f0ef77c81a6fp-816, 0x1.0b34677fe9486p-810 },
+ { 0x1.c1baedb5f2e65p-817, 0x1.66c37bb05de1ep-811 },
+ { 0x1.2dc9788ad9864p-817, 0x1.e1a30436bcde5p-812 },
+ { 0x1.94f913add4907p-818, 0x1.4341c90c553e7p-812 },
+ { 0x1.0fafd2c40ba27p-818, 0x1.b1dd0ffc5d04bp-813 },
+ { 0x1.6c7df995241d1p-819, 0x1.231f4a6757469p-813 },
+ { 0x1.e8f062cc963cep-820, 0x1.86a35930ed5e1p-814 },
+ { 0x1.47e5cbff0d92ep-820, 0x1.060dd236f49a3p-814 },
+ { 0x1.b7be34be4e18dp-821, 0x1.5f8c25cd122d7p-815 },
+ { 0x1.26d5559b935e7p-821, 0x1.d78bca82e9f37p-816 },
+ { 0x1.8b4dd6af9c05dp-822, 0x1.3c36d15093021p-816 },
+ { 0x1.08f94cfc79158p-822, 0x1.a80c62c44a65bp-817 },
+ { 0x1.632ec0e0d009cp-823, 0x1.1c4b11ed6627ap-817 },
+ { 0x1.dc0b5f2e40ea4p-824, 0x1.7d261cc2edf72p-818 },
+ { 0x1.3efa480ea698bp-824, 0x1.fef096f5252fp-819 },
+ { 0x1.ab6a5245de9e5p-825, 0x1.566c107178d1fp-819 },
+ { 0x1.1e52cde409267p-825, 0x1.cae9de8f00c0bp-820 },
+ { 0x1.7f910d0084829p-826, 0x1.337ae444bd293p-820 },
+ { 0x1.00e3012bd4171p-826, 0x1.9bfbcfe9dc1e8p-821 },
+ { 0x1.580c66bfc7cf5p-827, 0x1.13f803c0631d9p-821 },
+ { 0x1.ccba595fe34b5p-828, 0x1.71ac2109d33c9p-822 },
+ { 0x1.347383dcf4a9bp-828, 0x1.ef21caa7d80c3p-823 },
+ { 0x1.9cf52785fcd1fp-829, 0x1.4b8b6bbdb7a4fp-823 },
+ { 0x1.1466f7a4ba4b3p-829, 0x1.bbf4bcf8ca0c3p-824 },
+ { 0x1.71f5b701cb667p-830, 0x1.2934441fdae8bp-824 },
+ { 0x1.ef1fef5338f87p-831, 0x1.8de00a5d4cff3p-825 },
+ { 0x1.4b46ffc2e70ccp-831, 0x1.0a4a61359d63ap-825 },
+ { 0x1.bb3f3e667d5e5p-832, 0x1.64673b39bdd54p-826 },
+ { 0x1.287ea78b8278fp-832, 0x1.dcf3acd0cc1f4p-827 },
+ { 0x1.8c9c8347a2863p-833, 0x1.3f1926f0c2aa4p-827 },
+ { 0x1.093c166d47d9p-833, 0x1.aaecb94ca24e1p-828 },
+ { 0x1.62b5957e6b822p-834, 0x1.1d8efbbc88d6cp-828 },
+ { 0x1.da4f3c5b8c56fp-835, 0x1.7df554174928cp-829 },
+ { 0x1.3d1457a1afdaep-835, 0x1.fed6b4a9440a8p-830 },
+ { 0x1.a7e3665ffae25p-836, 0x1.558fae0fed7aap-830 },
+ { 0x1.1b4da97b89113p-836, 0x1.c8b307e047613p-831 },
+ { 0x1.7aa46b2ec675cp-837, 0x1.3149a005e5984p-831 },
+ { 0x1.fa00e080e536p-838, 0x1.9819329634547p-832 },
+ { 0x1.520f92dcad4a2p-838, 0x1.10bba52994e8ep-832 },
+ { 0x1.c3a9666328faap-839, 0x1.6c7dd2d93c0f9p-833 },
+ { 0x1.2dae795ce73b6p-839, 0x1.e70fd5d6d806dp-834 },
+ { 0x1.92f5963d343cfp-840, 0x1.45629dffe1fa7p-834 },
+ { 0x1.0d15f439254bep-840, 0x1.b2b2e959996bp-835 },
+ { 0x1.675546ac2c967p-841, 0x1.2255364dfcfd7p-835 },
+ { 0x1.dfca1ff236f02p-842, 0x1.83c6a3841fccap-836 },
+ { 0x1.4046155930cfbp-842, 0x1.02ee197efc99dp-836 },
+ { 0x1.ab8846c89a496p-843, 0x1.59bfc8bdbfffep-837 },
+ { 0x1.1d5226b496f7ep-843, 0x1.cd9f4c973304p-838 },
+ { 0x1.7cc7edd2bedd1p-844, 0x1.3420703d360eap-838 },
+ { 0x1.fc1e021531b11p-845, 0x1.9b4a6e4580455p-839 },
+ { 0x1.52f9fd29afa7bp-845, 0x1.1276cde31355ep-839 },
+ { 0x1.c439018f9e7bp-846, 0x1.6e44a0da72dedp-840 },
+ { 0x1.2d9d4a3bfacfap-846, 0x1.e8b82d35e9882p-841 },
+ { 0x1.9247c7d6b7109p-847, 0x1.4603c1a2de688p-841 },
+ { 0x1.0c3d4d5746632p-847, 0x1.b2e6fa531d555p-842 },
+ { 0x1.65add59367765p-848, 0x1.220b241172407p-842 },
+ { 0x1.dce1e8301e6efp-849, 0x1.82d28ae825549p-843 },
+ { 0x1.3dde18cb97a8dp-849, 0x1.01ea51e3f541cp-843 },
+ { 0x1.a7b31ccb0b2f4p-850, 0x1.57e3d8e31e749p-844 },
+ { 0x1.1a59798dd7aa2p-850, 0x1.ca77ce984ce61p-845 },
+ { 0x1.7843a7981f8e3p-851, 0x1.3192c63185ef2p-845 },
+ { 0x1.f55b0f3ffe463p-852, 0x1.974911a73b1a7p-846 },
+ { 0x1.4df9fe655b0fbp-852, 0x1.0f64b579273f6p-846 },
+ { 0x1.bce68ce6bcfedp-853, 0x1.69a3e1bad13dap-847 },
+ { 0x1.284bfe1cdea24p-853, 0x1.e1d6859c11527p-848 },
+ { 0x1.8a9c29acbf47dp-854, 0x1.40f425a16dca3p-848 },
+ { 0x1.06bd70b72892bp-854, 0x1.ab8633790b1e2p-849 },
+ { 0x1.5dd55c1a48477p-855, 0x1.1cb4a43b9229fp-849 },
+ { 0x1.d1bd6b173b9f2p-856, 0x1.7b25cc6523c3bp-850 },
+ { 0x1.35fc8451ff49ep-856, 0x1.f8db2dc70232bp-851 },
+ { 0x1.9c9712232f548p-857, 0x1.5014bc06e7f91p-851 },
+ { 0x1.128b47439dcd5p-857, 0x1.bf66ba3b9066cp-852 },
+ { 0x1.6d53d2be0a0b6p-858, 0x1.29c2c1dc958dbp-852 },
+ { 0x1.e6122171333dfp-859, 0x1.8c4a9d76af90fp-853 },
+ { 0x1.435229d0cc681p-859, 0x1.07ae5a7347d0bp-853 },
+ { 0x1.ae1371b74ea2dp-860, 0x1.5ed9539dfd0c9p-854 },
+ { 0x1.1e01427183001p-860, 0x1.d2c69c7599edcp-855 },
+ { 0x1.7c589442700ecp-861, 0x1.3677341a98a13p-855 },
+ { 0x1.f9be9e1d7b4e4p-862, 0x1.9cf2c5625685ep-856 },
+ { 0x1.5033c96eb757p-862, 0x1.1298aebe8af0fp-856 },
+ { 0x1.bef014f36ffa9p-863, 0x1.6d2655c8560ebp-857 },
+ { 0x1.290979be09b3bp-863, 0x1.e58166789d0bcp-858 },
+ { 0x1.8ac6ba86dcc3cp-864, 0x1.42b9e90b536b6p-858 },
+ { 0x1.064e638fb2517p-864, 0x1.acfe7e64002b1p-859 },
+ { 0x1.5c884857d8adep-865, 0x1.1d179e12ade6ep-859 },
+ { 0x1.cf0beaeb1b319p-866, 0x1.7ae01eb0f55cbp-860 },
+ { 0x1.338e29511ffcdp-866, 0x1.f772a9e0423a1p-861 },
+ { 0x1.9881a23b2ff9bp-867, 0x1.4e72e15f0f016p-861 },
+ { 0x1.0f43798c4f845p-867, 0x1.bc4e2f5a8c9afp-862 },
+ { 0x1.6836e63bd7d88p-868, 0x1.27165d875ec78p-862 },
+ { 0x1.de466f9c32fdap-869, 0x1.87eb54ae1860dp-863 },
+ { 0x1.3d79f883687bfp-869, 0x1.043b38d103ec9p-863 },
+ { 0x1.a56d48500b8a3p-870, 0x1.598a7d65e3b67p-864 },
+ { 0x1.17ac327f9b5e5p-870, 0x1.cac2d1ee89db1p-865 },
+ { 0x1.73278f241bb95p-871, 0x1.308090afcd9f3p-865 },
+ { 0x1.ec801820c3f3dp-872, 0x1.942d41e7bf2a3p-866 },
+ { 0x1.46b841565ab3ep-872, 0x1.0c34dc595f4bfp-866 },
+ { 0x1.b16ea850bfa34p-873, 0x1.63e9cb83e74b2p-867 },
+ { 0x1.1f76e44abf0ecp-873, 0x1.d83e5a3ffd7adp-868 },
+ { 0x1.7d432d7dd0ca1p-874, 0x1.39428e0fd00c5p-868 },
+ { 0x1.f99abec00b682p-875, 0x1.9f8c2eadfb109p-869 },
+ { 0x1.4f35579392d4bp-875, 0x1.13957092e7741p-869 },
+ { 0x1.bc6c19eee10e8p-876, 0x1.6d7ad6ac744f9p-870 },
+ { 0x1.2692d6adc530fp-876, 0x1.e4a41e3c393c2p-871 },
+ { 0x1.8673fad41c337p-877, 0x1.4149a31665d1ep-871 },
+ { 0x1.02bd066e6e446p-877, 0x1.a9efbad7c9909p-872 },
+ { 0x1.56dece3f159c3p-878, 0x1.1a4d14ca40e6p-872 },
+ { 0x1.c64dabfd6babdp-879, 0x1.7628f37011dc7p-873 },
+ { 0x1.2cf07ed3ac7cap-879, 0x1.efd93aae49244p-874 },
+ { 0x1.8ea5cdb1b77f8p-880, 0x1.4884565714d83p-874 },
+ { 0x1.0801f05da3babp-880, 0x1.b341347ab9d2ep-875 },
+ { 0x1.5da3ba0723cbcp-881, 0x1.204d0f497ca7dp-875 },
+ { 0x1.cefd7b19fc691p-882, 0x1.7de10a24a9be3p-876 },
+ { 0x1.3281b7ca3d771p-882, 0x1.f9c4f419d97b9p-877 },
+ { 0x1.95c663259c5d8p-883, 0x1.4ee2a6bb63f1dp-877 },
+ { 0x1.0c90568fe453bp-883, 0x1.bb6bea4d790c6p-878 },
+ { 0x1.6374ef6370a23p-884, 0x1.258802fee3a1bp-878 },
+ { 0x1.d668024e6e773p-885, 0x1.8491dcb50d65p-879 },
+ { 0x1.3739f6c74a992p-885, 0x1.012888bcf5e1bp-879 },
+ { 0x1.9bc5a2748239p-886, 0x1.5456466d99824p-880 },
+ { 0x1.105de86fb726ep-886, 0x1.c25d7813e5a28p-881 },
+ { 0x1.68453b252f9afp-887, 0x1.29f220ff323bdp-881 },
+ { 0x1.dc7c640bf856fp-888, 0x1.8a2c46b36447dp-882 },
+ { 0x1.3b0e7a2d8004dp-888, 0x1.04b5178932d9ep-882 },
+ { 0x1.a095d99893beap-889, 0x1.58d2d04dcdef9p-883 },
+ { 0x1.1361f24d04a1ep-889, 0x1.c8060b8a624d8p-884 },
+ { 0x1.6c0994513d45bp-890, 0x1.2d8154e3020f5p-884 },
+ { 0x1.e12caa0268707p-891, 0x1.8ea37661d565fp-885 },
+ { 0x1.3df6725a60cf5p-891, 0x1.078003d294269p-885 },
+ { 0x1.a42bf15180a09p-892, 0x1.5c4df6da1a5fp-886 },
+ { 0x1.15957e82800c6p-892, 0x1.cc58a0676d26ep-887 },
+ { 0x1.6eb9463d29a0dp-893, 0x1.302d6b1661efp-887 },
+ { 0x1.e46dfa81a2018p-894, 0x1.91ed1d851d1ddp-888 },
+ { 0x1.3feb236502138p-894, 0x1.0982d94421652p-888 },
+ { 0x1.a67f97b02e026p-895, 0x1.5ebfab91b4a2bp-889 },
+ { 0x1.16f37032d6085p-895, 0x1.cf4b3235443f5p-890 },
+ { 0x1.704e120e656fdp-896, 0x1.31f0304f01ddbp-890 },
+ { 0x1.e638c247f445dp-897, 0x1.940198fd0e1c2p-891 },
+ { 0x1.40e7ff18c854cp-897, 0x1.0ab8eaa8fae67p-891 },
+ { 0x1.a78b6039c7039p-898, 0x1.60223e0067b2cp-892 },
+ { 0x1.1778970df4481p-898, 0x1.d0d6e2f89dd66p-893 },
+ { 0x1.70c446e7535ccp-899, 0x1.32c589802b4bap-893 },
+ { 0x1.e688d1dc06742p-900, 0x1.94dc0e4e3bd62p-894 },
+ { 0x1.40eab69ffb357p-900, 0x1.0b1f64079cf15p-894 },
+ { 0x1.a74cd8f49285bp-901, 0x1.607271cb1c23p-895 },
+ { 0x1.1723bbb37e71p-901, 0x1.d0f815d3e30e4p-896 },
+ { 0x1.701ad03f5aba2p-902, 0x1.32ab83cb1b9aap-896 },
+ { 0x1.e55d6dd34aeb5p-903, 0x1.947a7e7d08e62p-897 },
+ { 0x1.3ff3437e5e592p-903, 0x1.0ab555a059592p-897 },
+ { 0x1.a5c493ec4b75bp-904, 0x1.5faf8b45ee11cp-898 },
+ { 0x1.15f5a46f2a8c5p-904, 0x1.cfae7d166a387p-899 },
+ { 0x1.6e533a1804da5p-905, 0x1.31a25c153692fp-899 },
+ { 0x1.e2b951ac76b4bp-906, 0x1.92ddcdd3a585ap-900 },
+ { 0x1.3e03e7aaf4a23p-906, 0x1.097bb793410b5p-900 },
+ { 0x1.a2f624fa2da41p-907, 0x1.5ddb524f58124p-901 },
+ { 0x1.13f112353b2e2p-907, 0x1.ccfd1b6b2b0d1p-902 },
+ { 0x1.6b71aaf8395acp-908, 0x1.2fac7e1ac1a55p-902 },
+ { 0x1.dea2a52e6f8d6p-909, 0x1.9009c068a7447p-903 },
+ { 0x1.3b2124c85eb7dp-909, 0x1.077566199da13p-903 },
+ { 0x1.9ee813dcc82f4p-910, 0x1.5afa0b60e30adp-904 },
+ { 0x1.111ab5ef7d9cep-910, 0x1.c8ea38207b48cp-905 },
+ { 0x1.677cd3ce598a2p-911, 0x1.2cce7b0334e93p-905 },
+ { 0x1.d922e485849dfp-912, 0x1.8c04eb792831bp-906 },
+ { 0x1.3751aaab95803p-912, 0x1.04a716678c7d9p-906 },
+ { 0x1.99a3c2eb312dfp-913, 0x1.571266fb205e7p-907 },
+ { 0x1.0d791e54efc95p-913, 0x1.c37f46c8a36cep-908 },
+ { 0x1.627dd610c1f2fp-914, 0x1.290ef7aa6784ep-908 },
+ { 0x1.d246bba093dddp-915, 0x1.86d89be61c44fp-909 },
+ { 0x1.329e3d8fc35e5p-915, 0x1.011744722e8f8p-909 },
+ { 0x1.93354aecb0f91p-916, 0x1.522d67c700dd9p-910 },
+ { 0x1.09149eae599f4p-916, 0x1.bcc8c2b79e5e6p-911 },
+ { 0x1.5c8020a89d6a7p-917, 0x1.247692feaf7c7p-911 },
+ { 0x1.ca1dd59404578p-918, 0x1.8090b25f1fb1cp-912 },
+ { 0x1.2d1194826d1d9p-918, 0x1.f99c33fa36826p-913 },
+ { 0x1.8bab4cd7bc185p-919, 0x1.4c563ff8738edp-913 },
+ { 0x1.03f72f0fa181cp-919, 0x1.b4d5ff233ee8bp-914 },
+ { 0x1.559144638d7d2p-920, 0x1.1f0fc4fe41aefp-914 },
+ { 0x1.c0baa10766979p-921, 0x1.793b75fbd2367p-915 },
+ { 0x1.26b830bbc4f33p-921, 0x1.efaa9eeaa4992p-916 },
+ { 0x1.8316ba6f8ef74p-922, 0x1.459a26ac43fcfp-916 },
+ { 0x1.fc588d5eeb3p-923, 0x1.abb8ece685efep-917 },
+ { 0x1.4dc0c0d42f863p-923, 0x1.18e6b704952c1p-917 },
+ { 0x1.b6320aea7077ap-924, 0x1.70e95e366ca95p-918 },
+ { 0x1.1fa02ebad6485p-924, 0x1.e4700e7fab75ep-919 },
+ { 0x1.798a96e59845bp-925, 0x1.3e0826243926dp-919 },
+ { 0x1.ef81624855ca5p-926, 0x1.a185d71d9ae78p-920 },
+ { 0x1.451fcaaed5e7p-926, 0x1.1209163a43d8ap-920 },
+ { 0x1.aa9b30dd7b333p-927, 0x1.67acd56555624p-921 },
+ { 0x1.17d9121b4ff43p-927, 0x1.d805487b20ec2p-922 },
+ { 0x1.6f1bb0c9eff18p-928, 0x1.35b0e3e76f72ap-922 },
+ { 0x1.e184bec96bcc5p-929, 0x1.965317fc3f8ebp-923 },
+ { 0x1.3bc10ccdff1d7p-929, 0x1.0a85e11600392p-923 },
+ { 0x1.9e0f0cdf83a76p-930, 0x1.5d99f4f4fa7a2p-924 },
+ { 0x1.0f738d3253e75p-930, 0x1.ca8538b911cc2p-925 },
+ { 0x1.63e056b37b486p-931, 0x1.2ca663e8f6c6ep-925 },
+ { 0x1.d2806afda0512p-932, 0x1.8a38c763ae5p-926 },
+ { 0x1.31b865207923bp-932, 0x1.026d30f31261ep-926 },
+ { 0x1.90a81bef15367p-933, 0x1.52c63cbe5201dp-927 },
+ { 0x1.068145905baddp-933, 0x1.bc0c903e2dd51p-928 },
+ { 0x1.57f0081c7461bp-934, 0x1.22fbc7eb40c8ep-928 },
+ { 0x1.c293abfeb81c1p-935, 0x1.7d5064d5d2e6ap-929 },
+ { 0x1.271a9ed146425p-935, 0x1.f3a001a1da12ap-930 },
+ { 0x1.8282015bfd093p-936, 0x1.474846e880b8p-930 },
+ { 0x1.fa292d1f4b615p-937, 0x1.acb96019278e3p-931 },
+ { 0x1.4b6323fa7fafcp-937, 0x1.18c50c637e437p-931 },
+ { 0x1.b1ded81f6cf48p-938, 0x1.6fb47e7243b1p-932 },
+ { 0x1.1bfd2aff12d23p-938, 0x1.e17fe4af1cdcdp-933 },
+ { 0x1.73b9288cf980bp-939, 0x1.3b3779cd081bcp-933 },
+ { 0x1.e680a6315c8f9p-940, 0x1.9caab20737c4bp-934 },
+ { 0x1.3e52969a46a03p-940, 0x1.0e16c42489121p-934 },
+ { 0x1.a082ea93d471fp-941, 0x1.618056ad2fa0dp-935 },
+ { 0x1.1075d9566cab2p-941, 0x1.ce9e247afa7efp-936 },
+ { 0x1.646a66f6fb197p-942, 0x1.2eabb9557e4c3p-936 },
+ { 0x1.d22f0f82317a8p-943, 0x1.8c0020c90fd02p-937 },
+ { 0x1.30d7883df3e07p-943, 0x1.0305d4157bdecp-937 },
+ { 0x1.8ea1187daf8b3p-944, 0x1.52cf8a69cbdeep-938 },
+ { 0x1.049a91d747c02p-944, 0x1.bb1f3a4ce848cp-939 },
+ { 0x1.54b29ff375e83p-945, 0x1.21bd19407d3a8p-939 },
+ { 0x1.bd5a7cbaf896dp-946, 0x1.7ad97206eb3e9p-940 },
+ { 0x1.230b0dec754dap-946, 0x1.ef4e6059f1fe4p-941 },
+ { 0x1.7c5a693980a4p-947, 0x1.43bdb9112e65bp-941 },
+ { 0x1.f10221f87a1cap-948, 0x1.a7278c0b2c815p-942 },
+ { 0x1.44ae6c097e3b8p-948, 0x1.148391a9b5b7p-942 },
+ { 0x1.a8288818abb4p-949, 0x1.69563388e87eep-943 },
+},
+};
diff --git a/contrib/arm-optimized-routines/pl/math/erfcf_1u7.c b/contrib/arm-optimized-routines/pl/math/erfcf_1u7.c
new file mode 100644
index 000000000000..c8ce95cca058
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfcf_1u7.c
@@ -0,0 +1,103 @@
+/*
+ * Single-precision erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Shift 0x1p17f
+#define OneThird 0x1.555556p-2f
+#define TwoThird 0x1.555556p-1f
+
+#define TwoOverFifteen 0x1.111112p-3f
+#define TwoOverFive 0x1.99999ap-2f
+#define Tenth 0x1.99999ap-4f
+
+#define SignMask 0x7fffffff
+
+/* Fast erfcf approximation based on series expansion near x rounded to
+ nearest multiple of 1/64.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
+
+ poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
+ + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
+
+ Values of erfc(r) and scale are read from lookup tables. Stored values
+ are scaled to avoid hitting the subnormal range.
+
+ Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
+
+ Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
+ erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
+ want 0x1.f51216p-120. */
+float
+erfcf (float x)
+{
+ /* Get top words and sign. */
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & SignMask;
+ uint32_t sign = ix & ~SignMask;
+
+ /* |x| < 0x1.0p-26 => accurate to 0.5 ULP (top12(0x1p-26) = 0x328). */
+ if (unlikely (ia < 0x32800000))
+ return 1.0f - x; /* Small case. */
+
+ /* For |x| < 10.0625, the following approximation holds. */
+ if (likely (ia < 0x41210000))
+ {
+ /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale
+	 to 2/sqrt(pi), when x is reduced to r = 0. */
+ float a = asfloat (ia);
+ float z = a + Shift;
+ uint32_t i = asuint (z) - asuint (Shift);
+ float r = z - Shift;
+
+      /* These values are stored pre-scaled by 2^47 (undone by fac below). */
+ float erfcr = __erfcf_data.tab[i].erfc;
+ float scale = __erfcf_data.tab[i].scale;
+
+ /* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */
+ float d = a - r;
+ float d2 = d * d;
+ float r2 = r * r;
+ float p1 = -r;
+ float p2 = fmaf (TwoThird, r2, -OneThird);
+ float p3 = -r * fmaf (OneThird, r2, -0.5f);
+ float p4 = fmaf (fmaf (TwoOverFifteen, r2, -TwoOverFive), r2, Tenth);
+ float y = fmaf (p4, d, p3);
+ y = fmaf (y, d, p2);
+ y = fmaf (y, d, p1);
+ y = fmaf (-fmaf (y, d2, d), scale, erfcr);
+ /* Handle sign and scale back in a single fma. */
+ float off = asfloat (sign >> 1);
+ float fac = asfloat (asuint (0x1p-47f) | sign);
+ y = fmaf (y, fac, off);
+ /* The underflow exception needs to be signaled explicitly when
+     the result gets into subnormal range. */
+ if (x >= 0x1.2639cp+3f)
+ force_eval_float (opt_barrier_float (0x1p-123f) * 0x1p-123f);
+ return y;
+ }
+
+ /* erfcf(nan)=nan, erfcf(+inf)=0 and erfcf(-inf)=2. */
+ if (unlikely (ia >= 0x7f800000))
+ return asfloat (sign >> 1) + 1.0f / x; /* Special cases. */
+
+  /* Above this threshold erfcf is constant and needs to raise the underflow
+ exception for positive x. */
+ return sign ? 2.0f : __math_uflowf (0);
+}
+
+PL_SIG (S, F, 1, erfc, -4.0, 10.0)
+PL_TEST_ULP (erfcf, 1.14)
+PL_TEST_SYM_INTERVAL (erfcf, 0, 0x1p-26, 40000)
+PL_TEST_INTERVAL (erfcf, 0x1p-26, 10.0625, 40000)
+PL_TEST_INTERVAL (erfcf, -0x1p-26, -4.0, 40000)
+PL_TEST_INTERVAL (erfcf, 10.0625, inf, 40000)
+PL_TEST_INTERVAL (erfcf, -4.0, -inf, 40000)
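The fast path above reduces the argument with a biased add: the ulp of a float at 0x1p17f is exactly 1/64, so z = |x| + Shift rounds |x| to the nearest multiple of 1/64, and since z and Shift share the same exponent the difference of their bit patterns is exactly the table index i = round(64 * |x|), with r = i/64 recovered as z - Shift. Below is a minimal standalone sketch of just that reduction; the as_u32 helper stands in for the asuint macro from math_config.h (not part of this patch) and the sample input is arbitrary.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bit-cast a float to its IEEE-754 encoding (stand-in for asuint).  */
static uint32_t
as_u32 (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);
  return u;
}

int
main (void)
{
  const float Shift = 0x1p17f;  /* ulp at 2^17 is 1/64, so adding Shift rounds to 1/64.  */
  float a = 3.1f;               /* arbitrary |x| in [0, 10.0625).  */
  float z = a + Shift;          /* |x| rounded to the nearest 1/64, still offset by Shift.  */
  uint32_t i = as_u32 (z) - as_u32 (Shift);  /* bit-pattern difference = round (64 * |x|).  */
  float r = z - Shift;          /* reduction centre, r = i / 64.  */
  float d = a - r;              /* residual fed to the polynomial, |d| <= 1/128.  */
  printf ("i = %u  r = %a  d = %a\n", (unsigned) i, r, d);
  return 0;
}

The resulting i then selects the pre-scaled erfc(r) and 2/sqrt(pi)*exp(-r^2) pair from the table added by erfcf_data.c below.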
diff --git a/contrib/arm-optimized-routines/pl/math/erfcf_data.c b/contrib/arm-optimized-routines/pl/math/erfcf_data.c
new file mode 100644
index 000000000000..a54e11973819
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfcf_data.c
@@ -0,0 +1,664 @@
+/*
+ * Data used in single-precision erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Lookup table used in erfcf.
+ For each possible rounded input r (multiples of 1/64), between
+ r = 0.0 and r = 10.0625 (645 values):
+ - the first entry __erfcf_data.tab.erfc contains the values of erfc(r),
+ - the second entry __erfcf_data.tab.scale contains the values of
+   2/sqrt(pi)*exp(-r^2). Both values may fall into the subnormal range, so
+   they are stored scaled up by 2^47, a value large enough to keep them
+   normal while the scaled exponents still fit in the 8-bit exponent
+   field. */
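+/* For example, entry 0 descales to erfc(0) = 0x1p47 * 0x1p-47 = 1.0 with
+   scale 0x1.20dd76p47 * 0x1p-47 = 2/sqrt(pi) ~= 1.1284, and entry 64
+   (r = 1.0) descales to erfc(1) ~= 0x1.422616p-3 ~= 0.1573 with scale
+   ~= 0.4151. erfcf undoes this scaling with a single final fmaf against
+   0x1p-47f. */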
+const struct erfcf_data __erfcf_data = {
+ .tab = { { 0x1p47, 0x1.20dd76p47 },
+ { 0x1.f6f944p46, 0x1.20cb68p47 },
+ { 0x1.edf3aap46, 0x1.209546p47 },
+ { 0x1.e4f05p46, 0x1.203b26p47 },
+ { 0x1.dbf056p46, 0x1.1fbd28p47 },
+ { 0x1.d2f4dcp46, 0x1.1f1b7ap47 },
+ { 0x1.c9fefep46, 0x1.1e565cp47 },
+ { 0x1.c10fd4p46, 0x1.1d6e14p47 },
+ { 0x1.b8287ap46, 0x1.1c62fap47 },
+ { 0x1.af4ap46, 0x1.1b3572p47 },
+ { 0x1.a6757ep46, 0x1.19e5eap47 },
+ { 0x1.9dabfcp46, 0x1.1874dep47 },
+ { 0x1.94ee88p46, 0x1.16e2d8p47 },
+ { 0x1.8c3e24p46, 0x1.153068p47 },
+ { 0x1.839bd6p46, 0x1.135e3p47 },
+ { 0x1.7b0894p46, 0x1.116cd8p47 },
+ { 0x1.728558p46, 0x1.0f5d16p47 },
+ { 0x1.6a1312p46, 0x1.0d2fa6p47 },
+ { 0x1.61b2acp46, 0x1.0ae55p47 },
+ { 0x1.596508p46, 0x1.087ee4p47 },
+ { 0x1.512b06p46, 0x1.05fd3ep47 },
+ { 0x1.49057ap46, 0x1.03614p47 },
+ { 0x1.40f536p46, 0x1.00abdp47 },
+ { 0x1.38fbp46, 0x1.fbbbbep46 },
+ { 0x1.311796p46, 0x1.f5f0cep46 },
+ { 0x1.294bb4p46, 0x1.eff8c4p46 },
+ { 0x1.21980ap46, 0x1.e9d5a8p46 },
+ { 0x1.19fd3ep46, 0x1.e38988p46 },
+ { 0x1.127bf2p46, 0x1.dd167cp46 },
+ { 0x1.0b14bcp46, 0x1.d67ea2p46 },
+ { 0x1.03c82ap46, 0x1.cfc41ep46 },
+ { 0x1.f92d8cp45, 0x1.c8e91cp46 },
+ { 0x1.eb0214p45, 0x1.c1efcap46 },
+ { 0x1.dd0edap45, 0x1.bada5ap46 },
+ { 0x1.cf54b4p45, 0x1.b3aafcp46 },
+ { 0x1.c1d46ap45, 0x1.ac63e8p46 },
+ { 0x1.b48eaep45, 0x1.a5074ep46 },
+ { 0x1.a78428p45, 0x1.9d9762p46 },
+ { 0x1.9ab566p45, 0x1.96165p46 },
+ { 0x1.8e22eap45, 0x1.8e8646p46 },
+ { 0x1.81cd24p45, 0x1.86e96ap46 },
+ { 0x1.75b47p45, 0x1.7f41dcp46 },
+ { 0x1.69d91ep45, 0x1.7791b8p46 },
+ { 0x1.5e3b66p45, 0x1.6fdb12p46 },
+ { 0x1.52db78p45, 0x1.681ff2p46 },
+ { 0x1.47b96ep45, 0x1.60625cp46 },
+ { 0x1.3cd554p45, 0x1.58a446p46 },
+ { 0x1.322f26p45, 0x1.50e79ep46 },
+ { 0x1.27c6d2p45, 0x1.492e42p46 },
+ { 0x1.1d9c34p45, 0x1.417a0cp46 },
+ { 0x1.13af1ep45, 0x1.39ccc2p46 },
+ { 0x1.09ff5p45, 0x1.32281ep46 },
+ { 0x1.008c8p45, 0x1.2a8dcep46 },
+ { 0x1.eeaca8p44, 0x1.22ff72p46 },
+ { 0x1.dcb8cap44, 0x1.1b7e98p46 },
+ { 0x1.cb3c86p44, 0x1.140cc4p46 },
+ { 0x1.ba36dap44, 0x1.0cab62p46 },
+ { 0x1.a9a6bap44, 0x1.055bd6p46 },
+ { 0x1.998afap44, 0x1.fc3ee6p45 },
+ { 0x1.89e25ep44, 0x1.edeeeep45 },
+ { 0x1.7aab98p44, 0x1.dfca26p45 },
+ { 0x1.6be542p44, 0x1.d1d2dp45 },
+ { 0x1.5d8decp44, 0x1.c40b08p45 },
+ { 0x1.4fa40ep44, 0x1.b674c8p45 },
+ { 0x1.422616p44, 0x1.a911fp45 },
+ { 0x1.351262p44, 0x1.9be438p45 },
+ { 0x1.28674p44, 0x1.8eed36p45 },
+ { 0x1.1c22f8p44, 0x1.822e66p45 },
+ { 0x1.1043c2p44, 0x1.75a91ap45 },
+ { 0x1.04c7cap44, 0x1.695e8cp45 },
+ { 0x1.f35a72p43, 0x1.5d4fd4p45 },
+ { 0x1.dde456p43, 0x1.517de6p45 },
+ { 0x1.c9296cp43, 0x1.45e99cp45 },
+ { 0x1.b525d6p43, 0x1.3a93b2p45 },
+ { 0x1.a1d5a6p43, 0x1.2f7cc4p45 },
+ { 0x1.8f34eap43, 0x1.24a554p45 },
+ { 0x1.7d3fa6p43, 0x1.1a0dc6p45 },
+ { 0x1.6bf1dcp43, 0x1.0fb662p45 },
+ { 0x1.5b4784p43, 0x1.059f5ap45 },
+ { 0x1.4b3c98p43, 0x1.f79184p44 },
+ { 0x1.3bcd14p43, 0x1.e4653p44 },
+ { 0x1.2cf4eep43, 0x1.d1b982p44 },
+ { 0x1.1eb024p43, 0x1.bf8e1cp44 },
+ { 0x1.10fab8p43, 0x1.ade26cp44 },
+ { 0x1.03d0acp43, 0x1.9cb5bep44 },
+ { 0x1.ee5c18p42, 0x1.8c0732p44 },
+ { 0x1.d61dd6p42, 0x1.7bd5c8p44 },
+ { 0x1.bedec8p42, 0x1.6c2056p44 },
+ { 0x1.a8973cp42, 0x1.5ce596p44 },
+ { 0x1.933f9p42, 0x1.4e241ep44 },
+ { 0x1.7ed03ap42, 0x1.3fda6cp44 },
+ { 0x1.6b41ccp42, 0x1.3206dcp44 },
+ { 0x1.588cf2p42, 0x1.24a7b8p44 },
+ { 0x1.46aa72p42, 0x1.17bb2cp44 },
+ { 0x1.359332p42, 0x1.0b3f52p44 },
+ { 0x1.254038p42, 0x1.fe646p43 },
+ { 0x1.15aaa8p42, 0x1.e72372p43 },
+ { 0x1.06cbcap42, 0x1.d0b7ap43 },
+ { 0x1.f13a04p41, 0x1.bb1c98p43 },
+ { 0x1.d62fbep41, 0x1.a64de6p43 },
+ { 0x1.bc6c1ep41, 0x1.92470ap43 },
+ { 0x1.a3e2ccp41, 0x1.7f036cp43 },
+ { 0x1.8c87b8p41, 0x1.6c7e64p43 },
+ { 0x1.764f2p41, 0x1.5ab342p43 },
+ { 0x1.612d8ap41, 0x1.499d48p43 },
+ { 0x1.4d17cap41, 0x1.3937b2p43 },
+ { 0x1.3a03p41, 0x1.297dbap43 },
+ { 0x1.27e498p41, 0x1.1a6a96p43 },
+ { 0x1.16b24cp41, 0x1.0bf97ep43 },
+ { 0x1.066222p41, 0x1.fc4b5ep42 },
+ { 0x1.edd4d2p40, 0x1.e1d4dp42 },
+ { 0x1.d08382p40, 0x1.c885ep42 },
+ { 0x1.b4be2p40, 0x1.b0553p42 },
+ { 0x1.9a7316p40, 0x1.99397ap42 },
+ { 0x1.81915cp40, 0x1.83298ep42 },
+ { 0x1.6a088p40, 0x1.6e1c58p42 },
+ { 0x1.53c89ep40, 0x1.5a08e8p42 },
+ { 0x1.3ec25ep40, 0x1.46e66cp42 },
+ { 0x1.2ae6fap40, 0x1.34ac36p42 },
+ { 0x1.18282ep40, 0x1.2351c2p42 },
+ { 0x1.067844p40, 0x1.12ceb4p42 },
+ { 0x1.eb940ep39, 0x1.031ad6p42 },
+ { 0x1.cc2186p39, 0x1.e85c44p41 },
+ { 0x1.ae808cp39, 0x1.cc018p41 },
+ { 0x1.9299bp39, 0x1.b1160ap41 },
+ { 0x1.785674p39, 0x1.978ae8p41 },
+ { 0x1.5fa14ap39, 0x1.7f5188p41 },
+ { 0x1.486586p39, 0x1.685bb6p41 },
+ { 0x1.328f5ep39, 0x1.529b9ep41 },
+ { 0x1.1e0be6p39, 0x1.3e03d8p41 },
+ { 0x1.0ac8fcp39, 0x1.2a875cp41 },
+ { 0x1.f16aaep38, 0x1.181984p41 },
+ { 0x1.cf80d4p38, 0x1.06ae14p41 },
+ { 0x1.afb4e2p38, 0x1.ec7262p40 },
+ { 0x1.91e8bep38, 0x1.cd5ecap40 },
+ { 0x1.75ffb4p38, 0x1.b00b38p40 },
+ { 0x1.5bde72p38, 0x1.94624ep40 },
+ { 0x1.436af4p38, 0x1.7a4f6ap40 },
+ { 0x1.2c8c7ap38, 0x1.61beaep40 },
+ { 0x1.172b7ap38, 0x1.4a9cf6p40 },
+ { 0x1.033198p38, 0x1.34d7dcp40 },
+ { 0x1.e11332p37, 0x1.205dacp40 },
+ { 0x1.be3ebp37, 0x1.0d1d6ap40 },
+ { 0x1.9dbf72p37, 0x1.f60d8ap39 },
+ { 0x1.7f714p37, 0x1.d4143ap39 },
+ { 0x1.6331cap37, 0x1.b430ecp39 },
+ { 0x1.48e09cp37, 0x1.9646f4p39 },
+ { 0x1.305ef8p37, 0x1.7a3adep39 },
+ { 0x1.198fd6p37, 0x1.5ff276p39 },
+ { 0x1.0457c6p37, 0x1.4754acp39 },
+ { 0x1.e139bcp36, 0x1.30499cp39 },
+ { 0x1.bc8d52p36, 0x1.1aba78p39 },
+ { 0x1.9a7c3p36, 0x1.06918cp39 },
+ { 0x1.7adadep36, 0x1.e77448p38 },
+ { 0x1.5d806ap36, 0x1.c4412cp38 },
+ { 0x1.424642p36, 0x1.a36454p38 },
+ { 0x1.290826p36, 0x1.84ba3p38 },
+ { 0x1.11a3f8p36, 0x1.6821p38 },
+ { 0x1.f7f358p35, 0x1.4d78bcp38 },
+ { 0x1.cfd652p35, 0x1.34a306p38 },
+ { 0x1.aab85ap35, 0x1.1d8318p38 },
+ { 0x1.88647p35, 0x1.07fdb4p38 },
+ { 0x1.68a8e4p35, 0x1.e7f232p37 },
+ { 0x1.4b5726p35, 0x1.c2b9dp37 },
+ { 0x1.30439cp35, 0x1.a02436p37 },
+ { 0x1.174578p35, 0x1.8005fp37 },
+ { 0x1.003692p35, 0x1.6235fcp37 },
+ { 0x1.d5e678p34, 0x1.468daep37 },
+ { 0x1.aeb442p34, 0x1.2ce898p37 },
+ { 0x1.8a9848p34, 0x1.15246ep37 },
+ { 0x1.695876p34, 0x1.fe41cep36 },
+ { 0x1.4abea2p34, 0x1.d57f52p36 },
+ { 0x1.2e984ep34, 0x1.afc85ep36 },
+ { 0x1.14b676p34, 0x1.8ce75ep36 },
+ { 0x1.f9daap33, 0x1.6caa0ep36 },
+ { 0x1.ce283ap33, 0x1.4ee142p36 },
+ { 0x1.a609f8p33, 0x1.3360ccp36 },
+ { 0x1.81396ap33, 0x1.19ff46p36 },
+ { 0x1.5f7524p33, 0x1.0295fp36 },
+ { 0x1.40806ep33, 0x1.da011p35 },
+ { 0x1.2422eep33, 0x1.b23a5ap35 },
+ { 0x1.0a286p33, 0x1.8d986ap35 },
+ { 0x1.e4c0bp32, 0x1.6be022p35 },
+ { 0x1.b93bf4p32, 0x1.4cda54p35 },
+ { 0x1.916f7cp32, 0x1.30539p35 },
+ { 0x1.6d0e7p32, 0x1.161be4p35 },
+ { 0x1.4bd1cp32, 0x1.fc0d56p34 },
+ { 0x1.2d77bep32, 0x1.cfd4a6p34 },
+ { 0x1.11c3bep32, 0x1.a74068p34 },
+ { 0x1.f0fb86p31, 0x1.8208bcp34 },
+ { 0x1.c2e43ep31, 0x1.5feadap34 },
+ { 0x1.98e254p31, 0x1.40a8c2p34 },
+ { 0x1.729df6p31, 0x1.2408eap34 },
+ { 0x1.4fc63cp31, 0x1.09d5f8p34 },
+ { 0x1.3010aap31, 0x1.e3bcf4p33 },
+ { 0x1.1338b8p31, 0x1.b7e946p33 },
+ { 0x1.f1fecp30, 0x1.8fdc1cp33 },
+ { 0x1.c2556ap30, 0x1.6b4702p33 },
+ { 0x1.970b06p30, 0x1.49e178p33 },
+ { 0x1.6fbddep30, 0x1.2b6876p33 },
+ { 0x1.4c144ep30, 0x1.0f9e1cp33 },
+ { 0x1.2bbc1ep30, 0x1.ec929ap32 },
+ { 0x1.0e69f2p30, 0x1.be6abcp32 },
+ { 0x1.e7b188p29, 0x1.94637ep32 },
+ { 0x1.b792bcp29, 0x1.6e2368p32 },
+ { 0x1.8c03d2p29, 0x1.4b581cp32 },
+ { 0x1.649b02p29, 0x1.2bb5ccp32 },
+ { 0x1.40f794p29, 0x1.0ef6c4p32 },
+ { 0x1.20c13p29, 0x1.e9b5e8p31 },
+ { 0x1.03a72ap29, 0x1.ba4f04p31 },
+ { 0x1.d2bfc6p28, 0x1.8f4cccp31 },
+ { 0x1.a35068p28, 0x1.684c22p31 },
+ { 0x1.7885cep28, 0x1.44f21ep31 },
+ { 0x1.51f06ap28, 0x1.24eb72p31 },
+ { 0x1.2f2aaap28, 0x1.07ebd2p31 },
+ { 0x1.0fd816p28, 0x1.db5adp30 },
+ { 0x1.e7493p27, 0x1.abe09ep30 },
+ { 0x1.b48774p27, 0x1.80f43ap30 },
+ { 0x1.86e006p27, 0x1.5a2aep30 },
+ { 0x1.5dd4bp27, 0x1.37231p30 },
+ { 0x1.38f2e8p27, 0x1.1783cep30 },
+ { 0x1.17d2c6p27, 0x1.f5f7d8p29 },
+ { 0x1.f42c18p26, 0x1.c282cep29 },
+ { 0x1.beceb2p26, 0x1.94219cp29 },
+ { 0x1.8ef2aap26, 0x1.6a5972p29 },
+ { 0x1.640bf6p26, 0x1.44ba86p29 },
+ { 0x1.3d9be6p26, 0x1.22df2ap29 },
+ { 0x1.1b2fe4p26, 0x1.046aeap29 },
+ { 0x1.f8c0c2p25, 0x1.d21398p28 },
+ { 0x1.c19fa8p25, 0x1.a0df1p28 },
+ { 0x1.90538cp25, 0x1.74adc8p28 },
+ { 0x1.6443fep25, 0x1.4d0232p28 },
+ { 0x1.3ce784p25, 0x1.296a7p28 },
+ { 0x1.19c232p25, 0x1.097f62p28 },
+ { 0x1.f4c8c4p24, 0x1.d9c736p27 },
+ { 0x1.bcd30ep24, 0x1.a6852cp27 },
+ { 0x1.8aee4cp24, 0x1.789fb8p27 },
+ { 0x1.5e77b6p24, 0x1.4f8c96p27 },
+ { 0x1.36dcf2p24, 0x1.2acee2p27 },
+ { 0x1.139a7cp24, 0x1.09f5dp27 },
+ { 0x1.e8747p23, 0x1.d9371ep26 },
+ { 0x1.b0a44ap23, 0x1.a4c89ep26 },
+ { 0x1.7f064ap23, 0x1.75fa8ep26 },
+ { 0x1.52efep23, 0x1.4c37cp26 },
+ { 0x1.2bc82ap23, 0x1.26f9ep26 },
+ { 0x1.09064p23, 0x1.05c804p26 },
+ { 0x1.d45f16p22, 0x1.d06ad6p25 },
+ { 0x1.9dacb2p22, 0x1.9bc0ap25 },
+ { 0x1.6d3126p22, 0x1.6ce1aap25 },
+ { 0x1.423d14p22, 0x1.43302cp25 },
+ { 0x1.1c33cep22, 0x1.1e1e86p25 },
+ { 0x1.f512dep21, 0x1.fa5b5p24 },
+ { 0x1.b9823cp21, 0x1.bfd756p24 },
+ { 0x1.84d6fep21, 0x1.8be4f8p24 },
+ { 0x1.564a92p21, 0x1.5dcd66p24 },
+ { 0x1.2d2c0ap21, 0x1.34ecf8p24 },
+ { 0x1.08ddd2p21, 0x1.10b148p24 },
+ { 0x1.d1a75p20, 0x1.e12eep23 },
+ { 0x1.99218cp20, 0x1.a854eap23 },
+ { 0x1.674c6ap20, 0x1.7603bap23 },
+ { 0x1.3b62b6p20, 0x1.4980ccp23 },
+ { 0x1.14b54p20, 0x1.2225b2p23 },
+ { 0x1.e55102p19, 0x1.febc1p22 },
+ { 0x1.a964eep19, 0x1.c14b22p22 },
+ { 0x1.74b17ap19, 0x1.8b0cfcp22 },
+ { 0x1.465daap19, 0x1.5b2fe6p22 },
+ { 0x1.1da944p19, 0x1.30f93cp22 },
+ { 0x1.f3d41p18, 0x1.0bc30cp22 },
+ { 0x1.b512a2p18, 0x1.d5f3a8p21 },
+ { 0x1.7e03b2p18, 0x1.9c3518p21 },
+ { 0x1.4dbb98p18, 0x1.6961b8p21 },
+ { 0x1.236a1ap18, 0x1.3cab14p21 },
+ { 0x1.fcae94p17, 0x1.155a0ap21 },
+ { 0x1.bbc1ap17, 0x1.e5989p20 },
+ { 0x1.82eedcp17, 0x1.a8e406p20 },
+ { 0x1.5139a6p17, 0x1.7397c6p20 },
+ { 0x1.25c354p17, 0x1.44d26ep20 },
+ { 0x1.ff8f84p16, 0x1.1bcca4p20 },
+ { 0x1.bd3474p16, 0x1.efac52p19 },
+ { 0x1.834586p16, 0x1.b0a68ap19 },
+ { 0x1.50b75cp16, 0x1.7974e8p19 },
+ { 0x1.249ef2p16, 0x1.4924a8p19 },
+ { 0x1.fc5b88p15, 0x1.1edfa4p19 },
+ { 0x1.b95ceep15, 0x1.f3d218p18 },
+ { 0x1.7f03bap15, 0x1.b334fap18 },
+ { 0x1.4c389cp15, 0x1.7ac2d8p18 },
+ { 0x1.2006aep15, 0x1.4979acp18 },
+ { 0x1.f32eap14, 0x1.1e767cp18 },
+ { 0x1.b05cfep14, 0x1.f1e352p17 },
+ { 0x1.764f46p14, 0x1.b0778cp17 },
+ { 0x1.43e56cp14, 0x1.77756ep17 },
+ { 0x1.18238p14, 0x1.45ce66p17 },
+ { 0x1.e45a98p13, 0x1.1a95p17 },
+ { 0x1.a284ccp13, 0x1.e9f2p16 },
+ { 0x1.697596p13, 0x1.a887bep16 },
+ { 0x1.3807acp13, 0x1.6fab64p16 },
+ { 0x1.0d3b36p13, 0x1.3e44e4p16 },
+ { 0x1.d0624p12, 0x1.135f28p16 },
+ { 0x1.904e0cp12, 0x1.dc479ep15 },
+ { 0x1.58e72ap12, 0x1.9baed4p15 },
+ { 0x1.2906ccp12, 0x1.63ac6cp15 },
+ { 0x1.ff58dap11, 0x1.33225ap15 },
+ { 0x1.b7f1f4p11, 0x1.0916fp15 },
+ { 0x1.7a551p11, 0x1.c960cp14 },
+ { 0x1.453142p11, 0x1.8a6174p14 },
+ { 0x1.1761f8p11, 0x1.53e4f8p14 },
+ { 0x1.dfd296p10, 0x1.24caf2p14 },
+ { 0x1.9bd5fp10, 0x1.f830cp13 },
+ { 0x1.61501p10, 0x1.b1e5acp13 },
+ { 0x1.2ef6p10, 0x1.7538c6p13 },
+ { 0x1.03a918p10, 0x1.40dfd8p13 },
+ { 0x1.bce26ap9, 0x1.13bc08p13 },
+ { 0x1.7cef42p9, 0x1.d9a88p12 },
+ { 0x1.46056p9, 0x1.96a0b4p12 },
+ { 0x1.16e3cap9, 0x1.5ce9acp12 },
+ { 0x1.dcea68p8, 0x1.2b3e54p12 },
+ { 0x1.97945ap8, 0x1.0085p12 },
+ { 0x1.5c2828p8, 0x1.b7937ep11 },
+ { 0x1.29415p8, 0x1.7872dap11 },
+ { 0x1.fb58fap7, 0x1.423acp11 },
+ { 0x1.b0c1a8p7, 0x1.13af5p11 },
+ { 0x1.70f474p7, 0x1.d77f0cp10 },
+ { 0x1.3a68a8p7, 0x1.92ff34p10 },
+ { 0x1.0bcc6p7, 0x1.5847eep10 },
+ { 0x1.c7fa0cp6, 0x1.25f9eep10 },
+ { 0x1.8401b6p6, 0x1.f5cc78p9 },
+ { 0x1.4a029ap6, 0x1.ac0f6p9 },
+ { 0x1.188c46p6, 0x1.6cfa9cp9 },
+ { 0x1.dcc4fap5, 0x1.370ab8p9 },
+ { 0x1.94ec06p5, 0x1.08f24p9 },
+ { 0x1.57bc96p5, 0x1.c324c2p8 },
+ { 0x1.23a81ap5, 0x1.7fe904p8 },
+ { 0x1.eeb278p4, 0x1.46897ep8 },
+ { 0x1.a35794p4, 0x1.159a38p8 },
+ { 0x1.634b8p4, 0x1.d7c594p7 },
+ { 0x1.2ce2a4p4, 0x1.90ae4ep7 },
+ { 0x1.fd5f08p3, 0x1.5422fp7 },
+ { 0x1.aef3cep3, 0x1.20998p7 },
+ { 0x1.6c6e62p3, 0x1.e98102p6 },
+ { 0x1.3407b6p3, 0x1.9eee06p6 },
+ { 0x1.043bap3, 0x1.5f8b88p6 },
+ { 0x1.b77e5cp2, 0x1.29b294p6 },
+ { 0x1.72f0c4p2, 0x1.f7f338p5 },
+ { 0x1.38ee18p2, 0x1.aa5772p5 },
+ { 0x1.07dd68p2, 0x1.68823ep5 },
+ { 0x1.bcc58ep1, 0x1.30b14ep5 },
+ { 0x1.76aca4p1, 0x1.01647cp5 },
+ { 0x1.3b7912p1, 0x1.b2a87ep4 },
+ { 0x1.097f82p1, 0x1.6ed2f2p4 },
+ { 0x1.beaa3ep0, 0x1.356cd6p4 },
+ { 0x1.778be2p0, 0x1.04e15ep4 },
+ { 0x1.3b9984p0, 0x1.b7b04p3 },
+ { 0x1.09182cp0, 0x1.725862p3 },
+ { 0x1.bd20fcp-1, 0x1.37c92cp3 },
+ { 0x1.75892p-1, 0x1.065b96p3 },
+ { 0x1.394e7ap-1, 0x1.b950d4p2 },
+ { 0x1.06a996p-1, 0x1.72fd94p2 },
+ { 0x1.b8328ep-2, 0x1.37b83cp2 },
+ { 0x1.70aff4p-2, 0x1.05ca5p2 },
+ { 0x1.34a53cp-2, 0x1.b7807ep1 },
+ { 0x1.0241dep-2, 0x1.70bebp1 },
+ { 0x1.affb9p-3, 0x1.353a6cp1 },
+ { 0x1.691c7cp-3, 0x1.0330fp1 },
+ { 0x1.2db8cap-3, 0x1.b24a16p0 },
+ { 0x1.f7f4f8p-4, 0x1.6ba91ap0 },
+ { 0x1.a4ab64p-4, 0x1.305e98p0 },
+ { 0x1.5efa4ep-4, 0x1.fd3de2p-1 },
+ { 0x1.24b0d8p-4, 0x1.a9cc94p-1 },
+ { 0x1.e7eeap-5, 0x1.63daf8p-1 },
+ { 0x1.96826ep-5, 0x1.294176p-1 },
+ { 0x1.5282d2p-5, 0x1.f05e82p-2 },
+ { 0x1.19c05p-5, 0x1.9e39dcp-2 },
+ { 0x1.d4ca9cp-6, 0x1.5982p-2 },
+ { 0x1.85cfacp-6, 0x1.200c8ap-2 },
+ { 0x1.43fb32p-6, 0x1.e00e92p-3 },
+ { 0x1.0d2382p-6, 0x1.8fd4ep-3 },
+ { 0x1.bef1b2p-7, 0x1.4cd9cp-3 },
+ { 0x1.72ede4p-7, 0x1.14f48ap-3 },
+ { 0x1.33b1cap-7, 0x1.ccaaeap-4 },
+ { 0x1.fe3bdp-8, 0x1.7eef14p-4 },
+ { 0x1.a6d7d2p-8, 0x1.3e2964p-4 },
+ { 0x1.5e4062p-8, 0x1.083768p-4 },
+ { 0x1.21fb7ap-8, 0x1.b69f1p-5 },
+ { 0x1.dfefbep-9, 0x1.6be574p-5 },
+ { 0x1.8cf816p-9, 0x1.2dc11ap-5 },
+ { 0x1.482fa8p-9, 0x1.f4343cp-6 },
+ { 0x1.0f30c4p-9, 0x1.9e614ep-6 },
+ { 0x1.bff86ep-10, 0x1.571d34p-6 },
+ { 0x1.71d0b6p-10, 0x1.1bf742p-6 },
+ { 0x1.3125f6p-10, 0x1.d5cc6cp-7 },
+ { 0x1.f755eap-11, 0x1.846e9ep-7 },
+ { 0x1.9eebaap-11, 0x1.410048p-7 },
+ { 0x1.55df18p-11, 0x1.09258p-7 },
+ { 0x1.198c18p-11, 0x1.b5ceb6p-8 },
+ { 0x1.cf82ep-12, 0x1.69468p-8 },
+ { 0x1.7d5af6p-12, 0x1.29f9e8p-8 },
+ { 0x1.399c28p-12, 0x1.eb4b9ep-9 },
+ { 0x1.01c65ap-12, 0x1.94d1dep-9 },
+ { 0x1.a78e82p-13, 0x1.4d6706p-9 },
+ { 0x1.5bcf92p-13, 0x1.127346p-9 },
+ { 0x1.1d791cp-13, 0x1.c39fap-10 },
+ { 0x1.d463dcp-14, 0x1.73679cp-10 },
+ { 0x1.8011fcp-14, 0x1.314916p-10 },
+ { 0x1.3ac71cp-14, 0x1.f5a11ap-11 },
+ { 0x1.01dcc2p-14, 0x1.9beca8p-11 },
+ { 0x1.a6459cp-15, 0x1.52189ap-11 },
+ { 0x1.59962ap-15, 0x1.155d48p-11 },
+ { 0x1.1ab0e4p-15, 0x1.c6dc8ap-12 },
+ { 0x1.ce42dep-16, 0x1.74ca88p-12 },
+ { 0x1.79c43p-16, 0x1.31612ap-12 },
+ { 0x1.349128p-16, 0x1.f4125ap-13 },
+ { 0x1.f7d80ep-17, 0x1.993e82p-13 },
+ { 0x1.9b270cp-17, 0x1.4ec006p-13 },
+ { 0x1.4f59fap-17, 0x1.11aebp-13 },
+ { 0x1.1164acp-17, 0x1.bf4ab2p-14 },
+ { 0x1.bd8c96p-18, 0x1.6d561ep-14 },
+ { 0x1.6ae172p-18, 0x1.2a406ep-14 },
+ { 0x1.276874p-18, 0x1.e6bba6p-15 },
+ { 0x1.e0bad2p-19, 0x1.8cf814p-15 },
+ { 0x1.86f788p-19, 0x1.4399f8p-15 },
+ { 0x1.3dcfaep-19, 0x1.07aa3p-15 },
+ { 0x1.023828p-19, 0x1.ad7302p-16 },
+ { 0x1.a3666ep-20, 0x1.5d90f4p-16 },
+ { 0x1.546e38p-20, 0x1.1c674ep-16 },
+ { 0x1.143264p-20, 0x1.ce8ccp-17 },
+ { 0x1.bff316p-21, 0x1.77f562p-17 },
+ { 0x1.6b13ecp-21, 0x1.316da8p-17 },
+ { 0x1.2624f4p-21, 0x1.f0046p-18 },
+ { 0x1.dc5de4p-22, 0x1.92920ap-18 },
+ { 0x1.818d3ap-22, 0x1.4691b2p-18 },
+ { 0x1.37e62p-22, 0x1.08c96ap-18 },
+ { 0x1.f8637ep-23, 0x1.ad2d0ap-19 },
+ { 0x1.97a3dcp-23, 0x1.5ba462p-19 },
+ { 0x1.494a4p-23, 0x1.1975ep-19 },
+ { 0x1.09dee4p-23, 0x1.c78892p-20 },
+ { 0x1.ad1fap-24, 0x1.7073c4p-20 },
+ { 0x1.5a245ep-24, 0x1.29df48p-20 },
+ { 0x1.171278p-24, 0x1.e163bep-21 },
+ { 0x1.c1c74cp-25, 0x1.84cbbp-21 },
+ { 0x1.6a46f4p-25, 0x1.39dbcep-21 },
+ { 0x1.23a858p-25, 0x1.fa7b92p-22 },
+ { 0x1.d56196p-26, 0x1.9876ap-22 },
+ { 0x1.7984b6p-26, 0x1.4940bcp-22 },
+ { 0x1.2f7cc4p-26, 0x1.094608p-22 },
+ { 0x1.e7b62cp-27, 0x1.ab3e8cp-23 },
+ { 0x1.87b15ep-27, 0x1.57e33ep-23 },
+ { 0x1.3a6dp-27, 0x1.14a8b6p-23 },
+ { 0x1.f88ebap-28, 0x1.bcede6p-24 },
+ { 0x1.94a282p-28, 0x1.659918p-24 },
+ { 0x1.44580ap-28, 0x1.1f4498p-24 },
+ { 0x1.03dbf8p-28, 0x1.cd5086p-25 },
+ { 0x1.a03066p-29, 0x1.723974p-25 },
+ { 0x1.4d1f2ep-29, 0x1.28f9cap-25 },
+ { 0x1.0a814ap-29, 0x1.dc34b6p-26 },
+ { 0x1.aa36cap-30, 0x1.7d9dbp-26 },
+ { 0x1.54a6b6p-30, 0x1.31aa56p-26 },
+ { 0x1.102232p-30, 0x1.e96c26p-27 },
+ { 0x1.b2959ep-31, 0x1.87a218p-27 },
+ { 0x1.5ad66cp-31, 0x1.393ad2p-27 },
+ { 0x1.14ac7ep-31, 0x1.f4ccdap-28 },
+ { 0x1.b931b8p-32, 0x1.9026a8p-28 },
+ { 0x1.5f9a24p-32, 0x1.3f92eap-28 },
+ { 0x1.181154p-32, 0x1.fe3208p-29 },
+ { 0x1.bdf55ep-33, 0x1.970fbp-29 },
+ { 0x1.62e226p-33, 0x1.449de6p-29 },
+ { 0x1.1a4576p-33, 0x1.02be7p-29 },
+ { 0x1.c0d0bep-34, 0x1.9c4672p-30 },
+ { 0x1.64a386p-34, 0x1.484b1ep-30 },
+ { 0x1.1b418cp-34, 0x1.054a9ap-30 },
+ { 0x1.c1ba4ap-35, 0x1.9fb994p-31 },
+ { 0x1.64d86p-35, 0x1.4a8e4ep-31 },
+ { 0x1.1b0242p-35, 0x1.06b4fep-31 },
+ { 0x1.c0aee6p-36, 0x1.a15d86p-32 },
+ { 0x1.637ffap-36, 0x1.4b5fdep-32 },
+ { 0x1.198862p-36, 0x1.06f8dap-32 },
+ { 0x1.bdb204p-37, 0x1.a12cc8p-33 },
+ { 0x1.609ec2p-37, 0x1.4abd0ap-33 },
+ { 0x1.16d8d2p-37, 0x1.06154ap-33 },
+ { 0x1.b8cd88p-38, 0x1.9f27fap-34 },
+ { 0x1.5c3e42p-38, 0x1.48a7fcp-34 },
+ { 0x1.12fc6cp-38, 0x1.040d4ap-34 },
+ { 0x1.b2119p-39, 0x1.9b55e8p-35 },
+ { 0x1.566cep-39, 0x1.4527acp-35 },
+ { 0x1.0dffep-39, 0x1.00e7acp-35 },
+ { 0x1.a99426p-40, 0x1.95c358p-36 },
+ { 0x1.4f3d92p-40, 0x1.4047cep-36 },
+ { 0x1.07f35ep-40, 0x1.f95dcep-37 },
+ { 0x1.9f70cp-41, 0x1.8e82cep-37 },
+ { 0x1.46c77ap-41, 0x1.3a1882p-37 },
+ { 0x1.00ea48p-41, 0x1.eee1d4p-38 },
+ { 0x1.93c7acp-42, 0x1.85ac18p-38 },
+ { 0x1.3d256ap-42, 0x1.32ae04p-38 },
+ { 0x1.f1f59p-43, 0x1.e27d88p-39 },
+ { 0x1.86bd6ap-43, 0x1.7b5bdap-39 },
+ { 0x1.327554p-43, 0x1.2a2036p-39 },
+ { 0x1.e07ab4p-44, 0x1.d458ap-40 },
+ { 0x1.7879ecp-44, 0x1.6fb2eap-40 },
+ { 0x1.26d7bp-44, 0x1.208a2cp-40 },
+ { 0x1.cd98a2p-45, 0x1.c49f8ap-41 },
+ { 0x1.6927c2p-45, 0x1.62d5aap-41 },
+ { 0x1.1a6ed6p-45, 0x1.16098ep-41 },
+ { 0x1.b986acp-46, 0x1.b3828ep-42 },
+ { 0x1.58f35ap-46, 0x1.54eb3ep-42 },
+ { 0x1.0d5e6p-46, 0x1.0abe0ep-42 },
+ { 0x1.a47db6p-47, 0x1.a134d4p-43 },
+ { 0x1.480a18p-47, 0x1.461cdap-43 },
+ { 0x1.ff94e4p-48, 0x1.fd9182p-44 },
+ { 0x1.8eb738p-48, 0x1.8deb62p-44 },
+ { 0x1.369994p-48, 0x1.3694e8p-44 },
+ { 0x1.e3ae4ap-49, 0x1.e49706p-45 },
+ { 0x1.786c3ep-49, 0x1.79dc28p-45 },
+ { 0x1.24cec8p-49, 0x1.267e46p-45 },
+ { 0x1.c74fc4p-50, 0x1.cad0bp-46 },
+ { 0x1.61d46cp-50, 0x1.653d08p-46 },
+ { 0x1.12d55cp-50, 0x1.16038cp-46 },
+ { 0x1.aabdacp-51, 0x1.b081aap-47 },
+ { 0x1.4b252ep-51, 0x1.5042e2p-47 },
+ { 0x1.00d6f8p-51, 0x1.054e44p-47 },
+ { 0x1.8e38ep-52, 0x1.95eb2cp-48 },
+ { 0x1.3490e8p-52, 0x1.3b20c6p-48 },
+ { 0x1.ddf56ap-53, 0x1.e90cb6p-49 },
+ { 0x1.71fdep-53, 0x1.7b4b76p-49 },
+ { 0x1.1e465ap-53, 0x1.26072ap-49 },
+ { 0x1.bac92ep-54, 0x1.c7a2ecp-50 },
+ { 0x1.56441cp-54, 0x1.60dcfp-50 },
+ { 0x1.08700cp-54, 0x1.112346p-50 },
+ { 0x1.986a66p-55, 0x1.a6a50ap-51 },
+ { 0x1.3b3d56p-55, 0x1.46d572p-51 },
+ { 0x1.e667dap-56, 0x1.f93d0ep-52 },
+ { 0x1.7712b8p-56, 0x1.86529ep-52 },
+ { 0x1.211544p-56, 0x1.2d65aep-52 },
+ { 0x1.bd660ap-57, 0x1.d13c32p-53 },
+ { 0x1.56f3eep-57, 0x1.66e45ap-53 },
+ { 0x1.07f14ap-57, 0x1.14b8b6p-53 },
+ { 0x1.96129cp-58, 0x1.aa854cp-54 },
+ { 0x1.3837cp-58, 0x1.488b94p-54 },
+ { 0x1.dfe0c2p-59, 0x1.f9e772p-55 },
+ { 0x1.709b5ap-59, 0x1.85503p-55 },
+ { 0x1.1affd2p-59, 0x1.2b7218p-55 },
+ { 0x1.b2564p-60, 0x1.cc6bb6p-56 },
+ { 0x1.4d23fap-60, 0x1.61cb1ap-56 },
+ { 0x1.fecbdp-61, 0x1.0fba0ep-56 },
+ { 0x1.8767d8p-61, 0x1.a13072p-57 },
+ { 0x1.2bc67ep-61, 0x1.401abcp-57 },
+ { 0x1.caf846p-62, 0x1.eafc2cp-58 },
+ { 0x1.5f2e7ap-62, 0x1.785cp-58 },
+ { 0x1.0c93acp-62, 0x1.205a7ep-58 },
+ { 0x1.9a9b06p-63, 0x1.b9a31ap-59 },
+ { 0x1.39b7fcp-63, 0x1.520968p-59 },
+ { 0x1.df277ap-64, 0x1.029ce6p-59 },
+ { 0x1.6dbcdp-64, 0x1.8b81d6p-60 },
+ { 0x1.17080ap-64, 0x1.2e48f2p-60 },
+ { 0x1.a98e26p-65, 0x1.cdd86cp-61 },
+ { 0x1.445a6ap-65, 0x1.60a47ap-61 },
+ { 0x1.ee324ep-66, 0x1.0d210cp-61 },
+ { 0x1.784e3p-66, 0x1.9a961ep-62 },
+ { 0x1.1e65fep-66, 0x1.390b74p-62 },
+ { 0x1.b3bb86p-67, 0x1.dd1e52p-63 },
+ { 0x1.4b4e36p-67, 0x1.6b6a7ap-63 },
+ { 0x1.f790f6p-68, 0x1.14acc2p-63 },
+ { 0x1.7e82cep-68, 0x1.a511aap-64 },
+ { 0x1.226a7ap-68, 0x1.404114p-64 },
+ { 0x1.b8c634p-69, 0x1.e6ea96p-65 },
+ { 0x1.4e53acp-69, 0x1.71f97ap-65 },
+ { 0x1.faed5cp-70, 0x1.18fb2ep-65 },
+ { 0x1.80217ep-70, 0x1.aa947ep-66 },
+ { 0x1.22f066p-70, 0x1.43a796p-66 },
+ { 0x1.b87f86p-71, 0x1.eae2fp-67 },
+ { 0x1.4d4ec8p-71, 0x1.7414e6p-67 },
+ { 0x1.f8283ep-72, 0x1.19e474p-67 },
+ { 0x1.7d1b22p-72, 0x1.aaeb7ep-68 },
+ { 0x1.1ff2dp-72, 0x1.431f66p-68 },
+ { 0x1.b2e9e8p-73, 0x1.e8e272p-69 },
+ { 0x1.4848dep-73, 0x1.71a91ep-69 },
+ { 0x1.ef5b16p-74, 0x1.176014p-69 },
+ { 0x1.758b92p-74, 0x1.a6137cp-70 },
+ { 0x1.198d42p-74, 0x1.3ead74p-70 },
+ { 0x1.a838bp-75, 0x1.e0fbc2p-71 },
+ { 0x1.3f700cp-75, 0x1.6accaep-71 },
+ { 0x1.e0d68ep-76, 0x1.118578p-71 },
+ { 0x1.69b7f4p-76, 0x1.9c3974p-72 },
+ { 0x1.0ffa12p-76, 0x1.367afap-72 },
+ { 0x1.98cd1cp-77, 0x1.d377fap-73 },
+ { 0x1.33148p-77, 0x1.5fbee6p-73 },
+ { 0x1.cd1dbap-78, 0x1.088a8p-73 },
+ { 0x1.5a0a9cp-78, 0x1.8db7ccp-74 },
+ { 0x1.038ef4p-78, 0x1.2ad2ecp-74 },
+ { 0x1.85308ap-79, 0x1.c0d23ep-75 },
+ { 0x1.23a3cp-79, 0x1.50e41ap-75 },
+ { 0x1.b4de68p-80, 0x1.f980a8p-76 },
+ { 0x1.470ce4p-80, 0x1.7b10fep-76 },
+ { 0x1.e9700cp-81, 0x1.1c1d98p-76 },
+ { 0x1.6e0c9p-81, 0x1.a9b08p-77 },
+ { 0x1.11a25ap-81, 0x1.3ebfb4p-77 },
+ { 0x1.98e73ap-82, 0x1.dd1d36p-78 },
+ { 0x1.315f58p-82, 0x1.64e7fp-78 },
+ { 0x1.c7e35cp-83, 0x1.0ada94p-78 },
+ { 0x1.542176p-83, 0x1.8ed9e8p-79 },
+ { 0x1.fb491ep-84, 0x1.29ecb2p-79 },
+ { 0x1.7a1c34p-84, 0x1.bcdb34p-80 },
+ { 0x1.19b0f2p-84, 0x1.4bf6cap-80 },
+ { 0x1.a383cap-85, 0x1.ef3318p-81 },
+ { 0x1.383bf2p-85, 0x1.712bc2p-81 },
+ { 0x1.d08cdap-86, 0x1.13151p-81 },
+ { 0x1.596adp-86, 0x1.99bf36p-82 },
+ { 0x1.00b602p-86, 0x1.3104d6p-82 },
+ { 0x1.7d62a2p-87, 0x1.c5e534p-83 },
+ { 0x1.1b2abcp-87, 0x1.518db2p-83 },
+ { 0x1.a4480ep-88, 0x1.f5d1c6p-84 },
+ { 0x1.37be42p-88, 0x1.74d45ap-84 },
+ { 0x1.ce3ee4p-89, 0x1.14dc4ap-84 },
+ { 0x1.568986p-89, 0x1.9afd0ep-85 },
+ { 0x1.fb69c6p-90, 0x1.30e632p-85 },
+ { 0x1.77a47ep-90, 0x1.c42b48p-86 },
+ { 0x1.15f4ep-90, 0x1.4f1f52p-86 },
+ { 0x1.9b25dcp-91, 0x1.f08156p-87 },
+ { 0x1.2feeeep-91, 0x1.6f9f62p-87 },
+ { 0x1.c122bcp-92, 0x1.100ffap-87 },
+ { 0x1.4bb154p-92, 0x1.927ce6p-88 },
+ { 0x1.e9ae56p-93, 0x1.2992f4p-88 },
+ { 0x1.6948e8p-93, 0x1.b7cccap-89 },
+ { 0x1.0a6cd2p-93, 0x1.44d7c4p-89 },
+ { 0x1.88c0cap-94, 0x1.dfa22p-90 },
+ { 0x1.215988p-94, 0x1.61eb26p-90 },
+ { 0x1.aa222ap-95, 0x1.0506e2p-90 },
+ { 0x1.39a30ep-95, 0x1.80d828p-91 },
+ { 0x1.cd740ep-96, 0x1.1b8f04p-91 },
+ { 0x1.534d82p-96, 0x1.a1a7ecp-92 },
+ { 0x1.f2bb06p-97, 0x1.336f3p-92 },
+ { 0x1.6e5b34p-97, 0x1.c46172p-93 },
+ { 0x1.0cfc82p-97, 0x1.4cab82p-93 },
+ { 0x1.8acc82p-98, 0x1.e9094cp-94 },
+ { 0x1.219686p-98, 0x1.67465p-94 },
+ { 0x1.a89fa6p-99, 0x1.07d0b8p-94 },
+ { 0x1.372982p-99, 0x1.833ffap-95 },
+ { 0x1.c7d094p-100, 0x1.1c147ap-95 },
+ { 0x1.4db1c8p-100, 0x1.a096ccp-96 },
+ { 0x1.e858d8p-101, 0x1.314decp-96 },
+ { 0x1.6529ep-101, 0x1.bf46cep-97 },
+ { 0x1.0517bap-101, 0x1.47796ap-97 },
+ { 0x1.7d8a8p-102, 0x1.df49a2p-98 },
+ { 0x1.16a46p-102, 0x1.5e9198p-98 },
+ { 0x1.96ca76p-103, 0x1.004b34p-98 },
+ { 0x1.28cb2cp-103, 0x1.768f3ep-99 },
+ { 0x1.b0de98p-104, 0x1.1190d2p-99 },
+ },
+ };
diff --git a/contrib/arm-optimized-routines/pl/math/erff_2u.c b/contrib/arm-optimized-routines/pl/math/erff_2u.c
new file mode 100644
index 000000000000..f43e647072f8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erff_2u.c
@@ -0,0 +1,82 @@
+/*
+ * Single-precision erf(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define TwoOverSqrtPiMinusOne 0x1.06eba8p-3f
+#define Shift 0x1p16f
+#define OneThird 0x1.555556p-2f
+
+/* Fast erff approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erf(x) ~ erf(r)
+ + scale * d * [
+ + 1
+ - r d
+ + 1/3 (2 r^2 - 1) d^2
+ - 1/6 (r (2 r^2 - 3) ) d^3
+ + 1/30 (4 r^4 - 12 r^2 + 3) d^4
+ ]
+
+ This single precision implementation uses only the following terms:
+
+ erf(x) ~ erf(r) + scale * d * [1 - r * d - 1/3 * d^2]
+
+ Values of erf(r) and scale are read from lookup tables.
+ For |x| > 3.9375, erf(|x|) rounds to 1.0f.
+
+ Maximum error: 1.93 ULP
+ erff(0x1.c373e6p-9) got 0x1.fd686cp-9
+ want 0x1.fd6868p-9. */
+float
+erff (float x)
+{
+ /* Get absolute value and sign. */
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & 0x7fffffff;
+ uint32_t sign = ix & ~0x7fffffff;
+
+ /* |x| < 0x1p-62. Triggers exceptions. */
+ if (unlikely (ia < 0x20800000))
+ return fmaf (TwoOverSqrtPiMinusOne, x, x);
+
+ if (ia < 0x407b8000) /* |x| < 4 - 8 / 128 = 3.9375. */
+ {
+      /* Lookup erf(r) and scale(r) in tables, e.g. erf(r) is set to 0 and scale
+	 to 2/sqrt(pi) when x is reduced to r = 0. */
+ float a = asfloat (ia);
+ float z = a + Shift;
+ uint32_t i = asuint (z) - asuint (Shift);
+ float r = z - Shift;
+ float erfr = __erff_data.tab[i].erf;
+ float scale = __erff_data.tab[i].scale;
+
+ /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2). */
+ float d = a - r;
+ float d2 = d * d;
+ float y = -fmaf (OneThird, d, r);
+ y = fmaf (fmaf (y, d2, d), scale, erfr);
+ return asfloat (asuint (y) | sign);
+ }
+
+ /* Special cases : erff(nan)=nan, erff(+inf)=+1 and erff(-inf)=-1. */
+ if (unlikely (ia >= 0x7f800000))
+ return (1.0f - (float) (sign >> 30)) + 1.0f / x;
+
+ /* Boring domain (|x| >= 4.0). */
+ return asfloat (sign | asuint (1.0f));
+}
+
+PL_SIG (S, F, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (erff, 1.43)
+PL_TEST_SYM_INTERVAL (erff, 0, 3.9375, 40000)
+PL_TEST_SYM_INTERVAL (erff, 3.9375, inf, 40000)
+PL_TEST_SYM_INTERVAL (erff, 0, inf, 40000)
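
The table index in erff above comes from a floating-point shift trick rather than an
explicit round/convert: adding 0x1p16f makes the ulp of the sum equal to 1/128, so the
low mantissa bits of z hold round(|x| * 128). Below is a minimal standalone sketch of
just that step, for illustration only; as_uint is a hypothetical stand-in for the
library's asuint from math_config.h.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for the library's asuint.  */
    static uint32_t
    as_uint (float x)
    {
      uint32_t u;
      memcpy (&u, &x, sizeof u);
      return u;
    }

    int
    main (void)
    {
      const float Shift = 0x1p16f; /* 65536.0f: ulp of (Shift + a) is 1/128 for a in [0, 4).  */
      float a = 1.234f;            /* sample value standing in for |x| (asfloat (ia) above).  */
      float z = a + Shift;         /* a rounded to the nearest multiple of 1/128.  */
      uint32_t i = as_uint (z) - as_uint (Shift); /* table index: round (a * 128).  */
      float r = z - Shift;         /* the rounded point r = i / 128.0f.  */
      printf ("i = %u, r = %a, d = a - r = %a\n", (unsigned) i, r, a - r);
      return 0;
    }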
diff --git a/contrib/arm-optimized-routines/pl/math/erff_data.c b/contrib/arm-optimized-routines/pl/math/erff_data.c
new file mode 100644
index 000000000000..84c0d2e95463
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erff_data.c
@@ -0,0 +1,532 @@
+/*
+ * Data for approximation of erff.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Lookup table used in erff.
+ For each possible rounded input r (multiples of 1/128), between
+ r = 0.0 and r = 4.0 (513 values):
+ - the first entry __erff_data.tab.erf contains the values of erf(r),
+ - the second entry __erff_data.tab.scale contains the values of
+ 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the
+ algorithm, since lookup is performed only for x >= 1/64-1/512. */
+const struct erff_data __erff_data = {
+ .tab = { { 0x0.000000p+0, 0x1.20dd76p+0 },
+ { 0x1.20dbf4p-7, 0x1.20d8f2p+0 },
+ { 0x1.20d770p-6, 0x1.20cb68p+0 },
+ { 0x1.b137e0p-6, 0x1.20b4d8p+0 },
+ { 0x1.20c564p-5, 0x1.209546p+0 },
+ { 0x1.68e5d4p-5, 0x1.206cb4p+0 },
+ { 0x1.b0fafep-5, 0x1.203b26p+0 },
+ { 0x1.f902a8p-5, 0x1.2000a0p+0 },
+ { 0x1.207d48p-4, 0x1.1fbd28p+0 },
+ { 0x1.44703ep-4, 0x1.1f70c4p+0 },
+ { 0x1.68591ap-4, 0x1.1f1b7ap+0 },
+ { 0x1.8c36bep-4, 0x1.1ebd56p+0 },
+ { 0x1.b00812p-4, 0x1.1e565cp+0 },
+ { 0x1.d3cbf8p-4, 0x1.1de698p+0 },
+ { 0x1.f7815ap-4, 0x1.1d6e14p+0 },
+ { 0x1.0d9390p-3, 0x1.1cecdcp+0 },
+ { 0x1.1f5e1ap-3, 0x1.1c62fap+0 },
+ { 0x1.311fc2p-3, 0x1.1bd07cp+0 },
+ { 0x1.42d7fcp-3, 0x1.1b3572p+0 },
+ { 0x1.548642p-3, 0x1.1a91e6p+0 },
+ { 0x1.662a0cp-3, 0x1.19e5eap+0 },
+ { 0x1.77c2d2p-3, 0x1.19318cp+0 },
+ { 0x1.895010p-3, 0x1.1874dep+0 },
+ { 0x1.9ad142p-3, 0x1.17aff0p+0 },
+ { 0x1.ac45e4p-3, 0x1.16e2d8p+0 },
+ { 0x1.bdad72p-3, 0x1.160da4p+0 },
+ { 0x1.cf076ep-3, 0x1.153068p+0 },
+ { 0x1.e05354p-3, 0x1.144b3cp+0 },
+ { 0x1.f190aap-3, 0x1.135e30p+0 },
+ { 0x1.015f78p-2, 0x1.12695ep+0 },
+ { 0x1.09eed6p-2, 0x1.116cd8p+0 },
+ { 0x1.127632p-2, 0x1.1068bap+0 },
+ { 0x1.1af54ep-2, 0x1.0f5d16p+0 },
+ { 0x1.236bf0p-2, 0x1.0e4a08p+0 },
+ { 0x1.2bd9dcp-2, 0x1.0d2fa6p+0 },
+ { 0x1.343ed6p-2, 0x1.0c0e0ap+0 },
+ { 0x1.3c9aa8p-2, 0x1.0ae550p+0 },
+ { 0x1.44ed18p-2, 0x1.09b590p+0 },
+ { 0x1.4d35f0p-2, 0x1.087ee4p+0 },
+ { 0x1.5574f4p-2, 0x1.07416cp+0 },
+ { 0x1.5da9f4p-2, 0x1.05fd3ep+0 },
+ { 0x1.65d4b8p-2, 0x1.04b27cp+0 },
+ { 0x1.6df50ap-2, 0x1.036140p+0 },
+ { 0x1.760abap-2, 0x1.0209a6p+0 },
+ { 0x1.7e1594p-2, 0x1.00abd0p+0 },
+ { 0x1.861566p-2, 0x1.fe8fb0p-1 },
+ { 0x1.8e0a02p-2, 0x1.fbbbbep-1 },
+ { 0x1.95f336p-2, 0x1.f8dc0ap-1 },
+ { 0x1.9dd0d2p-2, 0x1.f5f0cep-1 },
+ { 0x1.a5a2acp-2, 0x1.f2fa4cp-1 },
+ { 0x1.ad6896p-2, 0x1.eff8c4p-1 },
+ { 0x1.b52264p-2, 0x1.ecec78p-1 },
+ { 0x1.bccfecp-2, 0x1.e9d5a8p-1 },
+ { 0x1.c47104p-2, 0x1.e6b498p-1 },
+ { 0x1.cc0584p-2, 0x1.e38988p-1 },
+ { 0x1.d38d44p-2, 0x1.e054bep-1 },
+ { 0x1.db081cp-2, 0x1.dd167cp-1 },
+ { 0x1.e275eap-2, 0x1.d9cf06p-1 },
+ { 0x1.e9d68ap-2, 0x1.d67ea2p-1 },
+ { 0x1.f129d4p-2, 0x1.d32592p-1 },
+ { 0x1.f86faap-2, 0x1.cfc41ep-1 },
+ { 0x1.ffa7eap-2, 0x1.cc5a8ap-1 },
+ { 0x1.03693ap-1, 0x1.c8e91cp-1 },
+ { 0x1.06f794p-1, 0x1.c5701ap-1 },
+ { 0x1.0a7ef6p-1, 0x1.c1efcap-1 },
+ { 0x1.0dff50p-1, 0x1.be6872p-1 },
+ { 0x1.117894p-1, 0x1.bada5ap-1 },
+ { 0x1.14eab4p-1, 0x1.b745c6p-1 },
+ { 0x1.1855a6p-1, 0x1.b3aafcp-1 },
+ { 0x1.1bb95cp-1, 0x1.b00a46p-1 },
+ { 0x1.1f15ccp-1, 0x1.ac63e8p-1 },
+ { 0x1.226ae8p-1, 0x1.a8b828p-1 },
+ { 0x1.25b8a8p-1, 0x1.a5074ep-1 },
+ { 0x1.28ff02p-1, 0x1.a1519ep-1 },
+ { 0x1.2c3decp-1, 0x1.9d9762p-1 },
+ { 0x1.2f755cp-1, 0x1.99d8dap-1 },
+ { 0x1.32a54cp-1, 0x1.961650p-1 },
+ { 0x1.35cdb4p-1, 0x1.925008p-1 },
+ { 0x1.38ee8ap-1, 0x1.8e8646p-1 },
+ { 0x1.3c07cap-1, 0x1.8ab950p-1 },
+ { 0x1.3f196ep-1, 0x1.86e96ap-1 },
+ { 0x1.42236ep-1, 0x1.8316d6p-1 },
+ { 0x1.4525c8p-1, 0x1.7f41dcp-1 },
+ { 0x1.482074p-1, 0x1.7b6abcp-1 },
+ { 0x1.4b1372p-1, 0x1.7791b8p-1 },
+ { 0x1.4dfebap-1, 0x1.73b714p-1 },
+ { 0x1.50e24cp-1, 0x1.6fdb12p-1 },
+ { 0x1.53be26p-1, 0x1.6bfdf0p-1 },
+ { 0x1.569244p-1, 0x1.681ff2p-1 },
+ { 0x1.595ea6p-1, 0x1.644156p-1 },
+ { 0x1.5c2348p-1, 0x1.60625cp-1 },
+ { 0x1.5ee02ep-1, 0x1.5c8342p-1 },
+ { 0x1.619556p-1, 0x1.58a446p-1 },
+ { 0x1.6442c0p-1, 0x1.54c5a6p-1 },
+ { 0x1.66e86ep-1, 0x1.50e79ep-1 },
+ { 0x1.69865ep-1, 0x1.4d0a68p-1 },
+ { 0x1.6c1c98p-1, 0x1.492e42p-1 },
+ { 0x1.6eab18p-1, 0x1.455366p-1 },
+ { 0x1.7131e6p-1, 0x1.417a0cp-1 },
+ { 0x1.73b102p-1, 0x1.3da26ep-1 },
+ { 0x1.762870p-1, 0x1.39ccc2p-1 },
+ { 0x1.789836p-1, 0x1.35f940p-1 },
+ { 0x1.7b0058p-1, 0x1.32281ep-1 },
+ { 0x1.7d60d8p-1, 0x1.2e5992p-1 },
+ { 0x1.7fb9c0p-1, 0x1.2a8dcep-1 },
+ { 0x1.820b12p-1, 0x1.26c508p-1 },
+ { 0x1.8454d6p-1, 0x1.22ff72p-1 },
+ { 0x1.869712p-1, 0x1.1f3d3cp-1 },
+ { 0x1.88d1cep-1, 0x1.1b7e98p-1 },
+ { 0x1.8b050ep-1, 0x1.17c3b6p-1 },
+ { 0x1.8d30dep-1, 0x1.140cc4p-1 },
+ { 0x1.8f5544p-1, 0x1.1059eep-1 },
+ { 0x1.91724ap-1, 0x1.0cab62p-1 },
+ { 0x1.9387f6p-1, 0x1.09014cp-1 },
+ { 0x1.959652p-1, 0x1.055bd6p-1 },
+ { 0x1.979d68p-1, 0x1.01bb2cp-1 },
+ { 0x1.999d42p-1, 0x1.fc3ee6p-2 },
+ { 0x1.9b95e8p-1, 0x1.f511aap-2 },
+ { 0x1.9d8768p-1, 0x1.edeeeep-2 },
+ { 0x1.9f71cap-1, 0x1.e6d700p-2 },
+ { 0x1.a1551ap-1, 0x1.dfca26p-2 },
+ { 0x1.a33162p-1, 0x1.d8c8aap-2 },
+ { 0x1.a506b0p-1, 0x1.d1d2d0p-2 },
+ { 0x1.a6d50cp-1, 0x1.cae8dap-2 },
+ { 0x1.a89c86p-1, 0x1.c40b08p-2 },
+ { 0x1.aa5d26p-1, 0x1.bd3998p-2 },
+ { 0x1.ac16fcp-1, 0x1.b674c8p-2 },
+ { 0x1.adca14p-1, 0x1.afbcd4p-2 },
+ { 0x1.af767ap-1, 0x1.a911f0p-2 },
+ { 0x1.b11c3cp-1, 0x1.a27456p-2 },
+ { 0x1.b2bb68p-1, 0x1.9be438p-2 },
+ { 0x1.b4540ap-1, 0x1.9561c8p-2 },
+ { 0x1.b5e630p-1, 0x1.8eed36p-2 },
+ { 0x1.b771e8p-1, 0x1.8886b2p-2 },
+ { 0x1.b8f742p-1, 0x1.822e66p-2 },
+ { 0x1.ba764ap-1, 0x1.7be47ap-2 },
+ { 0x1.bbef10p-1, 0x1.75a91ap-2 },
+ { 0x1.bd61a2p-1, 0x1.6f7c6ap-2 },
+ { 0x1.bece0ep-1, 0x1.695e8cp-2 },
+ { 0x1.c03464p-1, 0x1.634fa6p-2 },
+ { 0x1.c194b2p-1, 0x1.5d4fd4p-2 },
+ { 0x1.c2ef08p-1, 0x1.575f34p-2 },
+ { 0x1.c44376p-1, 0x1.517de6p-2 },
+ { 0x1.c5920ap-1, 0x1.4bac00p-2 },
+ { 0x1.c6dad2p-1, 0x1.45e99cp-2 },
+ { 0x1.c81de2p-1, 0x1.4036d0p-2 },
+ { 0x1.c95b46p-1, 0x1.3a93b2p-2 },
+ { 0x1.ca930ep-1, 0x1.350052p-2 },
+ { 0x1.cbc54cp-1, 0x1.2f7cc4p-2 },
+ { 0x1.ccf20cp-1, 0x1.2a0916p-2 },
+ { 0x1.ce1962p-1, 0x1.24a554p-2 },
+ { 0x1.cf3b5cp-1, 0x1.1f518ap-2 },
+ { 0x1.d0580cp-1, 0x1.1a0dc6p-2 },
+ { 0x1.d16f7ep-1, 0x1.14da0ap-2 },
+ { 0x1.d281c4p-1, 0x1.0fb662p-2 },
+ { 0x1.d38ef0p-1, 0x1.0aa2d0p-2 },
+ { 0x1.d49710p-1, 0x1.059f5ap-2 },
+ { 0x1.d59a34p-1, 0x1.00ac00p-2 },
+ { 0x1.d6986cp-1, 0x1.f79184p-3 },
+ { 0x1.d791cap-1, 0x1.edeb40p-3 },
+ { 0x1.d8865ep-1, 0x1.e46530p-3 },
+ { 0x1.d97636p-1, 0x1.daff4ap-3 },
+ { 0x1.da6162p-1, 0x1.d1b982p-3 },
+ { 0x1.db47f4p-1, 0x1.c893cep-3 },
+ { 0x1.dc29fcp-1, 0x1.bf8e1cp-3 },
+ { 0x1.dd0788p-1, 0x1.b6a856p-3 },
+ { 0x1.dde0aap-1, 0x1.ade26cp-3 },
+ { 0x1.deb570p-1, 0x1.a53c42p-3 },
+ { 0x1.df85eap-1, 0x1.9cb5bep-3 },
+ { 0x1.e0522ap-1, 0x1.944ec2p-3 },
+ { 0x1.e11a3ep-1, 0x1.8c0732p-3 },
+ { 0x1.e1de36p-1, 0x1.83deeap-3 },
+ { 0x1.e29e22p-1, 0x1.7bd5c8p-3 },
+ { 0x1.e35a12p-1, 0x1.73eba4p-3 },
+ { 0x1.e41214p-1, 0x1.6c2056p-3 },
+ { 0x1.e4c638p-1, 0x1.6473b6p-3 },
+ { 0x1.e5768cp-1, 0x1.5ce596p-3 },
+ { 0x1.e62322p-1, 0x1.5575c8p-3 },
+ { 0x1.e6cc08p-1, 0x1.4e241ep-3 },
+ { 0x1.e7714ap-1, 0x1.46f066p-3 },
+ { 0x1.e812fcp-1, 0x1.3fda6cp-3 },
+ { 0x1.e8b12ap-1, 0x1.38e1fap-3 },
+ { 0x1.e94be4p-1, 0x1.3206dcp-3 },
+ { 0x1.e9e336p-1, 0x1.2b48dap-3 },
+ { 0x1.ea7730p-1, 0x1.24a7b8p-3 },
+ { 0x1.eb07e2p-1, 0x1.1e233ep-3 },
+ { 0x1.eb9558p-1, 0x1.17bb2cp-3 },
+ { 0x1.ec1fa2p-1, 0x1.116f48p-3 },
+ { 0x1.eca6ccp-1, 0x1.0b3f52p-3 },
+ { 0x1.ed2ae6p-1, 0x1.052b0cp-3 },
+ { 0x1.edabfcp-1, 0x1.fe6460p-4 },
+ { 0x1.ee2a1ep-1, 0x1.f2a902p-4 },
+ { 0x1.eea556p-1, 0x1.e72372p-4 },
+ { 0x1.ef1db4p-1, 0x1.dbd32ap-4 },
+ { 0x1.ef9344p-1, 0x1.d0b7a0p-4 },
+ { 0x1.f00614p-1, 0x1.c5d04ap-4 },
+ { 0x1.f07630p-1, 0x1.bb1c98p-4 },
+ { 0x1.f0e3a6p-1, 0x1.b09bfcp-4 },
+ { 0x1.f14e82p-1, 0x1.a64de6p-4 },
+ { 0x1.f1b6d0p-1, 0x1.9c31c6p-4 },
+ { 0x1.f21ca0p-1, 0x1.92470ap-4 },
+ { 0x1.f27ff8p-1, 0x1.888d1ep-4 },
+ { 0x1.f2e0eap-1, 0x1.7f036cp-4 },
+ { 0x1.f33f7ep-1, 0x1.75a960p-4 },
+ { 0x1.f39bc2p-1, 0x1.6c7e64p-4 },
+ { 0x1.f3f5c2p-1, 0x1.6381e2p-4 },
+ { 0x1.f44d88p-1, 0x1.5ab342p-4 },
+ { 0x1.f4a31ep-1, 0x1.5211ecp-4 },
+ { 0x1.f4f694p-1, 0x1.499d48p-4 },
+ { 0x1.f547f2p-1, 0x1.4154bcp-4 },
+ { 0x1.f59742p-1, 0x1.3937b2p-4 },
+ { 0x1.f5e490p-1, 0x1.31458ep-4 },
+ { 0x1.f62fe8p-1, 0x1.297dbap-4 },
+ { 0x1.f67952p-1, 0x1.21df9ap-4 },
+ { 0x1.f6c0dcp-1, 0x1.1a6a96p-4 },
+ { 0x1.f7068cp-1, 0x1.131e14p-4 },
+ { 0x1.f74a6ep-1, 0x1.0bf97ep-4 },
+ { 0x1.f78c8cp-1, 0x1.04fc3ap-4 },
+ { 0x1.f7cceep-1, 0x1.fc4b5ep-5 },
+ { 0x1.f80ba2p-1, 0x1.eeea8cp-5 },
+ { 0x1.f848acp-1, 0x1.e1d4d0p-5 },
+ { 0x1.f8841ap-1, 0x1.d508fap-5 },
+ { 0x1.f8bdf2p-1, 0x1.c885e0p-5 },
+ { 0x1.f8f63ep-1, 0x1.bc4a54p-5 },
+ { 0x1.f92d08p-1, 0x1.b05530p-5 },
+ { 0x1.f96256p-1, 0x1.a4a54ap-5 },
+ { 0x1.f99634p-1, 0x1.99397ap-5 },
+ { 0x1.f9c8a8p-1, 0x1.8e109cp-5 },
+ { 0x1.f9f9bap-1, 0x1.83298ep-5 },
+ { 0x1.fa2974p-1, 0x1.78832cp-5 },
+ { 0x1.fa57dep-1, 0x1.6e1c58p-5 },
+ { 0x1.fa84fep-1, 0x1.63f3f6p-5 },
+ { 0x1.fab0dep-1, 0x1.5a08e8p-5 },
+ { 0x1.fadb84p-1, 0x1.505a18p-5 },
+ { 0x1.fb04f6p-1, 0x1.46e66cp-5 },
+ { 0x1.fb2d40p-1, 0x1.3dacd2p-5 },
+ { 0x1.fb5464p-1, 0x1.34ac36p-5 },
+ { 0x1.fb7a6cp-1, 0x1.2be38cp-5 },
+ { 0x1.fb9f60p-1, 0x1.2351c2p-5 },
+ { 0x1.fbc344p-1, 0x1.1af5d2p-5 },
+ { 0x1.fbe61ep-1, 0x1.12ceb4p-5 },
+ { 0x1.fc07fap-1, 0x1.0adb60p-5 },
+ { 0x1.fc28d8p-1, 0x1.031ad6p-5 },
+ { 0x1.fc48c2p-1, 0x1.f7182ap-6 },
+ { 0x1.fc67bcp-1, 0x1.e85c44p-6 },
+ { 0x1.fc85d0p-1, 0x1.da0006p-6 },
+ { 0x1.fca2fep-1, 0x1.cc0180p-6 },
+ { 0x1.fcbf52p-1, 0x1.be5ecep-6 },
+ { 0x1.fcdaccp-1, 0x1.b1160ap-6 },
+ { 0x1.fcf576p-1, 0x1.a4255ap-6 },
+ { 0x1.fd0f54p-1, 0x1.978ae8p-6 },
+ { 0x1.fd286ap-1, 0x1.8b44e6p-6 },
+ { 0x1.fd40bep-1, 0x1.7f5188p-6 },
+ { 0x1.fd5856p-1, 0x1.73af0cp-6 },
+ { 0x1.fd6f34p-1, 0x1.685bb6p-6 },
+ { 0x1.fd8562p-1, 0x1.5d55ccp-6 },
+ { 0x1.fd9ae2p-1, 0x1.529b9ep-6 },
+ { 0x1.fdafb8p-1, 0x1.482b84p-6 },
+ { 0x1.fdc3e8p-1, 0x1.3e03d8p-6 },
+ { 0x1.fdd77ap-1, 0x1.3422fep-6 },
+ { 0x1.fdea6ep-1, 0x1.2a875cp-6 },
+ { 0x1.fdfcccp-1, 0x1.212f62p-6 },
+ { 0x1.fe0e96p-1, 0x1.181984p-6 },
+ { 0x1.fe1fd0p-1, 0x1.0f443ep-6 },
+ { 0x1.fe3080p-1, 0x1.06ae14p-6 },
+ { 0x1.fe40a6p-1, 0x1.fcab14p-7 },
+ { 0x1.fe504cp-1, 0x1.ec7262p-7 },
+ { 0x1.fe5f70p-1, 0x1.dcaf36p-7 },
+ { 0x1.fe6e18p-1, 0x1.cd5ecap-7 },
+ { 0x1.fe7c46p-1, 0x1.be7e5ap-7 },
+ { 0x1.fe8a00p-1, 0x1.b00b38p-7 },
+ { 0x1.fe9748p-1, 0x1.a202bep-7 },
+ { 0x1.fea422p-1, 0x1.94624ep-7 },
+ { 0x1.feb090p-1, 0x1.87275ep-7 },
+ { 0x1.febc96p-1, 0x1.7a4f6ap-7 },
+ { 0x1.fec836p-1, 0x1.6dd7fep-7 },
+ { 0x1.fed374p-1, 0x1.61beaep-7 },
+ { 0x1.fede52p-1, 0x1.56011cp-7 },
+ { 0x1.fee8d4p-1, 0x1.4a9cf6p-7 },
+ { 0x1.fef2fep-1, 0x1.3f8ff6p-7 },
+ { 0x1.fefccep-1, 0x1.34d7dcp-7 },
+ { 0x1.ff064cp-1, 0x1.2a727ap-7 },
+ { 0x1.ff0f76p-1, 0x1.205dacp-7 },
+ { 0x1.ff1852p-1, 0x1.169756p-7 },
+ { 0x1.ff20e0p-1, 0x1.0d1d6ap-7 },
+ { 0x1.ff2924p-1, 0x1.03ede2p-7 },
+ { 0x1.ff3120p-1, 0x1.f60d8ap-8 },
+ { 0x1.ff38d6p-1, 0x1.e4cc4ap-8 },
+ { 0x1.ff4048p-1, 0x1.d4143ap-8 },
+ { 0x1.ff4778p-1, 0x1.c3e1a6p-8 },
+ { 0x1.ff4e68p-1, 0x1.b430ecp-8 },
+ { 0x1.ff551ap-1, 0x1.a4fe84p-8 },
+ { 0x1.ff5b90p-1, 0x1.9646f4p-8 },
+ { 0x1.ff61ccp-1, 0x1.8806d8p-8 },
+ { 0x1.ff67d0p-1, 0x1.7a3adep-8 },
+ { 0x1.ff6d9ep-1, 0x1.6cdfccp-8 },
+ { 0x1.ff7338p-1, 0x1.5ff276p-8 },
+ { 0x1.ff789ep-1, 0x1.536fc2p-8 },
+ { 0x1.ff7dd4p-1, 0x1.4754acp-8 },
+ { 0x1.ff82dap-1, 0x1.3b9e40p-8 },
+ { 0x1.ff87b2p-1, 0x1.30499cp-8 },
+ { 0x1.ff8c5cp-1, 0x1.2553eep-8 },
+ { 0x1.ff90dcp-1, 0x1.1aba78p-8 },
+ { 0x1.ff9532p-1, 0x1.107a8cp-8 },
+ { 0x1.ff9960p-1, 0x1.06918cp-8 },
+ { 0x1.ff9d68p-1, 0x1.f9f9d0p-9 },
+ { 0x1.ffa14ap-1, 0x1.e77448p-9 },
+ { 0x1.ffa506p-1, 0x1.d58da6p-9 },
+ { 0x1.ffa8a0p-1, 0x1.c4412cp-9 },
+ { 0x1.ffac18p-1, 0x1.b38a3ap-9 },
+ { 0x1.ffaf6ep-1, 0x1.a36454p-9 },
+ { 0x1.ffb2a6p-1, 0x1.93cb12p-9 },
+ { 0x1.ffb5bep-1, 0x1.84ba30p-9 },
+ { 0x1.ffb8b8p-1, 0x1.762d84p-9 },
+ { 0x1.ffbb98p-1, 0x1.682100p-9 },
+ { 0x1.ffbe5ap-1, 0x1.5a90b0p-9 },
+ { 0x1.ffc102p-1, 0x1.4d78bcp-9 },
+ { 0x1.ffc390p-1, 0x1.40d564p-9 },
+ { 0x1.ffc606p-1, 0x1.34a306p-9 },
+ { 0x1.ffc862p-1, 0x1.28de12p-9 },
+ { 0x1.ffcaa8p-1, 0x1.1d8318p-9 },
+ { 0x1.ffccd8p-1, 0x1.128ebap-9 },
+ { 0x1.ffcef4p-1, 0x1.07fdb4p-9 },
+ { 0x1.ffd0fap-1, 0x1.fb99b8p-10 },
+ { 0x1.ffd2eap-1, 0x1.e7f232p-10 },
+ { 0x1.ffd4cap-1, 0x1.d4fed8p-10 },
+ { 0x1.ffd696p-1, 0x1.c2b9d0p-10 },
+ { 0x1.ffd84ep-1, 0x1.b11d70p-10 },
+ { 0x1.ffd9f8p-1, 0x1.a02436p-10 },
+ { 0x1.ffdb90p-1, 0x1.8fc8c8p-10 },
+ { 0x1.ffdd18p-1, 0x1.8005f0p-10 },
+ { 0x1.ffde90p-1, 0x1.70d6a4p-10 },
+ { 0x1.ffdffap-1, 0x1.6235fcp-10 },
+ { 0x1.ffe154p-1, 0x1.541f34p-10 },
+ { 0x1.ffe2a2p-1, 0x1.468daep-10 },
+ { 0x1.ffe3e2p-1, 0x1.397ceep-10 },
+ { 0x1.ffe514p-1, 0x1.2ce898p-10 },
+ { 0x1.ffe63cp-1, 0x1.20cc76p-10 },
+ { 0x1.ffe756p-1, 0x1.15246ep-10 },
+ { 0x1.ffe866p-1, 0x1.09ec86p-10 },
+ { 0x1.ffe96ap-1, 0x1.fe41cep-11 },
+ { 0x1.ffea64p-1, 0x1.e97ba4p-11 },
+ { 0x1.ffeb54p-1, 0x1.d57f52p-11 },
+ { 0x1.ffec3ap-1, 0x1.c245d4p-11 },
+ { 0x1.ffed16p-1, 0x1.afc85ep-11 },
+ { 0x1.ffedeap-1, 0x1.9e0058p-11 },
+ { 0x1.ffeeb4p-1, 0x1.8ce75ep-11 },
+ { 0x1.ffef76p-1, 0x1.7c7744p-11 },
+ { 0x1.fff032p-1, 0x1.6caa0ep-11 },
+ { 0x1.fff0e4p-1, 0x1.5d79ecp-11 },
+ { 0x1.fff18ep-1, 0x1.4ee142p-11 },
+ { 0x1.fff232p-1, 0x1.40daa4p-11 },
+ { 0x1.fff2d0p-1, 0x1.3360ccp-11 },
+ { 0x1.fff366p-1, 0x1.266ea8p-11 },
+ { 0x1.fff3f6p-1, 0x1.19ff46p-11 },
+ { 0x1.fff480p-1, 0x1.0e0de8p-11 },
+ { 0x1.fff504p-1, 0x1.0295f0p-11 },
+ { 0x1.fff582p-1, 0x1.ef25d4p-12 },
+ { 0x1.fff5fcp-1, 0x1.da0110p-12 },
+ { 0x1.fff670p-1, 0x1.c5b542p-12 },
+ { 0x1.fff6dep-1, 0x1.b23a5ap-12 },
+ { 0x1.fff74ap-1, 0x1.9f8894p-12 },
+ { 0x1.fff7aep-1, 0x1.8d986ap-12 },
+ { 0x1.fff810p-1, 0x1.7c629ap-12 },
+ { 0x1.fff86cp-1, 0x1.6be022p-12 },
+ { 0x1.fff8c6p-1, 0x1.5c0a38p-12 },
+ { 0x1.fff91cp-1, 0x1.4cda54p-12 },
+ { 0x1.fff96cp-1, 0x1.3e4a24p-12 },
+ { 0x1.fff9bap-1, 0x1.305390p-12 },
+ { 0x1.fffa04p-1, 0x1.22f0b4p-12 },
+ { 0x1.fffa4cp-1, 0x1.161be4p-12 },
+ { 0x1.fffa90p-1, 0x1.09cfa4p-12 },
+ { 0x1.fffad0p-1, 0x1.fc0d56p-13 },
+ { 0x1.fffb0ep-1, 0x1.e577bcp-13 },
+ { 0x1.fffb4ap-1, 0x1.cfd4a6p-13 },
+ { 0x1.fffb82p-1, 0x1.bb1a96p-13 },
+ { 0x1.fffbb8p-1, 0x1.a74068p-13 },
+ { 0x1.fffbecp-1, 0x1.943d4ap-13 },
+ { 0x1.fffc1ep-1, 0x1.8208bcp-13 },
+ { 0x1.fffc4ep-1, 0x1.709a8ep-13 },
+ { 0x1.fffc7ap-1, 0x1.5feadap-13 },
+ { 0x1.fffca6p-1, 0x1.4ff208p-13 },
+ { 0x1.fffccep-1, 0x1.40a8c2p-13 },
+ { 0x1.fffcf6p-1, 0x1.3207fcp-13 },
+ { 0x1.fffd1ap-1, 0x1.2408eap-13 },
+ { 0x1.fffd3ep-1, 0x1.16a502p-13 },
+ { 0x1.fffd60p-1, 0x1.09d5f8p-13 },
+ { 0x1.fffd80p-1, 0x1.fb2b7ap-14 },
+ { 0x1.fffda0p-1, 0x1.e3bcf4p-14 },
+ { 0x1.fffdbep-1, 0x1.cd5528p-14 },
+ { 0x1.fffddap-1, 0x1.b7e946p-14 },
+ { 0x1.fffdf4p-1, 0x1.a36eecp-14 },
+ { 0x1.fffe0ep-1, 0x1.8fdc1cp-14 },
+ { 0x1.fffe26p-1, 0x1.7d2738p-14 },
+ { 0x1.fffe3ep-1, 0x1.6b4702p-14 },
+ { 0x1.fffe54p-1, 0x1.5a329cp-14 },
+ { 0x1.fffe68p-1, 0x1.49e178p-14 },
+ { 0x1.fffe7ep-1, 0x1.3a4b60p-14 },
+ { 0x1.fffe90p-1, 0x1.2b6876p-14 },
+ { 0x1.fffea2p-1, 0x1.1d3120p-14 },
+ { 0x1.fffeb4p-1, 0x1.0f9e1cp-14 },
+ { 0x1.fffec4p-1, 0x1.02a868p-14 },
+ { 0x1.fffed4p-1, 0x1.ec929ap-15 },
+ { 0x1.fffee4p-1, 0x1.d4f4b4p-15 },
+ { 0x1.fffef2p-1, 0x1.be6abcp-15 },
+ { 0x1.ffff00p-1, 0x1.a8e8ccp-15 },
+ { 0x1.ffff0cp-1, 0x1.94637ep-15 },
+ { 0x1.ffff18p-1, 0x1.80cfdcp-15 },
+ { 0x1.ffff24p-1, 0x1.6e2368p-15 },
+ { 0x1.ffff30p-1, 0x1.5c540cp-15 },
+ { 0x1.ffff3ap-1, 0x1.4b581cp-15 },
+ { 0x1.ffff44p-1, 0x1.3b2652p-15 },
+ { 0x1.ffff4ep-1, 0x1.2bb5ccp-15 },
+ { 0x1.ffff56p-1, 0x1.1cfe02p-15 },
+ { 0x1.ffff60p-1, 0x1.0ef6c4p-15 },
+ { 0x1.ffff68p-1, 0x1.019842p-15 },
+ { 0x1.ffff70p-1, 0x1.e9b5e8p-16 },
+ { 0x1.ffff78p-1, 0x1.d16f58p-16 },
+ { 0x1.ffff7ep-1, 0x1.ba4f04p-16 },
+ { 0x1.ffff84p-1, 0x1.a447b8p-16 },
+ { 0x1.ffff8cp-1, 0x1.8f4cccp-16 },
+ { 0x1.ffff92p-1, 0x1.7b5224p-16 },
+ { 0x1.ffff98p-1, 0x1.684c22p-16 },
+ { 0x1.ffff9cp-1, 0x1.562facp-16 },
+ { 0x1.ffffa2p-1, 0x1.44f21ep-16 },
+ { 0x1.ffffa6p-1, 0x1.34894ap-16 },
+ { 0x1.ffffacp-1, 0x1.24eb72p-16 },
+ { 0x1.ffffb0p-1, 0x1.160f44p-16 },
+ { 0x1.ffffb4p-1, 0x1.07ebd2p-16 },
+ { 0x1.ffffb8p-1, 0x1.f4f12ep-17 },
+ { 0x1.ffffbcp-1, 0x1.db5ad0p-17 },
+ { 0x1.ffffc0p-1, 0x1.c304f0p-17 },
+ { 0x1.ffffc4p-1, 0x1.abe09ep-17 },
+ { 0x1.ffffc6p-1, 0x1.95df98p-17 },
+ { 0x1.ffffcap-1, 0x1.80f43ap-17 },
+ { 0x1.ffffccp-1, 0x1.6d1178p-17 },
+ { 0x1.ffffd0p-1, 0x1.5a2ae0p-17 },
+ { 0x1.ffffd2p-1, 0x1.483488p-17 },
+ { 0x1.ffffd4p-1, 0x1.372310p-17 },
+ { 0x1.ffffd6p-1, 0x1.26eb9ep-17 },
+ { 0x1.ffffd8p-1, 0x1.1783cep-17 },
+ { 0x1.ffffdcp-1, 0x1.08e1bap-17 },
+ { 0x1.ffffdep-1, 0x1.f5f7d8p-18 },
+ { 0x1.ffffdep-1, 0x1.db92b6p-18 },
+ { 0x1.ffffe0p-1, 0x1.c282cep-18 },
+ { 0x1.ffffe2p-1, 0x1.aab7acp-18 },
+ { 0x1.ffffe4p-1, 0x1.94219cp-18 },
+ { 0x1.ffffe6p-1, 0x1.7eb1a2p-18 },
+ { 0x1.ffffe8p-1, 0x1.6a5972p-18 },
+ { 0x1.ffffe8p-1, 0x1.570b6ap-18 },
+ { 0x1.ffffeap-1, 0x1.44ba86p-18 },
+ { 0x1.ffffeap-1, 0x1.335a62p-18 },
+ { 0x1.ffffecp-1, 0x1.22df2ap-18 },
+ { 0x1.ffffeep-1, 0x1.133d96p-18 },
+ { 0x1.ffffeep-1, 0x1.046aeap-18 },
+ { 0x1.fffff0p-1, 0x1.ecb9d0p-19 },
+ { 0x1.fffff0p-1, 0x1.d21398p-19 },
+ { 0x1.fffff2p-1, 0x1.b8d094p-19 },
+ { 0x1.fffff2p-1, 0x1.a0df10p-19 },
+ { 0x1.fffff2p-1, 0x1.8a2e26p-19 },
+ { 0x1.fffff4p-1, 0x1.74adc8p-19 },
+ { 0x1.fffff4p-1, 0x1.604ea8p-19 },
+ { 0x1.fffff4p-1, 0x1.4d0232p-19 },
+ { 0x1.fffff6p-1, 0x1.3aba86p-19 },
+ { 0x1.fffff6p-1, 0x1.296a70p-19 },
+ { 0x1.fffff6p-1, 0x1.190562p-19 },
+ { 0x1.fffff8p-1, 0x1.097f62p-19 },
+ { 0x1.fffff8p-1, 0x1.f59a20p-20 },
+ { 0x1.fffff8p-1, 0x1.d9c736p-20 },
+ { 0x1.fffff8p-1, 0x1.bf716cp-20 },
+ { 0x1.fffffap-1, 0x1.a6852cp-20 },
+ { 0x1.fffffap-1, 0x1.8eefd8p-20 },
+ { 0x1.fffffap-1, 0x1.789fb8p-20 },
+ { 0x1.fffffap-1, 0x1.6383f8p-20 },
+ { 0x1.fffffap-1, 0x1.4f8c96p-20 },
+ { 0x1.fffffap-1, 0x1.3caa62p-20 },
+ { 0x1.fffffcp-1, 0x1.2acee2p-20 },
+ { 0x1.fffffcp-1, 0x1.19ec60p-20 },
+ { 0x1.fffffcp-1, 0x1.09f5d0p-20 },
+ { 0x1.fffffcp-1, 0x1.f5bd96p-21 },
+ { 0x1.fffffcp-1, 0x1.d9371ep-21 },
+ { 0x1.fffffcp-1, 0x1.be41dep-21 },
+ { 0x1.fffffcp-1, 0x1.a4c89ep-21 },
+ { 0x1.fffffcp-1, 0x1.8cb738p-21 },
+ { 0x1.fffffep-1, 0x1.75fa8ep-21 },
+ { 0x1.fffffep-1, 0x1.608078p-21 },
+ { 0x1.fffffep-1, 0x1.4c37c0p-21 },
+ { 0x1.fffffep-1, 0x1.39100ep-21 },
+ { 0x1.fffffep-1, 0x1.26f9e0p-21 },
+ { 0x1.fffffep-1, 0x1.15e682p-21 },
+ { 0x1.fffffep-1, 0x1.05c804p-21 },
+ { 0x1.fffffep-1, 0x1.ed2254p-22 },
+ { 0x1.fffffep-1, 0x1.d06ad6p-22 },
+ { 0x1.fffffep-1, 0x1.b551c8p-22 },
+ { 0x1.fffffep-1, 0x1.9bc0a0p-22 },
+ { 0x1.fffffep-1, 0x1.83a200p-22 },
+ { 0x1.fffffep-1, 0x1.6ce1aap-22 },
+ { 0x1.fffffep-1, 0x1.576c72p-22 },
+ { 0x1.fffffep-1, 0x1.43302cp-22 },
+ { 0x1.fffffep-1, 0x1.301ba2p-22 },
+ { 0x1.fffffep-1, 0x1.1e1e86p-22 },
+ { 0x1.fffffep-1, 0x1.0d2966p-22 },
+ { 0x1.000000p+0, 0x1.fa5b50p-23 },
+ { 0x1.000000p+0, 0x1.dc3ae4p-23 },
+ { 0x1.000000p+0, 0x1.bfd756p-23 },
+ { 0x1.000000p+0, 0x1.a517dap-23 },
+ { 0x1.000000p+0, 0x1.8be4f8p-23 },
+ { 0x1.000000p+0, 0x1.74287ep-23 },
+ { 0x1.000000p+0, 0x1.5dcd66p-23 },
+ { 0x1.000000p+0, 0x1.48bfd4p-23 },
+ { 0x1.000000p+0, 0x1.34ecf8p-23 },
+ { 0x1.000000p+0, 0x1.224310p-23 },
+ { 0x1.000000p+0, 0x1.10b148p-23 },
+ },
+};
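
For reference, each row i of the table above stores erf(i/128) and
2/sqrt(pi) * exp(-(i/128)^2) rounded to single precision; the first scale entry,
0x1.20dd76p+0, is exactly 2/sqrt(pi) rounded to float. The sketch below regenerates
entries of that shape from standard libm calls. It is not the upstream generator, and
the final values may differ from the table by a unit in the last place due to the
double-to-float rounding step.

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* 2/sqrt(pi) to double precision.  */
      const double two_over_sqrt_pi = 0x1.20dd750429b6dp+0;
      for (int i = 0; i <= 512; i++)
        {
          double r = i / 128.0;
          float erfr = (float) erf (r);
          float scale = (float) (two_over_sqrt_pi * exp (-r * r));
          printf ("  { %a, %a },\n", (double) erfr, (double) scale);
        }
      return 0;
    }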
diff --git a/contrib/arm-optimized-routines/pl/math/erfinv_24u5.c b/contrib/arm-optimized-routines/pl/math/erfinv_24u5.c
new file mode 100644
index 000000000000..20e1e361befc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfinv_24u5.c
@@ -0,0 +1,81 @@
+/*
+ * Double-precision inverse error function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "poly_scalar_f64.h"
+#include "pl_sig.h"
+#define IGNORE_SCALAR_FENV
+#include "pl_test.h"
+
+const static struct
+{
+ /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
+ coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
+ of the denominator. */
+ double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];
+} data = {
+ .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,
+ -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,
+ 0x1.689181bbafd0cp-3 },
+ .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,
+ -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,
+ 0x1p+0 },
+ .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,
+ 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,
+ -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },
+ .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,
+ 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,
+ -0x1.4075c56404eecp+3, 0x1p+0 },
+ .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
+ 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
+ 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
+ .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
+ 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
+ 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,
+ 0x1p+0 }
+};
+
+/* Inverse error function approximation, based on rational approximation as
+ described in
+ J. M. Blair, C. A. Edwards, and J. H. Johnson,
+ "Rational Chebyshev approximations for the inverse of the error function",
+ Math. Comp. 30, pp. 827--830 (1976).
+ https://doi.org/10.1090/S0025-5718-1976-0421040-7
+ Largest observed error is 24.46 ULP, in the extreme tail:
+ erfinv(0x1.fd9504351b757p-1) got 0x1.ff72c1092917p+0
+ want 0x1.ff72c10929158p+0. */
+double
+erfinv (double x)
+{
+ double a = fabs (x);
+
+ if (a <= 0.75)
+ {
+ /* Largest observed error in this region is 6.06 ULP:
+ erfinv(0x1.1884650fd2d41p-2) got 0x1.fb65998cbd3fep-3
+ want 0x1.fb65998cbd404p-3. */
+ double t = x * x - 0.5625;
+ return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);
+ }
+
+ if (a <= 0.9375)
+ {
+ /* Largest observed error in this region is 6.95 ULP:
+ erfinv(0x1.a8d65b94d8c6p-1) got 0x1.f08325591b54p-1
+ want 0x1.f08325591b547p-1. */
+ double t = x * x - 0.87890625;
+ return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);
+ }
+
+ double t = 1.0 / (sqrt (-log (1 - a)));
+ return horner_8_f64 (t, data.P_57)
+ / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
+}
+
+PL_SIG (S, D, 1, erfinv, -0.99, 0.99)
+PL_TEST_ULP (erfinv, 24.0)
+PL_TEST_INTERVAL (erfinv, 0, 1, 40000)
+PL_TEST_INTERVAL (erfinv, -0x1p-1022, -1, 40000)
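
The horner_N_f64 helpers used above come from poly_scalar_f64.h. Assuming they take
coefficients lowest-order first (c[0] + t*(c[1] + ...)), the central-interval branch is
equivalent to the plain-C sketch below; horner and erfinv_central are illustrative
names, not part of the library.

    #include <stddef.h>
    #include <stdio.h>

    /* Horner evaluation of c[0] + t*c[1] + ... + t^(n-1)*c[n-1],
       i.e. the coefficient order assumed for horner_N_f64.  */
    static double
    horner (double t, const double *c, size_t n)
    {
      double y = c[n - 1];
      for (size_t i = n - 1; i-- > 0;)
        y = y * t + c[i];
      return y;
    }

    /* Central interval |x| <= 0.75:
       erfinv(x) ~ x * P_17(x^2 - 0.5625) / Q_17(x^2 - 0.5625),
       with P and Q being the two 7-entry arrays from the struct above.  */
    double
    erfinv_central (double x, const double P[7], const double Q[7])
    {
      double t = x * x - 0.5625;
      return x * horner (t, P, 7) / horner (t, Q, 7);
    }

    int
    main (void)
    {
      /* Sanity check of horner on 1 + 2t + 3t^2 at t = 2 (expect 17).  */
      const double c[] = { 1.0, 2.0, 3.0 };
      printf ("%g\n", horner (2.0, c, 3));
      return 0;
    }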
diff --git a/contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c b/contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c
new file mode 100644
index 000000000000..40736da08be8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfinvf_4u7.c
@@ -0,0 +1,74 @@
+/*
+ * Single-precision inverse error function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+const static struct
+{
+ /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
+ coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
+ of the denominator. */
+ float P_10[3], Q_10[4], P_29[4], Q_29[4], P_50[6], Q_50[3];
+} data = { .P_10 = { -0x1.a31268p+3, 0x1.ac9048p+4, -0x1.293ff6p+3 },
+ .Q_10 = { -0x1.8265eep+3, 0x1.ef5eaep+4, -0x1.12665p+4, 0x1p+0 },
+ .P_29
+ = { -0x1.fc0252p-4, 0x1.119d44p+0, -0x1.f59ee2p+0, 0x1.b13626p-2 },
+ .Q_29 = { -0x1.69952p-4, 0x1.c7b7d2p-1, -0x1.167d7p+1, 0x1p+0 },
+ .P_50 = { 0x1.3d8948p-3, 0x1.61f9eap+0, 0x1.61c6bcp-1,
+ -0x1.20c9f2p+0, 0x1.5c704cp-1, -0x1.50c6bep-3 },
+ .Q_50 = { 0x1.3d7dacp-3, 0x1.629e5p+0, 0x1p+0 } };
+
+/* Inverse error function approximation, based on rational approximation as
+ described in
+ J. M. Blair, C. A. Edwards, and J. H. Johnson,
+ "Rational Chebyshev approximations for the inverse of the error function",
+ Math. Comp. 30, pp. 827--830 (1976).
+ https://doi.org/10.1090/S0025-5718-1976-0421040-7
+ Largest error is 4.71 ULP, in the tail region:
+ erfinvf(0x1.f84e9ap-1) got 0x1.b8326ap+0
+ want 0x1.b83274p+0. */
+float
+erfinvf (float x)
+{
+ if (x == 1.0f)
+ return __math_oflowf (0);
+ if (x == -1.0f)
+ return __math_oflowf (1);
+
+ float a = fabsf (x);
+ if (a > 1.0f)
+ return __math_invalidf (x);
+
+ if (a <= 0.75f)
+ {
+ /* Greatest error in this region is 4.60 ULP:
+ erfinvf(0x1.0a98bap-5) got 0x1.d8a93ep-6
+ want 0x1.d8a948p-6. */
+ float t = x * x - 0.5625f;
+ return x * horner_2_f32 (t, data.P_10) / horner_3_f32 (t, data.Q_10);
+ }
+ if (a < 0.9375f)
+ {
+ /* Greatest error in this region is 3.79 ULP:
+ erfinvf(0x1.ac82d6p-1) got 0x1.f8fc54p-1
+ want 0x1.f8fc5cp-1. */
+ float t = x * x - 0.87890625f;
+ return x * horner_3_f32 (t, data.P_29) / horner_3_f32 (t, data.Q_29);
+ }
+
+ /* Tail region, where error is greatest (and sensitive to sqrt and log1p
+     implementations). */
+ float t = 1.0 / sqrtf (-log1pf (-a));
+ return horner_5_f32 (t, data.P_50)
+ / (copysignf (t, x) * horner_2_f32 (t, data.Q_50));
+}
+
+PL_SIG (S, F, 1, erfinv, -0.99, 0.99)
+PL_TEST_ULP (erfinvf, 4.09)
+PL_TEST_SYM_INTERVAL (erfinvf, 0, 1, 40000)
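
A quick way to exercise the three branches above is a round trip through erff. The
sketch below is a hypothetical usage example, not part of the import; it assumes
erfinvf is built and linked from this library, and the residuals it prints are
illustrative only.

    #include <math.h>
    #include <stdio.h>

    /* Provided by this library; not declared in <math.h>.  */
    float erfinvf (float);

    int
    main (void)
    {
      /* Inputs hitting the |x| <= 0.75, |x| < 0.9375 and tail branches.  */
      const float xs[] = { 0.1f, -0.5f, 0.8f, -0.9f, 0.99f };
      for (unsigned i = 0; i < sizeof xs / sizeof xs[0]; i++)
        {
          float y = erfinvf (xs[i]);
          printf ("erfinvf(% .2f) = % .6f, erff(y) - x = % .3e\n",
                  xs[i], y, erff (y) - xs[i]);
        }
      return 0;
    }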
diff --git a/contrib/arm-optimized-routines/pl/math/erfinvl.c b/contrib/arm-optimized-routines/pl/math/erfinvl.c
new file mode 100644
index 000000000000..ea4aadfccd00
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/erfinvl.c
@@ -0,0 +1,114 @@
+/*
+ * Extended precision inverse error function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#define _GNU_SOURCE
+#include <math.h>
+#include <stdbool.h>
+#include <float.h>
+
+#include "math_config.h"
+#include "poly_scalar_f64.h"
+
+#define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l
+#define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l
+
+const static struct
+{
+ /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
+ coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
+ of the denominator. */
+ double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];
+} data = {
+ .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,
+ -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,
+ 0x1.689181bbafd0cp-3 },
+ .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,
+ -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,
+ 0x1p+0 },
+ .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,
+ 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,
+ -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },
+ .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,
+ 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,
+ -0x1.4075c56404eecp+3, 0x1p+0 },
+ .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
+ 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
+ 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
+ .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
+ 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
+ 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,
+ 0x1p+0 }
+};
+
+/* Inverse error function approximation, based on rational approximation as
+ described in
+ J. M. Blair, C. A. Edwards, and J. H. Johnson,
+ "Rational Chebyshev approximations for the inverse of the error function",
+ Math. Comp. 30, pp. 827--830 (1976).
+ https://doi.org/10.1090/S0025-5718-1976-0421040-7. */
+static inline double
+__erfinv (double x)
+{
+ if (x == 1.0)
+ return __math_oflow (0);
+ if (x == -1.0)
+ return __math_oflow (1);
+
+ double a = fabs (x);
+ if (a > 1)
+ return __math_invalid (x);
+
+ if (a <= 0.75)
+ {
+ double t = x * x - 0.5625;
+ return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);
+ }
+
+ if (a <= 0.9375)
+ {
+ double t = x * x - 0.87890625;
+ return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);
+ }
+
+ double t = 1.0 / (sqrtl (-log1pl (-a)));
+ return horner_8_f64 (t, data.P_57)
+ / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
+}
+
+/* Extended-precision variant, which uses the above (or asymptotic estimate) as
+ starting point for Newton refinement. This implementation is a port to C of
+ the version in the SpecialFunctions.jl Julia package, with relaxed stopping
+ criteria for the Newton refinement. */
+long double
+erfinvl (long double x)
+{
+ if (x == 0)
+ return 0;
+
+ double yf = __erfinv (x);
+ long double y;
+ if (isfinite (yf))
+ y = yf;
+ else
+ {
+ /* Double overflowed, use asymptotic estimate instead. */
+ y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x);
+ if (!isfinite (y))
+ return y;
+ }
+
+ double eps = fabs (yf - nextafter (yf, 0));
+ while (true)
+ {
+ long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y);
+ y -= dy;
+ /* Stopping criterion is different to Julia implementation, but is enough
+ to ensure result is accurate when rounded to double-precision. */
+ if (fabsl (dy) < eps)
+ break;
+ }
+ return y;
+}
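
The refinement loop above is a plain Newton iteration for the root of
f(y) = erf(y) - x. Since f'(y) = (2/sqrt(pi)) * exp(-y^2), each update is

    y <- y - (erf(y) - x) / f'(y)
       = y - (sqrt(pi)/2) * (erf(y) - x) * exp(y^2),

which is the dy formed with HF_SQRT_PIl. The tolerance eps is the spacing of doubles
just below the initial guess yf, so the loop stops once the correction drops below one
ulp of the double-precision starting point.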
diff --git a/contrib/arm-optimized-routines/pl/math/exp.c b/contrib/arm-optimized-routines/pl/math/exp.c
new file mode 100644
index 000000000000..90253b68875d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/exp.c
@@ -0,0 +1,163 @@
+/*
+ * Double-precision e^x function.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS)
+#define InvLn2N __exp_data.invln2N
+#define NegLn2hiN __exp_data.negln2hiN
+#define NegLn2loN __exp_data.negln2loN
+#define Shift __exp_data.shift
+#define T __exp_data.tab
+#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
+#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
+#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
+#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
+#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS, however it has a computed exponent that may have
+ overflown into the sign bit so that needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale, positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double
+specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
+{
+ double_t scale, y;
+
+ if ((ki & 0x80000000) == 0)
+ {
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52;
+ scale = asdouble (sbits);
+ y = 0x1p1009 * (scale + scale * tmp);
+ return check_oflow (eval_as_double (y));
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ scale = asdouble (sbits);
+ y = scale + scale * tmp;
+ if (y < 1.0)
+ {
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double_t hi, lo;
+ lo = scale - y + scale * tmp;
+ hi = 1.0 + y;
+ lo = 1.0 - hi + y + lo;
+ y = eval_as_double (hi + lo) - 1.0;
+ /* Avoid -0.0 with downward rounding. */
+ if (WANT_ROUNDING && y == 0.0)
+ y = 0.0;
+ /* The underflow exception needs to be signaled explicitly. */
+ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+ }
+ y = 0x1p-1022 * y;
+ return check_uflow (eval_as_double (y));
+}
+
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t
+top12 (double x)
+{
+ return asuint64 (x) >> 52;
+}
+
+/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ If hastail is 0 then xtail is assumed to be 0 too. */
+static inline double
+exp_inline (double x, double xtail, int hastail)
+{
+ uint32_t abstop;
+ uint64_t ki, idx, top, sbits;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, z, r, r2, scale, tail, tmp;
+
+ abstop = top12 (x) & 0x7ff;
+ if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
+ {
+ if (abstop - top12 (0x1p-54) >= 0x80000000)
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ return WANT_ROUNDING ? 1.0 + x : 1.0;
+ if (abstop >= top12 (1024.0))
+ {
+ if (asuint64 (x) == asuint64 (-INFINITY))
+ return 0.0;
+ if (abstop >= top12 (INFINITY))
+ return 1.0 + x;
+ if (asuint64 (x) >> 63)
+ return __math_uflow (0);
+ else
+ return __math_oflow (0);
+ }
+ /* Large x is special cased below. */
+ abstop = 0;
+ }
+
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ z = InvLn2N * x;
+#if TOINT_INTRINSICS
+ kd = roundtoint (z);
+ ki = converttoint (z);
+#elif EXP_USE_TOINT_NARROW
+ /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
+ kd = eval_as_double (z + Shift);
+ ki = asuint64 (kd) >> 16;
+ kd = (double_t) (int32_t) ki;
+#else
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ kd = eval_as_double (z + Shift);
+ ki = asuint64 (kd);
+ kd -= Shift;
+#endif
+ r = x + kd * NegLn2hiN + kd * NegLn2loN;
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ if (hastail)
+ r += xtail;
+ /* 2^(k/N) ~= scale * (1 + tail). */
+ idx = 2 * (ki % N);
+ top = ki << (52 - EXP_TABLE_BITS);
+ tail = asdouble (T[idx]);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ sbits = T[idx + 1] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ r2 = r * r;
+ /* Without fma the worst case error is 0.25/N ulp larger. */
+ /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
+#if EXP_POLY_ORDER == 4
+ tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
+#elif EXP_POLY_ORDER == 5
+ tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+#elif EXP_POLY_ORDER == 6
+ tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
+#endif
+ if (unlikely (abstop == 0))
+ return specialcase (tmp, sbits, ki);
+ scale = asdouble (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double (scale + scale * tmp);
+}
+
+/* May be useful for implementing pow where more than double
+ precision input is needed. */
+double
+__exp_dd (double x, double xtail)
+{
+ return exp_inline (x, xtail, 1);
+}
+
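
The non-TOINT_INTRINSICS path in exp_inline rounds z = x * N/ln2 to an integer with the
same shift idea used in erff, but in double precision: adding Shift = 0x1.8p52 makes
the ulp of the sum equal to 1, so the stored bit pattern of kd is bits(Shift) + k.
Below is a standalone sketch of that step; round_to_int_shift is a hypothetical name,
and the real code keeps this inline and wraps the addition in eval_as_double to force
double rounding on targets with excess precision.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical helper illustrating the Shift-based rounding in exp_inline.  */
    static int64_t
    round_to_int_shift (double z, double *kd_out)
    {
      const double Shift = 0x1.8p52; /* 1.5 * 2^52: ulp of (Shift + z) is 1.  */
      double kd = z + Shift;         /* z rounded to the nearest integer.  */
      uint64_t ki, shift_bits;
      memcpy (&ki, &kd, sizeof ki);
      memcpy (&shift_bits, &Shift, sizeof shift_bits);
      *kd_out = kd - Shift;          /* k as a double, used to form r.  */
      /* The stored bits equal bits(Shift) + k, so k is recovered exactly for
         |k| < 2^51; exp_inline instead keeps the raw ki and relies on
         wraparound in ki % N and ki << (52 - EXP_TABLE_BITS).  */
      return (int64_t) ki - (int64_t) shift_bits;
    }

    int
    main (void)
    {
      double kd;
      int64_t k = round_to_int_shift (-3.3, &kd); /* expect k = -3, kd = -3.0 */
      printf ("k = %lld, kd = %g\n", (long long) k, kd);
      return 0;
    }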
diff --git a/contrib/arm-optimized-routines/pl/math/exp_data.c b/contrib/arm-optimized-routines/pl/math/exp_data.c
new file mode 100644
index 000000000000..2354be76cfab
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/exp_data.c
@@ -0,0 +1,1120 @@
+/*
+ * Shared data between exp, exp2 and pow.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << EXP_TABLE_BITS)
+
+const struct exp_data __exp_data = {
+// N/ln2
+.invln2N = 0x1.71547652b82fep0 * N,
+// -ln2/N
+#if N == 64
+.negln2hiN = -0x1.62e42fefa0000p-7,
+.negln2loN = -0x1.cf79abc9e3b3ap-46,
+#elif N == 128
+.negln2hiN = -0x1.62e42fefa0000p-8,
+.negln2loN = -0x1.cf79abc9e3b3ap-47,
+#elif N == 256
+.negln2hiN = -0x1.62e42fefc0000p-9,
+.negln2loN = 0x1.c610ca86c3899p-45,
+#elif N == 512
+.negln2hiN = -0x1.62e42fef80000p-10,
+.negln2loN = -0x1.1cf79abc9e3b4p-45,
+#endif
+// Used for rounding when !TOINT_INTRINSICS
+#if EXP_USE_TOINT_NARROW
+.shift = 0x1800000000.8p0,
+#else
+.shift = 0x1.8p52,
+#endif
+// exp polynomial coefficients.
+.poly = {
+#if N == 64 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
+// abs error: 1.5543*2^-60
+// ulp error: 0.529 (0.533 without fma)
+// if |x| < ln2/128+eps
+// abs error if |x| < ln2/64: 1.7157*2^-50
+0x1.fffffffffdbcdp-2,
+0x1.555555555444cp-3,
+0x1.555573c6a9f7dp-5,
+0x1.1111266d28935p-7,
+#elif N == 64 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
+// abs error: 1.6735*2^-64
+// ulp error: 0.518 (0.522 without fma)
+// if |x| < ln2/64
+0x1.5555555548f9ap-3,
+0x1.555555554bf5dp-5,
+0x1.11115b75f0f4dp-7,
+0x1.6c171a6b6303ep-10,
+#elif N == 128 && EXP_POLY_ORDER == 5 && !EXP_POLY_WIDE
+// abs error: 1.555*2^-66
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/256+eps
+// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
+// abs error if |x| < ln2/128: 1.7145*2^-56
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf172b91p-5,
+0x1.1111167a4d017p-7,
+#elif N == 128 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
+// abs error: 1.5542*2^-60
+// ulp error: 0.521 (0.523 without fma)
+// if |x| < ln2/128
+0x1.fffffffffdbcep-2,
+0x1.55555555543c2p-3,
+0x1.555573c64f2e3p-5,
+0x1.111126b4eff73p-7,
+#elif N == 128 && EXP_POLY_ORDER == 6 && EXP_POLY_WIDE
+// abs error: 1.6861*2^-71
+// ulp error: 0.509 (0.511 without fma)
+// if |x| < ln2/128
+0x1.55555555548fdp-3,
+0x1.555555555658fp-5,
+0x1.111123a859bb6p-7,
+0x1.6c16ba6920cabp-10,
+#elif N == 256 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
+// abs error: 1.43*2^-58
+// ulp error: 0.549 (0.550 without fma)
+// if |x| < ln2/512
+0x1p0, // unused
+0x1.fffffffffffd4p-2,
+0x1.5555571d6ef9p-3,
+0x1.5555576a5adcep-5,
+#elif N == 256 && EXP_POLY_ORDER == 5 && EXP_POLY_WIDE
+// abs error: 1.5547*2^-66
+// ulp error: 0.505 (0.506 without fma)
+// if |x| < ln2/256
+0x1.ffffffffffdbdp-2,
+0x1.555555555543cp-3,
+0x1.55555cf16e1edp-5,
+0x1.1111167a4b553p-7,
+#elif N == 512 && EXP_POLY_ORDER == 4 && !EXP_POLY_WIDE
+// abs error: 1.4300*2^-63
+// ulp error: 0.504
+// if |x| < ln2/1024
+// abs error if |x| < ln2/512: 1.0689*2^-55
+0x1p0, // unused
+0x1.ffffffffffffdp-2,
+0x1.555555c75bb6p-3,
+0x1.555555dec04a8p-5,
+#endif
+},
+.exp2_shift = 0x1.8p52 / N,
+// exp2 polynomial coefficients.
+.exp2_poly = {
+#if N == 64 && EXP2_POLY_ORDER == 6 && EXP2_POLY_WIDE
+// abs error: 1.3054*2^-63
+// ulp error: 0.515
+// if |x| < 1/64
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c58fp-3,
+0x1.c6b08d7045cf1p-5,
+0x1.3b2ab6fb8fd0ep-7,
+0x1.5d884afec48d7p-10,
+0x1.43097dc684ae1p-13,
+#elif N == 128 && EXP2_POLY_ORDER == 5 && !EXP2_POLY_WIDE
+// abs error: 1.2195*2^-65
+// ulp error: 0.507 (0.511 without fma)
+// if |x| < 1/256
+// abs error if |x| < 1/128: 1.9941*2^-56
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+#elif N == 256 && EXP2_POLY_ORDER == 5 && EXP2_POLY_WIDE
+// abs error: 1.2195*2^-65
+// ulp error: 0.504 (0.508 without fma)
+// if |x| < 1/256
+0x1.62e42fefa39efp-1,
+0x1.ebfbdff82c424p-3,
+0x1.c6b08d70cf4b5p-5,
+0x1.3b2abd24650ccp-7,
+0x1.5d7e09b4e3a84p-10,
+#elif N == 512 && EXP2_POLY_ORDER == 4 && !EXP2_POLY_WIDE
+// abs error: 1.4411*2^-64
+// ulp error: 0.5024 (0.5063 without fma)
+// if |x| < 1/1024
+// abs error if |x| < 1/512: 1.9430*2^-56
+0x1.62e42fefa39ecp-1,
+0x1.ebfbdff82c58bp-3,
+0x1.c6b08e46de41fp-5,
+0x1.3b2ab786ee1dap-7,
+#endif
+},
+// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
+// tab[2*k] = asuint64(T[k])
+// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
+.tab = {
+#if N == 64
+0x0, 0x3ff0000000000000,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc93cedd78565858, 0x3feea23882552225,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+#elif N == 128
+0x0, 0x3ff0000000000000,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+#elif N == 256
+0x0, 0x3ff0000000000000,
+0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0xbc82985dd8521d32, 0x3feff168143b0281,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c845fad437fa426, 0x3fefde5f72f654b1,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0xbc954529642b232f, 0x3fefd50a0e3c1f89,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0x3c9407fb30d06420, 0x3fefb0f145e46c85,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc65704e90c9f860, 0x3fef86a814f204ab,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc897cea57e46280, 0x3fef7e95934f312e,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c8cdc1873af2155, 0x3fef582f95281c6b,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc9493684653a131, 0x3fef50e75eb44027,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0xbc98e2899077520a, 0x3fef49c18438ce4d,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0x3c9120fcd4f59273, 0x3fef42be3578a819,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0x3c877afbca90ef84, 0x3fef351ffb82140a,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c91512f082876ee, 0x3fef2e85711ece75,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0xbc803297e78260bf, 0x3fef21ba7591bb70,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0xbc91e75c40b4251e, 0x3fef157e39771b2f,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c98a911f1f7785a, 0x3fef0f961f641589,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0xbc979517a03e2847, 0x3feefeb83ba8ea32,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0xbc87430803972b34, 0x3feef431a2de883b,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc954de30ae02d94, 0x3feeef26231e754a,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c79c3bba5562a2f, 0x3feee0e544ede173,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc85a71612e21658, 0x3feedc70df1c5175,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0x3c86421f6f1d24d6, 0x3feed822c367a024,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc9348a6815fce65, 0x3feed3fb2709468a,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0x3c835c43984d9871, 0x3feecffa3f84b9d4,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0xbc632afc8d9473a0, 0x3feecc2042a7d232,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0xbc95fc5e44de020e, 0x3feec86d668b3237,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c892ca3bf144e63, 0x3feebe41b817c114,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0x3c73e34f67e67118, 0x3feeb8417f4531ee,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0xbc943a3540d1898a, 0x3feeaa11fba87a03,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0xbc951f58ddaa8090, 0x3feea84590998b93,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0xbc82e1648e50a17c, 0x3feea6a320dceb71,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
+0xbc93cedd78565858, 0x3feea23882552225,
+0xbc85c33fdf910406, 0x3feea1c1c70833f6,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0x3c81079ab5789604, 0x3feea0f4b19e9538,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0x3c727df161cd7778, 0x3feea052fa75173e,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0x3c93f9924a05b767, 0x3fee9fdcddd47645,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc88e67a9006c909, 0x3fee9f8286ead08a,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0xbc760a3629969871, 0x3feea3878491c491,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c7c88549b958471, 0x3feea9cad931a436,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c931143962f7877, 0x3feeabd0a478580f,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c93e9e96f112479, 0x3feeae05bad61778,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0xbc51669428996971, 0x3feebbdd9a7670b3,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0xbc79740b58a20091, 0x3feeca5e8d07f29e,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0xbc903d5cbe27874b, 0x3feed2c980460ad8,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c5986178980fce0, 0x3feed74a8af46052,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc7274aedac8ff80, 0x3feef68415b749b1,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc302899507554e5, 0x3fef0f69c3f3a207,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0xbc80dda2d4c0010c, 0x3fef16286141b33d,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0x3c836909391181d3, 0x3fef244778fafb22,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0xbc7ac28b7bef6621, 0x3fef33405751c4db,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0xbc8cc734592af7fc, 0x3fef43155b5bab74,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c87752a44f587e8, 0x3fef4b532b08c968,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0x3c8c254d16117a68, 0x3fef655d71ff6075,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc776caa4c2ff1cf, 0x3fef953924676d76,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c901f3a75ee0efe, 0x3fefd632798844f8,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0xbc699c7db2effc76, 0x3fefedba3692d514,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+0x3c64b458677f9840, 0x3feff9d96b2a23d9,
+#elif N == 512
+0x0, 0x3ff0000000000000,
+0xbc75d87ade1f60d5, 0x3feffd8c86da1c0a,
+0xbc84e82fc61851ac, 0x3feffb1afa5abcbf,
+0x3c9bffdaa7ac4bac, 0x3feff8ab5b2cbd11,
+0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+0x3c75c18e5ae0563a, 0x3feff3d1e77170b4,
+0xbc82985dd8521d32, 0x3feff168143b0281,
+0xbc705b1125cf49a5, 0x3fefef003103b10e,
+0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+0x3c9f879abbff3f87, 0x3fefea363d42b027,
+0x3c651e617061bfbd, 0x3fefe7d42e11bbcc,
+0x3c9b14003824712a, 0x3fefe57411915a8a,
+0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+0x3c61cbf0f38af658, 0x3fefe0b9b35659d8,
+0x3c845fad437fa426, 0x3fefde5f72f654b1,
+0xbc9a3316383dcbc5, 0x3fefdc0727fc1762,
+0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+0x3c9901c9e0e797fd, 0x3fefd75c74f0bec2,
+0xbc954529642b232f, 0x3fefd50a0e3c1f89,
+0xbc89b3236d111646, 0x3fefd2b99fa6407c,
+0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+0xbc8cb191be99b1b0, 0x3fefce1ead925493,
+0x3c8293708ef5c32e, 0x3fefcbd42b72a836,
+0xbc9acb71e83765b7, 0x3fefc98ba42e7d30,
+0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+0x3c5cd3e58b03697e, 0x3fefc50088f8093f,
+0xbc95b9280905b2a4, 0x3fefc2bdf66607e0,
+0xbc8bfb07d4755452, 0x3fefc07d61701716,
+0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+0x3c8aedeb3e7b14cd, 0x3fefbc02331b9715,
+0x3c84f31f32c4b7e7, 0x3fefb9c79b1f3919,
+0x3c9a8eb1f3d914b4, 0x3fefb78f03834e52,
+0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+0xbc85b9eb0402507b, 0x3fefb323d833d93f,
+0x3c9407fb30d06420, 0x3fefb0f145e46c85,
+0xbc93f0f225bbf3ee, 0x3fefaec0b6bdae53,
+0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+0xbc9c3fe7282d1784, 0x3fefaa65a4b520ba,
+0xbc9a5d04b3b9911b, 0x3fefa83b23395dec,
+0x3c9c8be44bf4cde8, 0x3fefa612a7b26300,
+0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+0x3c820c5444c93c44, 0x3fefa1c7c55189c6,
+0xbc937a01f0739546, 0x3fef9fa55fdfa9c5,
+0xbc84c6baeb580d7a, 0x3fef9d8503328e6d,
+0xbc6a033489906e0b, 0x3fef9b66affed31b,
+0x3c8657aa1b0d9f83, 0x3fef994a66f951ce,
+0x3c8b8268b04ef0a5, 0x3fef973028d7233e,
+0x3c62f2c7fd6ee145, 0x3fef9517f64d9ef1,
+0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+0xbc6b0b2789925e90, 0x3fef90edb6db2dc1,
+0xbc9ac46e44a2ebcc, 0x3fef8edbab5e2ab6,
+0xbc93aad17d197fae, 0x3fef8ccbae51a5c8,
+0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+0xbc989c464a07ad70, 0x3fef88b1e264a0e9,
+0xbc65704e90c9f860, 0x3fef86a814f204ab,
+0xbc72c338fce197f4, 0x3fef84a058cbae1e,
+0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+0xbc6dca724cea0eb6, 0x3fef809717425438,
+0xbc897cea57e46280, 0x3fef7e95934f312e,
+0x3c464770b955d34d, 0x3fef7c962388149e,
+0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+0xbc962811c114424f, 0x3fef789d83606e12,
+0x3c56f01429e2b9d2, 0x3fef76a45471c3c2,
+0x3c8ec58e74904dd4, 0x3fef74ad3c92df73,
+0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+0x3c8d63b0ab2d5bbf, 0x3fef70c554eaea89,
+0x3c6e653b2459034b, 0x3fef6ed48695bbc0,
+0xbc9ca9effbeeac92, 0x3fef6ce5d23816c9,
+0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+0x3c8bda920de0f6e2, 0x3fef690eba4df41f,
+0x3c92cc7ea345b7dc, 0x3fef672658375d2f,
+0xbc9a597f9a5ff71c, 0x3fef654013041dc2,
+0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+0x3c50835b125aa573, 0x3fef6179e2363cf8,
+0x3c957bfb2876ea9e, 0x3fef5f99f8138a1c,
+0x3c8aaa13d61aec1f, 0x3fef5dbc2dc40bf0,
+0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+0x3c8a4f81aa7110bd, 0x3fef5a06fb91588f,
+0x3c8cdc1873af2155, 0x3fef582f95281c6b,
+0xbc6817fd6a313e3e, 0x3fef565a51860746,
+0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+0xbc96236af85fd26a, 0x3fef52b6358e15e8,
+0xbc9493684653a131, 0x3fef50e75eb44027,
+0x3c7795eb4523abe7, 0x3fef4f1aad999e82,
+0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+0x3c8fe58b91b40095, 0x3fef4b87bf9cda38,
+0xbc98e2899077520a, 0x3fef49c18438ce4d,
+0x3c91ecaa860c614a, 0x3fef47fd7190241e,
+0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+0xbc3e45c83ba0bbcb, 0x3fef447bc96ffc18,
+0x3c9120fcd4f59273, 0x3fef42be3578a819,
+0xbc29fd3bea07b4ee, 0x3fef4102cd3d09b9,
+0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+0x3c87f1c7350e256d, 0x3fef3d9282fc1f27,
+0x3c89b788c188c9b8, 0x3fef3bdda27912d1,
+0x3c420dac6c124f4f, 0x3fef3a2af0b63bff,
+0x3c968efde3a8a894, 0x3fef387a6e756238,
+0xbc99501d09bc09fd, 0x3fef36cc1c78903a,
+0x3c877afbca90ef84, 0x3fef351ffb82140a,
+0x3c73baf864dc8675, 0x3fef33760c547f15,
+0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+0x3c91b0575c1eaf54, 0x3fef3028c65fa1ff,
+0x3c91512f082876ee, 0x3fef2e85711ece75,
+0xbc90364bc9ce33ab, 0x3fef2ce450b3cb82,
+0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+0xbc7548165d85ed32, 0x3fef29a8b16f0a30,
+0x3c9a02f0c7d75ec6, 0x3fef280e341ddf29,
+0x3c7c3b977a68e32c, 0x3fef2675eeb3ab98,
+0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+0xbc93a255f697ecfe, 0x3fef234c0ea83f36,
+0xbc803297e78260bf, 0x3fef21ba7591bb70,
+0x3c8d2d19edc1e550, 0x3fef202b17779965,
+0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+0xbc76b2173113dd8c, 0x3fef1d130f50d65c,
+0xbc95b77e5ccd9fbf, 0x3fef1b8a66d10f13,
+0x3c811aa5f853590b, 0x3fef1a03fc675d1f,
+0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+0x3c61d61a34c8aa02, 0x3fef16fde4f2e280,
+0xbc91e75c40b4251e, 0x3fef157e39771b2f,
+0xbc91f892bf6b286d, 0x3fef1400cf2f6c18,
+0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+0x3c7590c65c20e680, 0x3fef110cc15d5346,
+0x3c98a911f1f7785a, 0x3fef0f961f641589,
+0x3c86fe320b5c1e9d, 0x3fef0e21c1c14833,
+0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+0xbc903cd8b2f25790, 0x3fef0b3fd6a454d2,
+0xbc61e7c998db7dbb, 0x3fef09d24abd886b,
+0x3c7b3bf786a54a87, 0x3fef08670653dfe4,
+0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+0x3c74bb6c41732885, 0x3fef05975721b004,
+0x3c85425c11faadf4, 0x3fef0432edeeb2fd,
+0xbc99d7399abb9a8b, 0x3fef02d0cf63eeac,
+0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+0xbc5451d60c6ac9eb, 0x3fef001375752b40,
+0xbc979517a03e2847, 0x3feefeb83ba8ea32,
+0x3c8787a210ceafd9, 0x3feefd5f4fb45e20,
+0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+0xbc888d1e4629943d, 0x3feefab46484ebb4,
+0xbc800e2a46da4bee, 0x3feef96266e3fa2d,
+0xbc93369c544088b6, 0x3feef812ba4ea77d,
+0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+0x3c85373ce4eb6dfb, 0x3feef57a577dd72b,
+0xbc87430803972b34, 0x3feef431a2de883b,
+0x3c83adec8265a67f, 0x3feef2eb428335b4,
+0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+0xbc835388bcac6bc5, 0x3feef06581d3f669,
+0xbc954de30ae02d94, 0x3feeef26231e754a,
+0x3c727cdb4e4b6640, 0x3feeede91be9c811,
+0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+0x3c86c2696a26af35, 0x3feeeb761742d808,
+0xbc94f2487e1c03ec, 0x3feeea401b7140ef,
+0x3c888f6ff06b979a, 0x3feee90c7a61d55b,
+0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+0xbc89d5efaabc2030, 0x3feee6ac4bcdf3ea,
+0x3c914a5432fcb2f4, 0x3feee57fbfec6cf4,
+0xbc76b8867f91c9d6, 0x3feee4559212ef89,
+0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+0x3c94c9c0b5157fe6, 0x3feee20853c10f28,
+0x3c79c3bba5562a2f, 0x3feee0e544ede173,
+0xbc62455345b51c8e, 0x3feedfc4976d27fa,
+0x3c859f48a72a4c6d, 0x3feedea64c123422,
+0xbc93331de45477d0, 0x3feedd8a63b0a09b,
+0xbc85a71612e21658, 0x3feedc70df1c5175,
+0xbc95f84d39b39b16, 0x3feedb59bf29743f,
+0xbc9312607a28698a, 0x3feeda4504ac801c,
+0xbc72ba4dc7c4d562, 0x3feed932b07a35df,
+0x3c86421f6f1d24d6, 0x3feed822c367a024,
+0xbc844f25dc02691f, 0x3feed7153e4a136a,
+0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+0xbc888d328eb9b501, 0x3feed5016f44d8f5,
+0xbc9348a6815fce65, 0x3feed3fb2709468a,
+0x3c7f0bec42ddb15a, 0x3feed2f74a1af3f1,
+0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+0xbc615f0a2b9cd452, 0x3feed0f6d5817663,
+0x3c835c43984d9871, 0x3feecffa3f84b9d4,
+0xbc8c2e465a919e1d, 0x3feecf0018321a1a,
+0x3c4363ed60c2ac11, 0x3feece086061892d,
+0xbc865dfd02bd08f1, 0x3feecd1318eb43ec,
+0xbc632afc8d9473a0, 0x3feecc2042a7d232,
+0xbc8e68cec89b1762, 0x3feecb2fde7006f4,
+0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+0xbc48ae858eb682ca, 0x3feec9566f8827d0,
+0xbc95fc5e44de020e, 0x3feec86d668b3237,
+0x3c5dd71277c0915f, 0x3feec786d3001fe5,
+0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+0x3c92001325ecd7fb, 0x3feec5c10fa920a1,
+0xbc7ea0148327c42f, 0x3feec4e1e192aed2,
+0x3c65ace6e2870332, 0x3feec4052c5916c4,
+0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+0xbc9595c55690ffaf, 0x3feec2532feaada6,
+0xbc7a843ad1a88022, 0x3feec17dea6db7d7,
+0xbc8b401ba9fb5199, 0x3feec0ab213d5283,
+0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+0x3c6df82bf324cc57, 0x3feebf0d073537ca,
+0x3c892ca3bf144e63, 0x3feebe41b817c114,
+0x3c97cae38641c7bb, 0x3feebd78e8bb586b,
+0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+0x3c62d80c5c4a2b67, 0x3feebbeeccbd7b2a,
+0xbc902c99b04aa8b0, 0x3feebb2d81d8abff,
+0x3c8f39c10d12eaf0, 0x3feeba6eba2e35f0,
+0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+0xbc80b582d74a55d9, 0x3feeb8f8b804f127,
+0x3c73e34f67e67118, 0x3feeb8417f4531ee,
+0xbc6b4e327ff434ca, 0x3feeb78ccd3deb0d,
+0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+0xbc592dca38593e20, 0x3feeb62b00da3b14,
+0xbc75a3b1197ba0f0, 0x3feeb57de83f4eef,
+0xbc85daca9994833e, 0x3feeb4d359dfd53d,
+0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+0xbc980b4321bc6dae, 0x3feeb385df598d78,
+0x3c81bd2888075068, 0x3feeb2e2f4f6ad27,
+0xbc8390afec5241c5, 0x3feeb24298571b06,
+0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+0x3c8f15cdafe7d586, 0x3feeb1098bed1bdf,
+0xbc896be8ae89ef8f, 0x3feeb070dde910d2,
+0xbc910aa91ae9b67f, 0x3feeafdac1351819,
+0x3c93350518fdd78e, 0x3feeaf4736b527da,
+0x3c957e1b67462375, 0x3feeaeb63f4d854c,
+0xbc88e6ac90348602, 0x3feeae27dbe2c4cf,
+0x3c8124d5051552a7, 0x3feead9c0d59ca07,
+0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+0xbc3ca103952ecf1f, 0x3feeac8c32824135,
+0xbc91af7f1365c3ac, 0x3feeac0827ff07cc,
+0x3c773345c02a4fd6, 0x3feeab86b5f43d92,
+0x3c9063e1e21c5409, 0x3feeab07dd485429,
+0xbc909d2a0fce20f2, 0x3feeaa8b9ee20d1e,
+0xbc943a3540d1898a, 0x3feeaa11fba87a03,
+0xbc924f2cb4f81746, 0x3feea99af482fc8f,
+0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+0xbc943592a0a9846b, 0x3feea8b4be135acc,
+0xbc951f58ddaa8090, 0x3feea84590998b93,
+0xbc956bc85d444f4f, 0x3feea7d902d47c65,
+0x3c9432e62b64c035, 0x3feea76f15ad2148,
+0x3c914d1e4218319f, 0x3feea707ca0cbf0f,
+0xbc82e1648e50a17c, 0x3feea6a320dceb71,
+0x3c971c93709313f4, 0x3feea6411b078d26,
+0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+0x3c7f88303b60d222, 0x3feea584fd15612a,
+0x3c95f30eda98a575, 0x3feea52ae6cdf6f4,
+0x3c70125ca18d4b5b, 0x3feea4d3778bc944,
+0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+0x3c9592ea73798b11, 0x3feea42c91c56acd,
+0x3c917ecda8a72159, 0x3feea3dd1d1929fd,
+0xbc9371d6d7d75739, 0x3feea390532205d8,
+0xbc845378892be9ae, 0x3feea34634ccc320,
+0xbc8ac05fd996f807, 0x3feea2fec30678b7,
+0xbc9345f3cee1ae6e, 0x3feea2b9febc8fb7,
+0xbc91f5067d03653a, 0x3feea277e8dcc390,
+0xbc93cedd78565858, 0x3feea23882552225,
+0x3c917339c86ce3ad, 0x3feea1fbcc140be7,
+0xbc85c33fdf910406, 0x3feea1c1c70833f6,
+0xbc77e66065ba2500, 0x3feea18a7420a036,
+0x3c5710aa807e1964, 0x3feea155d44ca973,
+0x3c964c827ee6b49a, 0x3feea123e87bfb7a,
+0x3c81079ab5789604, 0x3feea0f4b19e9538,
+0xbc928311a3c73480, 0x3feea0c830a4c8d4,
+0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+0x3c882c79e185e981, 0x3feea077541ee718,
+0x3c727df161cd7778, 0x3feea052fa75173e,
+0xbc8b48cea80b043b, 0x3feea0315a736c75,
+0xbc6a12ad8734b982, 0x3feea012750bdabf,
+0xbc4f4863bc8e5180, 0x3fee9ff64b30aa09,
+0x3c93f9924a05b767, 0x3fee9fdcddd47645,
+0x3c954835dd4b7548, 0x3fee9fc62dea2f8a,
+0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+0xbc8bf41f59b59f8a, 0x3fee9fa10a38cee8,
+0xbc87557939a8b5ef, 0x3fee9f9298593ae5,
+0xbc8f652fde52775c, 0x3fee9f86e7ba9fef,
+0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+0xbc7b0300defbcf98, 0x3fee9f77ce1303f6,
+0x3c51ed2f56fa9d1a, 0x3fee9f7466f42e87,
+0xbc89dab646035dc0, 0x3fee9f73c4eaa988,
+0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+0xbc91f0c230588dde, 0x3fee9f7ad3ef9011,
+0xbc88e67a9006c909, 0x3fee9f8286ead08a,
+0x3c9106450507a28c, 0x3fee9f8d02d50b8f,
+0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+0xbc9129729a10f3a0, 0x3fee9faa5953c849,
+0x3c86597566977ac8, 0x3fee9fbd35d7cbfd,
+0x3c781a70a5124f67, 0x3fee9fd2df29ce7c,
+0xbc8619321e55e68a, 0x3fee9feb564267c9,
+0x3c941626ea62646d, 0x3feea0069c1a861d,
+0x3c92c0b7028a5c3a, 0x3feea024b1ab6e09,
+0xbc940b9f54365b7c, 0x3feea04597eeba8f,
+0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+0x3c873455e0e826c1, 0x3feea08fda749e5d,
+0x3c8a30faf49cc78c, 0x3feea0b938ac1cf6,
+0x3c94f006ad874e3e, 0x3feea0e56b7fcf03,
+0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+0xbc8f6d693d0973bb, 0x3feea14652e958aa,
+0xbc92dad3519d7b5b, 0x3feea17b0976cfdb,
+0x3c58c5ee2b7e7848, 0x3feea1b2988fb9ec,
+0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+0xbc88b25e045d207b, 0x3feea22a4456e7a3,
+0x3c87d51410fd15c2, 0x3feea26a62ff86f0,
+0xbc69cb3314060ca7, 0x3feea2ad5e2850ac,
+0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+0x3c87a0b15d19e0bb, 0x3feea33bedf2e1b9,
+0xbc760a3629969871, 0x3feea3878491c491,
+0x3c94aa7212bfa73c, 0x3feea3d5fbab091f,
+0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+0xbc81e688272a8a12, 0x3feea47b8f4abaa9,
+0x3c8b18c6e3fdef5d, 0x3feea4d2add106d9,
+0x3c4ab7b7112ec9d5, 0x3feea52cb0d1736a,
+0xbc9369b6f13b3734, 0x3feea589994cce13,
+0x3c8a1e274eed4476, 0x3feea5e968443d9a,
+0x3c90ec1ddcb1390a, 0x3feea64c1eb941f7,
+0x3c94a533a59324da, 0x3feea6b1bdadb46d,
+0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+0x3c7a56d2760d087d, 0x3feea785b91e07f1,
+0xbc522cea4f3afa1e, 0x3feea7f4179f5b21,
+0x3c91682c1c6e8b05, 0x3feea86562ab00ec,
+0xbc94d450d872576e, 0x3feea8d99b4492ed,
+0x3c89ea99cf7a9591, 0x3feea950c27004c2,
+0x3c7c88549b958471, 0x3feea9cad931a436,
+0xbc59e57d8f92ff8e, 0x3feeaa47e08e1957,
+0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+0x3c909b176e05a9cd, 0x3feeab4ac52be8f7,
+0x3c931143962f7877, 0x3feeabd0a478580f,
+0x3c711607f1952c95, 0x3feeac597875c644,
+0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+0x3c869608f0f86431, 0x3feead74029db01e,
+0x3c93e9e96f112479, 0x3feeae05bad61778,
+0xbc7f1ced15c5c5c0, 0x3feeae9a6bdb5598,
+0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+0x3c614b97be3f7b4e, 0x3feeafccbc6c19e6,
+0xbc8dac42a4a38df0, 0x3feeb06a5e0866d9,
+0x3c81c1701c359530, 0x3feeb10afc931857,
+0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+0xbc8edb1bf6809287, 0x3feeb2553499284b,
+0x3c8b99dd98b1ed84, 0x3feeb2fed0282c8a,
+0xbc8ba58ce7a736d3, 0x3feeb3ab6ccce12c,
+0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+0xbc93fc025e1db9ce, 0x3feeb50dad829e70,
+0xbc7885ad50cbb750, 0x3feeb5c353aa2fe2,
+0xbc8d737c7d71382e, 0x3feeb67bff148396,
+0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+0x3c6ae88c43905293, 0x3feeb7f669e2802b,
+0xbc82d5e85f3e0301, 0x3feeb8b82b5f98e5,
+0xbc93d1f7661fe51b, 0x3feeb97cf65253d1,
+0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+0x3c651b68797ffc1c, 0x3feebb0faccf9243,
+0xbc51669428996971, 0x3feebbdd9a7670b3,
+0x3c54579c5ceed70b, 0x3feebcae95cba768,
+0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+0x3c87298413381667, 0x3feebe59b9bddb5b,
+0x3c71f2b2c1c4c014, 0x3feebf33e47a22a2,
+0xbc905000be64e965, 0x3feec01121235681,
+0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+0xbc89fb12e3454b73, 0x3feec1d4d47f2598,
+0xbc9294f304f166b6, 0x3feec2bb4d53fe0d,
+0x3c7be2a03697693b, 0x3feec3a4dc5a3dd3,
+0x3c71affc2b91ce27, 0x3feec49182a3f090,
+0x3c90622b15810eea, 0x3feec581414380f2,
+0xbc8a1e58414c07d3, 0x3feec674194bb8d5,
+0x3be9a5ecc875d327, 0x3feec76a0bcfc15e,
+0x3c6dd235e10a73bb, 0x3feec86319e32323,
+0x3c88ea486a3350ef, 0x3feec95f4499c647,
+0xbc79740b58a20091, 0x3feeca5e8d07f29e,
+0xbc7a2ee551d4c40f, 0x3feecb60f4424fcb,
+0xbc87c50422622263, 0x3feecc667b5de565,
+0x3c89c31f7e38028b, 0x3feecd6f23701b15,
+0x3c9165830a2b96c2, 0x3feece7aed8eb8bb,
+0xbc5fac13f4e005a3, 0x3feecf89dacfe68c,
+0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+0x3c7d8aced7162e89, 0x3feed1b1231475f7,
+0xbc903d5cbe27874b, 0x3feed2c980460ad8,
+0xbc848f50cea7269f, 0x3feed3e504f696b1,
+0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+0x3c821eb9a08a0542, 0x3feed625893523d4,
+0x3c5986178980fce0, 0x3feed74a8af46052,
+0xbc6133a953131cfd, 0x3feed872b8950a73,
+0x3c90cc319cee31d2, 0x3feed99e1330b358,
+0x3c89e95e6f4a0ae4, 0x3feedacc9be14dca,
+0xbc89472975b1f2a5, 0x3feedbfe53c12e59,
+0xbc90260cf07cb311, 0x3feedd333beb0b7e,
+0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+0x3c1bca400a7b939d, 0x3feedfa6a1897fd2,
+0x3c7d8157a34b7e7f, 0x3feee0e521356eba,
+0x3c9140bc34dfc19f, 0x3feee226d59a09ee,
+0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+0xbc8c9b1da461ab87, 0x3feee4b3e100301e,
+0x3c8c8a4e231ebb7d, 0x3feee5ff3a3c2774,
+0x3c8c115f23ebea8e, 0x3feee74dcca5a413,
+0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+0xbc6dcab99f23f84e, 0x3feee9f4a17a4735,
+0xbc888c8d11a142e5, 0x3feeeb4ce622f2ff,
+0x3c60a43e8b7e4bfe, 0x3feeeca868742ee4,
+0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+0x3c915b1397075f04, 0x3feeef692a8fa8cd,
+0x3c889c2ea41433c7, 0x3feef0ce6c9a8952,
+0xbc839f7a1f04d2b0, 0x3feef236f0cf3f3a,
+0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+0xbc86a510f31e13e6, 0x3feef511c43bbd62,
+0xbc7274aedac8ff80, 0x3feef68415b749b1,
+0xbc92887ea88e7340, 0x3feef7f9ade433c6,
+0xbc90a40e3da6f640, 0x3feef9728de5593a,
+0xbc6e57ac604759ba, 0x3feefaeeb6ddfc87,
+0x3c85c620ce76df06, 0x3feefc6e29f1c52a,
+0x3c8e6c6db4f83226, 0x3feefdf0e844bfc6,
+0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+0xbc8d1bf10460dba0, 0x3fef01004b3a7804,
+0xbc8fda52e1b51e41, 0x3fef028cf22749e4,
+0x3c8e5d80813dddfc, 0x3fef041ce8e77680,
+0xbc91eee26b588a35, 0x3fef05b030a1064a,
+0x3c8caff9640f2dcb, 0x3fef0746ca7a67a7,
+0xbc32141a7b3e2cd8, 0x3fef08e0b79a6f1f,
+0x3c7a77557fd62db3, 0x3fef0a7df9285775,
+0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+0xbc651ba6128db749, 0x3fef0dc27e2cb5e5,
+0xbc302899507554e5, 0x3fef0f69c3f3a207,
+0xbc7c0ffefdc5e251, 0x3fef111462c95b60,
+0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+0xbc8b6cd058bfd6fa, 0x3fef1473b0468d30,
+0xbc80dda2d4c0010c, 0x3fef16286141b33d,
+0x3c923759b8aca76d, 0x3fef17e06ff301f4,
+0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+0xbc895498a73dac7d, 0x3fef1b5aab23e61e,
+0xbc8a007daadf8d68, 0x3fef1d1cd9fa652c,
+0x3c851de924583108, 0x3fef1ee26b34e065,
+0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+0xbc8c5fe4051ba06c, 0x3fef2277b9881650,
+0x3c836909391181d3, 0x3fef244778fafb22,
+0xbc6d1816c0a9ac07, 0x3fef261a9f8630ad,
+0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+0xbc7af5c67c4e8235, 0x3fef29cb269e601f,
+0xbc811cd7dbdf9547, 0x3fef2ba88988c933,
+0xbc8304ef0045d575, 0x3fef2d89584661a1,
+0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+0x3c8725f94f910375, 0x3fef31553dfa8313,
+0xbc7ac28b7bef6621, 0x3fef33405751c4db,
+0x3c7b53e99f9191e8, 0x3fef352ee13da7cb,
+0x3c676b2c6c921968, 0x3fef3720dcef9069,
+0xbc810a79e6d7e2b8, 0x3fef39164b994d23,
+0xbc7030587207b9e1, 0x3fef3b0f2e6d1675,
+0x3c840635f6d2a9c0, 0x3fef3d0b869d8f0f,
+0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+0x3c549eeef9ec910c, 0x3fef410e9be12cb9,
+0xbc8cc734592af7fc, 0x3fef43155b5bab74,
+0xbc8335827ffb9dce, 0x3fef451f95018d17,
+0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+0x3c645563980ef762, 0x3fef493e7ba2c38c,
+0x3c87752a44f587e8, 0x3fef4b532b08c968,
+0xbc8cd0205eb2aab2, 0x3fef4d6b596f948c,
+0xbc900dae3875a949, 0x3fef4f87080d89f2,
+0xbc8aab80ceab2b4a, 0x3fef51a638197a3c,
+0x3c85b66fefeef52e, 0x3fef53c8eacaa1d6,
+0xbc8f870f40a8ba1b, 0x3fef55ef2158a91f,
+0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+0x3c83c119f18464c5, 0x3fef5a461eec14be,
+0x3c5159d9d908a96e, 0x3fef5c76e862e6d3,
+0xbc5a628c2be4e7c7, 0x3fef5eab3a99745b,
+0xbc82919e2040220f, 0x3fef60e316c98398,
+0xbc72550d76be719a, 0x3fef631e7e2d479d,
+0x3c8c254d16117a68, 0x3fef655d71ff6075,
+0xbc82090274667d12, 0x3fef679ff37adb4a,
+0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+0x3c75f7d28150cac4, 0x3fef6c2fa45c4dfd,
+0xbc8d8c329fbd0e03, 0x3fef6e7cd63a8315,
+0x3c890de9296f4cd1, 0x3fef70cd9ab294e4,
+0x3c843a59ac016b4b, 0x3fef7321f301b460,
+0x3c832ff9978b34bc, 0x3fef7579e065807d,
+0xbc8ea6e6fbd5f2a6, 0x3fef77d5641c0658,
+0xbc7303b63dda1980, 0x3fef7a347f63c159,
+0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+0xbc81f2ba385f2f95, 0x3fef7efd81a2ece1,
+0xbc63e8e3eab2cbb4, 0x3fef81676b197d17,
+0x3c768d9144ae12fc, 0x3fef83d4f11f8220,
+0xbc892ab93b470dc9, 0x3fef864614f5a129,
+0x3c853687f542403b, 0x3fef88bad7dcee90,
+0xbc8b7966cd0d2cd9, 0x3fef8b333b16ee12,
+0xbc736ed2de40b407, 0x3fef8daf3fe592e8,
+0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+0xbc614ef56c770f3b, 0x3fef92b2334ac7ee,
+0xbc776caa4c2ff1cf, 0x3fef953924676d76,
+0x3c8df7d1353d8e88, 0x3fef97c3bc24e350,
+0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+0xbc850bed64091b8a, 0x3fef9ce3e4933c7e,
+0xbc81d5fc525d9940, 0x3fef9f7977cdb740,
+0x3c89d852381c317f, 0x3fefa212b6bc3181,
+0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+0x3c68a00e3cca04c4, 0x3fefa7503ccd2be5,
+0x3c855cd8aaea3d21, 0x3fefa9f4867cca6e,
+0xbc5a1f25ce94cae7, 0x3fefac9c80faa594,
+0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+0xbc6fb5f3ee307976, 0x3fefb1f78d802dc2,
+0x3c8269947c2bed4a, 0x3fefb4aaa2188510,
+0x3c737e8ae802b851, 0x3fefb7616ca06dd6,
+0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+0x3c875119560e34af, 0x3fefbcda28a52e59,
+0xbc83b6137e9afe9e, 0x3fefbf9c1cb6412a,
+0xbc7431c3840929c6, 0x3fefc261cbdf5be7,
+0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+0xbc8cb472d2e86b99, 0x3fefc7f860a70c22,
+0xbc69fa74878ba7c7, 0x3fefcac948dd7274,
+0x3c83f5df2fde16a8, 0x3fefcd9df15b82ac,
+0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+0x3c8eef18336b62e3, 0x3fefd35288633625,
+0x3c901f3a75ee0efe, 0x3fefd632798844f8,
+0x3c80d23f87b50a2a, 0x3fefd916302bd526,
+0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+0x3c8302dee657c8e6, 0x3fefdee8f32a4b45,
+0xbc516a9ce6ed84fa, 0x3fefe1d802243c89,
+0xbc7b0caa080df170, 0x3fefe4cadbdac61d,
+0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+0x3c7617a9f2fd24e5, 0x3fefeabbf4c0ba54,
+0xbc699c7db2effc76, 0x3fefedba3692d514,
+0x3c75f103b8fd5ca7, 0x3feff0bc4866e8ad,
+0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+0x3c8e70b094fa075a, 0x3feff6cbe15f6314,
+0x3c64b458677f9840, 0x3feff9d96b2a23d9,
+0xbc72ec9a3e5d680a, 0x3feffceaca4391b6,
+#endif
+},
+};
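A minimal sketch of how a consumer such as exp.c turns this paired table into scale and tail, assuming the layout documented above (tab[2*k] holds asuint64(T[k]) and tab[2*k+1] holds asuint64(H[k]) - (k << 52)/N); the helper names are hypothetical.

#include <stdint.h>
#include <string.h>

/* Reinterpret bits as a double (stand-in for the asdouble helper).  */
static double
as_double_sketch (uint64_t bits)
{
  double d;
  memcpy (&d, &bits, sizeof d);
  return d;
}

/* For an integer k from the reduction x ~= k*ln2/N + r, recover
   scale ~= 2^(k/N) and its small correction tail.  Adding
   k << (52 - table_bits) both cancels the bias subtracted when the table
   was built and supplies the 2^floor(k/N) exponent; this is valid while
   -1023*N < k < 1024*N so the exponent field does not overflow.  */
static double
lookup_scale_sketch (const uint64_t *tab, int table_bits, int64_t k,
                     double *tail)
{
  uint64_t n = 1ull << table_bits;
  uint64_t idx = 2 * ((uint64_t) k & (n - 1));
  *tail = as_double_sketch (tab[idx]);
  return as_double_sketch (tab[idx + 1]
                           + ((uint64_t) k << (52 - table_bits)));
}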
diff --git a/contrib/arm-optimized-routines/pl/math/expf.c b/contrib/arm-optimized-routines/pl/math/expf.c
new file mode 100644
index 000000000000..cd3cfa925c64
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/expf.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision e^x function.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+EXPF_TABLE_BITS = 5
+EXPF_POLY_ORDER = 3
+
+ULP error: 0.502 (nearest rounding.)
+Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
+Wrong count: 170635 (all nearest rounding wrong results with fma.)
+Non-nearest ULP error: 1 (rounded ULP error)
+*/
+
+#define N (1 << EXPF_TABLE_BITS)
+#define InvLn2N __expf_data.invln2_scaled
+#define T __expf_data.tab
+#define C __expf_data.poly_scaled
+
+static inline uint32_t
+top12 (float x)
+{
+ return asuint (x) >> 20;
+}
+
+float
+optr_aor_exp_f32 (float x)
+{
+ uint32_t abstop;
+ uint64_t ki, t;
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t kd, xd, z, r, r2, y, s;
+
+ xd = (double_t) x;
+ abstop = top12 (x) & 0x7ff;
+ if (unlikely (abstop >= top12 (88.0f)))
+ {
+ /* |x| >= 88 or x is nan. */
+ if (asuint (x) == asuint (-INFINITY))
+ return 0.0f;
+ if (abstop >= top12 (INFINITY))
+ return x + x;
+ if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
+ return __math_oflowf (0);
+ if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
+ return __math_uflowf (0);
+ }
+
+ /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
+ z = InvLn2N * xd;
+
+ /* Round and convert z to int; the result is in [-150*N, 128*N]. Ideally
+ nearest-int rounding is used, otherwise the magnitude of r can be
+ bigger, which gives a larger approximation error. */
+ kd = round (z);
+ ki = lround (z);
+ r = z - kd;
+
+ /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
+ t = T[ki % N];
+ t += ki << (52 - EXPF_TABLE_BITS);
+ s = asdouble (t);
+ z = C[0] * r + C[1];
+ r2 = r * r;
+ y = C[2] * r + 1;
+ y = z * r2 + y;
+ y = y * s;
+ return eval_as_float (y);
+}
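A hypothetical standalone check, assuming it is linked against expf.c and expf_data.c above; it only prints the routine's results next to the system expf for a few inputs and is not part of the library's test harness.

#include <math.h>
#include <stdio.h>

float optr_aor_exp_f32 (float);

int
main (void)
{
  const float xs[] = { -87.0f, -1.0f, 0.0f, 0.5f, 1.0f, 10.0f, 88.0f };
  for (unsigned i = 0; i < sizeof xs / sizeof xs[0]; i++)
    printf ("x=% .2f  optr=%a  libm=%a\n", (double) xs[i],
            (double) optr_aor_exp_f32 (xs[i]), (double) expf (xs[i]));
  return 0;
}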
diff --git a/contrib/arm-optimized-routines/pl/math/expf_data.c b/contrib/arm-optimized-routines/pl/math/expf_data.c
new file mode 100644
index 000000000000..474ad57a29a0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/expf_data.c
@@ -0,0 +1,31 @@
+/*
+ * Coeffs and table entries for single-precision exp. Copied from
+ * math/exp2f_data.c, with a 32-entry table (EXP2F_TABLE_BITS == 5).
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << EXPF_TABLE_BITS)
+
+const struct expf_data __expf_data = {
+ /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
+ used for computing 2^(k/N) for an int |k| < 150 N as
+ double(tab[k%N] + (k << 52-BITS)) */
+ .tab = {
+0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+ },
+ .invln2_scaled = 0x1.71547652b82fep+0 * N,
+ .poly_scaled = {
+ 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
+ },
+};
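A short numerical check of why the coefficients are stored pre-divided by N, N^2 and N^3; the values are copied from the table above and N == 32 corresponds to EXPF_TABLE_BITS == 5.

#include <math.h>
#include <stdio.h>

/* expf.c works with r = z - k where z = x*N/ln2, so the fractional
   exponent is r/N and
     2^(r/N) ~= 1 + C2*(r/N) + C1*(r/N)^2 + C0*(r/N)^3
             =  1 + (C2/N)*r + (C1/N^2)*r^2 + (C0/N^3)*r^3,
   which is the poly_scaled form evaluated in expf.c.  */
int
main (void)
{
  const double N = 32.0;
  const double C0 = 0x1.c6af84b912394p-5;
  const double C1 = 0x1.ebfce50fac4f3p-3;
  const double C2 = 0x1.62e42ff0c52d6p-1;
  double r = 0.3; /* any r in [-1/2, 1/2].  */
  double t = r / N;
  double unscaled = 1 + C2 * t + C1 * t * t + C0 * t * t * t;
  double scaled = 1 + (C2 / N) * r + (C1 / (N * N)) * r * r
                  + (C0 / (N * N * N)) * r * r * r;
  printf ("2^(r/N): poly %.17g  scaled poly %.17g  exp2 %.17g\n",
          unscaled, scaled, exp2 (t));
  return 0;
}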
diff --git a/contrib/arm-optimized-routines/pl/math/expm1_2u5.c b/contrib/arm-optimized-routines/pl/math/expm1_2u5.c
new file mode 100644
index 000000000000..f7d431198614
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/expm1_2u5.c
@@ -0,0 +1,85 @@
+/*
+ * Double-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f64.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define InvLn2 0x1.71547652b82fep0
+#define Ln2hi 0x1.62e42fefa39efp-1
+#define Ln2lo 0x1.abc9e3b39803fp-56
+#define Shift 0x1.8p52
+/* 0x1p-51, below which expm1(x) is within 2 ULP of x. */
+#define TinyBound 0x3cc0000000000000
+/* Above which expm1(x) overflows. */
+#define BigBound 0x1.63108c75a1937p+9
+/* Below which expm1(x) rounds to -1. */
+#define NegBound -0x1.740bf7c0d927dp+9
+#define AbsMask 0x7fffffffffffffff
+
+/* Approximation for exp(x) - 1 using polynomial on a reduced interval.
+ The maximum observed error is 2.17 ULP:
+ expm1(0x1.63f90a866748dp-2) got 0x1.a9af56603878ap-2
+ want 0x1.a9af566038788p-2. */
+double
+expm1 (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ax = ix & AbsMask;
+
+ /* Tiny, +Infinity. */
+ if (ax <= TinyBound || ix == 0x7ff0000000000000)
+ return x;
+
+ /* +/-NaN. */
+ if (ax > 0x7ff0000000000000)
+ return __math_invalid (x);
+
+ /* Result is too large to be represented as a double. */
+ if (x >= BigBound)
+ return __math_oflow (0);
+
+ /* Result rounds to -1 in double precision. */
+ if (x <= NegBound)
+ return -1;
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ double j = fma (InvLn2, x, Shift) - Shift;
+ int64_t i = j;
+ double f = fma (j, -Ln2hi, x);
+ f = fma (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ double f2 = f * f;
+ double f4 = f2 * f2;
+ double p = fma (f2, estrin_10_f64 (f, f2, f4, f4 * f4, __expm1_poly), f);
+
+ /* Assemble the result, using a slight rearrangement to achieve acceptable
+ accuracy.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^(i - 1). */
+ double t = ldexp (0.5, i);
+ /* expm1(x) ~= 2 * (p * t + (t - 1/2)). */
+ return 2 * fma (p, t, t - 0.5);
+}
+
+PL_SIG (S, D, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (expm1, 1.68)
+PL_TEST_SYM_INTERVAL (expm1, 0, 0x1p-51, 1000)
+PL_TEST_INTERVAL (expm1, 0x1p-51, 0x1.63108c75a1937p+9, 100000)
+PL_TEST_INTERVAL (expm1, -0x1p-51, -0x1.740bf7c0d927dp+9, 100000)
+PL_TEST_INTERVAL (expm1, 0x1.63108c75a1937p+9, inf, 100)
+PL_TEST_INTERVAL (expm1, -0x1.740bf7c0d927dp+9, -inf, 100)
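A minimal sketch of the final reconstruction step above, assuming p and i come from the polynomial and the reduction exactly as in expm1.

#include <math.h>

/* expm1(x) = 2^i * (p + 1) - 1.  With t = 2^(i-1),
     2^i * (p + 1) - 1 = 2*t*p + 2*t - 1 = 2 * (p*t + (t - 0.5)),
   which needs one ldexp and one fma, and keeps t finite even when i
   approaches 1024, where 2^i itself would overflow.  */
static double
expm1_reconstruct_sketch (double p, int i)
{
  double t = ldexp (0.5, i);
  return 2 * fma (p, t, t - 0.5);
}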
diff --git a/contrib/arm-optimized-routines/pl/math/expm1_data.c b/contrib/arm-optimized-routines/pl/math/expm1_data.c
new file mode 100644
index 000000000000..ff7426b90135
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/expm1_data.c
@@ -0,0 +1,21 @@
+/*
+ * Coefficients for double-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Generated using fpminimax, see tools/expm1.sollya for details. */
+const double __expm1_poly[] = {0x1p-1,
+ 0x1.5555555555559p-3,
+ 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7,
+ 0x1.6c16c16c1b5f3p-10,
+ 0x1.a01a01affa35dp-13,
+ 0x1.a01a018b4ecbbp-16,
+ 0x1.71ddf82db5bb4p-19,
+ 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26,
+ 0x1.1f143d060a28ap-29};
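For illustration, one possible Estrin-style grouping for this degree-10 polynomial; the real evaluation uses estrin_10_f64 from poly_scalar_f64.h, so this sketch only shows the scheme, not that helper's definition. Arguments follow the call in expm1_2u5.c: x2 = x*x, x4 = x2*x2, x8 = x4*x4, c = __expm1_poly.

#include <math.h>

/* Group terms so that the fmas at each level are independent and can
   execute in parallel; the result equals c[0] + c[1]*x + ... + c[10]*x^10.  */
static double
estrin_10_sketch (double x, double x2, double x4, double x8, const double *c)
{
  double p01 = fma (x, c[1], c[0]);
  double p23 = fma (x, c[3], c[2]);
  double p45 = fma (x, c[5], c[4]);
  double p67 = fma (x, c[7], c[6]);
  double p89 = fma (x, c[9], c[8]);
  double p0_3 = fma (x2, p23, p01);
  double p4_7 = fma (x2, p67, p45);
  double p8_10 = fma (x2, c[10], p89);
  double p0_7 = fma (x4, p4_7, p0_3);
  return fma (x8, p8_10, p0_7);
}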
diff --git a/contrib/arm-optimized-routines/pl/math/expm1f_1u6.c b/contrib/arm-optimized-routines/pl/math/expm1f_1u6.c
new file mode 100644
index 000000000000..e12c9ba9a8a2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/expm1f_1u6.c
@@ -0,0 +1,79 @@
+/*
+ * Single-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Shift (0x1.8p23f)
+#define InvLn2 (0x1.715476p+0f)
+#define Ln2hi (0x1.62e4p-1f)
+#define Ln2lo (0x1.7f7d1cp-20f)
+#define AbsMask (0x7fffffff)
+#define InfLimit \
+ (0x1.644716p6) /* Smallest value of x for which expm1(x) overflows. */
+#define NegLimit \
+ (-0x1.9bbabcp+6) /* Largest value of x for which expm1(x) rounds to -1. */
+
+/* Approximation for exp(x) - 1 using polynomial on a reduced interval.
+ The maximum error is 1.51 ULP:
+ expm1f(0x1.8baa96p-2) got 0x1.e2fb9p-2
+ want 0x1.e2fb94p-2. */
+float
+expm1f (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ax = ix & AbsMask;
+
+ /* Tiny: |x| < 0x1p-23. expm1(x) is closely approximated by x.
+ Inf: x == +Inf => expm1(x) = x. */
+ if (ax <= 0x34000000 || (ix == 0x7f800000))
+ return x;
+
+ /* +/-NaN. */
+ if (ax > 0x7f800000)
+ return __math_invalidf (x);
+
+ if (x >= InfLimit)
+ return __math_oflowf (0);
+
+ if (x <= NegLimit || ix == 0xff800000)
+ return -1;
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ float j = fmaf (InvLn2, x, Shift) - Shift;
+ int32_t i = j;
+ float f = fmaf (j, -Ln2hi, x);
+ f = fmaf (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ float p = fmaf (f * f, horner_4_f32 (f, __expm1f_poly), f);
+ /* Assemble the result, using a slight rearrangement to achieve acceptable
+ accuracy.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^(i - 1). */
+ float t = ldexpf (0.5f, i);
+ /* expm1(x) ~= 2 * (p * t + (t - 1/2)). */
+ return 2 * fmaf (p, t, t - 0.5f);
+}
+
+PL_SIG (S, F, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (expm1f, 1.02)
+PL_TEST_SYM_INTERVAL (expm1f, 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (expm1f, 0x1p-23, 0x1.644716p6, 100000)
+PL_TEST_INTERVAL (expm1f, 0x1.644716p6, inf, 1000)
+PL_TEST_INTERVAL (expm1f, -0x1p-23, -0x1.9bbabcp+6, 100000)
+PL_TEST_INTERVAL (expm1f, -0x1.9bbabcp+6, -inf, 1000)
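A minimal sketch of the two-step Cody-Waite style reduction used above, with the Ln2hi/Ln2lo constants copied from this file and j assumed to be the rounded quotient round(x/ln2).

#include <math.h>

/* Ln2hi keeps only the leading bits of ln2, so j * Ln2hi is exactly
   representable in single precision for the |j| (at most about 150) that
   can occur here; the rounding error is confined to the tiny Ln2lo term.  */
static float
reduce_ln2_sketch (float x, float j)
{
  const float ln2hi = 0x1.62e4p-1f;
  const float ln2lo = 0x1.7f7d1cp-20f;
  float f = fmaf (j, -ln2hi, x);
  return fmaf (j, -ln2lo, f);
}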
diff --git a/contrib/arm-optimized-routines/pl/math/expm1f_data.c b/contrib/arm-optimized-routines/pl/math/expm1f_data.c
new file mode 100644
index 000000000000..9d02dc448ebb
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/expm1f_data.c
@@ -0,0 +1,12 @@
+/*
+ * Coefficients for single-precision e^x - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Generated using fpminimax, see tools/expm1f.sollya for details. */
+const float __expm1f_poly[] = {0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5,
+ 0x1.12287cp-7, 0x1.6b55a2p-10};
diff --git a/contrib/arm-optimized-routines/pl/math/finite_pow.h b/contrib/arm-optimized-routines/pl/math/finite_pow.h
new file mode 100644
index 000000000000..8944d4fae625
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/finite_pow.h
@@ -0,0 +1,365 @@
+/*
+ * Double-precision x^y function.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Scalar version of pow used for fallbacks in vector implementations. */
+
+/* Data is defined in v_pow_log_data.c. */
+#define N_LOG (1 << V_POW_LOG_TABLE_BITS)
+#define Off 0x3fe6955500000000
+#define As __v_pow_log_data.poly
+
+/* Data is defined in v_pow_exp_data.c. */
+#define N_EXP (1 << V_POW_EXP_TABLE_BITS)
+#define SignBias (0x800 << V_POW_EXP_TABLE_BITS)
+#define SmallExp 0x3c9 /* top12(0x1p-54). */
+#define BigExp 0x408 /* top12(512.0). */
+#define ThresExp 0x03f /* BigExp - SmallExp. */
+#define InvLn2N __v_pow_exp_data.n_over_ln2
+#define Ln2HiN __v_pow_exp_data.ln2_over_n_hi
+#define Ln2LoN __v_pow_exp_data.ln2_over_n_lo
+#define SBits __v_pow_exp_data.sbits
+#define Cs __v_pow_exp_data.poly
+
+/* Constants associated with pow. */
+#define SmallPowX 0x001 /* top12(0x1p-126). */
+#define BigPowX 0x7ff /* top12(INFINITY). */
+#define ThresPowX 0x7fe /* BigPowX - SmallPowX. */
+#define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */
+#define BigPowY 0x43e /* top12(0x1.749p62). */
+#define ThresPowY 0x080 /* BigPowY - SmallPowY. */
+
+/* Top 12 bits of a double (sign and exponent bits). */
+static inline uint32_t
+top12 (double x)
+{
+ return asuint64 (x) >> 52;
+}
+
+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
+ 15 bits of additional precision. IX is the bit representation of x, but
+ normalized in the subnormal range using the sign bit for the exponent. */
+static inline double
+log_inline (uint64_t ix, double *tail)
+{
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ uint64_t tmp = ix - Off;
+ int i = (tmp >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1);
+ int k = (int64_t) tmp >> 52; /* arithmetic shift. */
+ uint64_t iz = ix - (tmp & 0xfffULL << 52);
+ double z = asdouble (iz);
+ double kd = (double) k;
+
+ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
+ double invc = __v_pow_log_data.invc[i];
+ double logc = __v_pow_log_data.logc[i];
+ double logctail = __v_pow_log_data.logctail[i];
+
+ /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+ |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
+ double r = fma (z, invc, -1.0);
+
+ /* k*Ln2 + log(c) + r. */
+ double t1 = kd * __v_pow_log_data.ln2_hi + logc;
+ double t2 = t1 + r;
+ double lo1 = kd * __v_pow_log_data.ln2_lo + logctail;
+ double lo2 = t1 - t2 + r;
+
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ double ar = As[0] * r;
+ double ar2 = r * ar;
+ double ar3 = r * ar2;
+ /* k*Ln2 + log(c) + r + A[0]*r*r. */
+ double hi = t2 + ar2;
+ double lo3 = fma (ar, r, -ar2);
+ double lo4 = t2 - hi + ar2;
+ /* p = log1p(r) - r - A[0]*r*r. */
+ double p = (ar3
+ * (As[1] + r * As[2]
+ + ar2 * (As[3] + r * As[4] + ar2 * (As[5] + r * As[6]))));
+ double lo = lo1 + lo2 + lo3 + lo4 + p;
+ double y = hi + lo;
+ *tail = hi - y + lo;
+ return y;
+}
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+ scale is in SBITS; however, it has a computed exponent that may have
+ overflowed into the sign bit, so it needs to be adjusted before using it as
+ a double. (int32_t)KI is the k used in the argument reduction and exponent
+ adjustment of scale: positive k here means the result may overflow and
+ negative k means the result may underflow. */
+static inline double
+special_case (double tmp, uint64_t sbits, uint64_t ki)
+{
+ double scale, y;
+
+ if ((ki & 0x80000000) == 0)
+ {
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52;
+ scale = asdouble (sbits);
+ y = 0x1p1009 * (scale + scale * tmp);
+ return check_oflow (eval_as_double (y));
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ /* Note: sbits is signed scale. */
+ scale = asdouble (sbits);
+ y = scale + scale * tmp;
+#if WANT_SIMD_EXCEPT
+ if (fabs (y) < 1.0)
+ {
+ /* Round y to the right precision before scaling it into the subnormal
+ range to avoid double rounding that can cause 0.5+E/2 ulp error where
+ E is the worst-case ulp error outside the subnormal range. So this
+ is only useful if the goal is better than 1 ulp worst-case error. */
+ double hi, lo, one = 1.0;
+ if (y < 0.0)
+ one = -1.0;
+ lo = scale - y + scale * tmp;
+ hi = one + y;
+ lo = one - hi + y + lo;
+ y = eval_as_double (hi + lo) - one;
+ /* Fix the sign of 0. */
+ if (y == 0.0)
+ y = asdouble (sbits & 0x8000000000000000);
+ /* The underflow exception needs to be signaled explicitly. */
+ force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
+ }
+#endif
+ y = 0x1p-1022 * y;
+ return check_uflow (eval_as_double (y));
+}
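
special_case keeps the intermediate in range by biasing the exponent field of scale (the 1009 and 1022 adjustments above) and compensating with an exact power-of-two multiply at the very end. A standalone sketch of the same rescaling trick with made-up operands, illustrative only and not part of the patch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* 2^1000 * 2^100 * 2^-200 == 2^900 is representable, but the naive
     left-to-right product overflows at 2^1100.  Pre-scale the first factor
     by 2^-1009 (pure exponent arithmetic, exact), then undo it at the end. */
  double x = 0x1p1000, y = 0x1p100, z = 0x1p-200;

  uint64_t xbits;
  memcpy (&xbits, &x, sizeof xbits);
  xbits -= 1009ULL << 52; /* x * 2^-1009, exact. */
  double xs;
  memcpy (&xs, &xbits, sizeof xs);

  double naive = x * y * z;              /* inf: intermediate overflowed. */
  double safe = 0x1p1009 * (xs * y * z); /* 2^900, correct. */
  printf ("naive = %a\nsafe  = %a\n", naive, safe);
  return 0;
}
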
+
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */
+static inline double
+exp_inline (double x, double xtail, uint32_t sign_bias)
+{
+ uint32_t abstop = top12 (x) & 0x7ff;
+ if (unlikely (abstop - SmallExp >= ThresExp))
+ {
+ if (abstop - SmallExp >= 0x80000000)
+ {
+ /* Avoid spurious underflow for tiny x. */
+ /* Note: 0 is common input. */
+ return sign_bias ? -1.0 : 1.0;
+ }
+ if (abstop >= top12 (1024.0))
+ {
+ /* Note: inf and nan are already handled. */
+ /* Skip errno handling. */
+#if WANT_SIMD_EXCEPT
+ return asuint64 (x) >> 63 ? __math_uflow (sign_bias)
+ : __math_oflow (sign_bias);
+#else
+ double res_uoflow = asuint64 (x) >> 63 ? 0.0 : INFINITY;
+ return sign_bias ? -res_uoflow : res_uoflow;
+#endif
+ }
+ /* Large x is special cased below. */
+ abstop = 0;
+ }
+
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ double z = InvLn2N * x;
+ double kd = round (z);
+ uint64_t ki = lround (z);
+ double r = x - kd * Ln2HiN - kd * Ln2LoN;
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ r += xtail;
+ /* 2^(k/N) ~= scale. */
+ uint64_t idx = ki & (N_EXP - 1);
+ uint64_t top = (ki + sign_bias) << (52 - V_POW_EXP_TABLE_BITS);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ uint64_t sbits = SBits[idx] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ double r2 = r * r;
+ double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]);
+ if (unlikely (abstop == 0))
+ return special_case (tmp, sbits, ki);
+ double scale = asdouble (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double (scale + scale * tmp);
+}
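
With the table collapsed to a single entry (N = 1), the reconstruction sbits = SBits[idx] + top reduces to adding k directly into the exponent field of 1.0, i.e. 2^k is built without an int-to-float conversion. A standalone sketch of that step, illustrative only and not part of the patch, valid for k in the normal exponent range:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Build 2^k by adding k into the exponent field of 1.0 (-1022 <= k <= 1023). */
static double
pow2_int (int k)
{
  double d = 1.0;
  uint64_t bits;
  memcpy (&bits, &d, sizeof bits);
  bits += (uint64_t) (int64_t) k << 52;
  memcpy (&d, &bits, sizeof d);
  return d;
}

int
main (void)
{
  printf ("%a %a %a\n", pow2_int (3), pow2_int (0), pow2_int (-5));
  /* Prints 0x1p+3 0x1p+0 0x1p-5. */
  return 0;
}
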
+
+/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ A version of exp_inline that is not inlined and for which sign_bias is
+ equal to 0. */
+static double NOINLINE
+exp_nosignbias (double x, double xtail)
+{
+ uint32_t abstop = top12 (x) & 0x7ff;
+ if (unlikely (abstop - SmallExp >= ThresExp))
+ {
+ /* Avoid spurious underflow for tiny x. */
+ if (abstop - SmallExp >= 0x80000000)
+ return 1.0;
+ /* Note: inf and nan are already handled. */
+ if (abstop >= top12 (1024.0))
+#if WANT_SIMD_EXCEPT
+ return asuint64 (x) >> 63 ? __math_uflow (0) : __math_oflow (0);
+#else
+ return asuint64 (x) >> 63 ? 0.0 : INFINITY;
+#endif
+ /* Large x is special cased below. */
+ abstop = 0;
+ }
+
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */
+ double z = InvLn2N * x;
+ double kd = round (z);
+ uint64_t ki = lround (z);
+ double r = x - kd * Ln2HiN - kd * Ln2LoN;
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ r += xtail;
+ /* 2^(k/N) ~= scale. */
+ uint64_t idx = ki & (N_EXP - 1);
+ uint64_t top = ki << (52 - V_POW_EXP_TABLE_BITS);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ uint64_t sbits = SBits[idx] + top;
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
+ double r2 = r * r;
+ double tmp = r + r2 * Cs[0] + r * r2 * (Cs[1] + r * Cs[2]);
+ if (unlikely (abstop == 0))
+ return special_case (tmp, sbits, ki);
+ double scale = asdouble (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return eval_as_double (scale + scale * tmp);
+}
+
+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
+ the bit representation of a non-zero finite floating-point value. */
+static inline int
+checkint (uint64_t iy)
+{
+ int e = iy >> 52 & 0x7ff;
+ if (e < 0x3ff)
+ return 0;
+ if (e > 0x3ff + 52)
+ return 2;
+ if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
+ return 0;
+ if (iy & (1ULL << (0x3ff + 52 - e)))
+ return 1;
+ return 2;
+}
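
checkint classifies y from its bit pattern alone: an exponent below the bias means a non-zero magnitude below 1 (not an integer), an exponent above bias+52 means the value is necessarily an even integer, and otherwise the low mantissa bits decide. A standalone reference using ordinary libm calls that should agree on finite non-zero inputs, illustrative only and not part of the patch:

#include <math.h>
#include <stdio.h>

/* Reference classification: 0 if not an integer, 1 if odd, 2 if even. */
static int
checkint_ref (double y)
{
  if (rint (y) != y)
    return 0;
  return fmod (y, 2.0) == 0.0 ? 2 : 1;
}

int
main (void)
{
  const double tests[] = { 2.0, 3.0, 2.5, -7.0, 0x1p52 + 1, 0x1p53 };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("%g -> %d\n", tests[i], checkint_ref (tests[i]));
  return 0;
}
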
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan. */
+static inline int
+zeroinfnan (uint64_t i)
+{
+ return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
+}
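
Doubling the bit pattern shifts out the sign, and the extra -1 makes the bits of +/-0 wrap around to UINT64_MAX, so a single unsigned comparison against the doubled bits of infinity catches 0, +/-inf and nan at once. A standalone check of that trick, illustrative only and not part of the patch:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int
zeroinfnan_ref (double x)
{
  uint64_t i, inf;
  double d = INFINITY;
  memcpy (&i, &x, sizeof i);
  memcpy (&inf, &d, sizeof inf);
  return 2 * i - 1 >= 2 * inf - 1;
}

int
main (void)
{
  printf ("%d %d %d %d %d\n", zeroinfnan_ref (0.0), zeroinfnan_ref (-0.0),
	  zeroinfnan_ref (INFINITY), zeroinfnan_ref (NAN),
	  zeroinfnan_ref (1.5)); /* 1 1 1 1 0 */
  return 0;
}
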
+
+static double NOINLINE
+__pl_finite_pow (double x, double y)
+{
+ uint32_t sign_bias = 0;
+ uint64_t ix, iy;
+ uint32_t topx, topy;
+
+ ix = asuint64 (x);
+ iy = asuint64 (y);
+ topx = top12 (x);
+ topy = top12 (y);
+ if (unlikely (topx - SmallPowX >= ThresPowX
+ || (topy & 0x7ff) - SmallPowY >= ThresPowY))
+ {
+ /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
+ and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
+ /* Special cases: (x < 0x1p-1022 or inf or nan) or
+ (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
+ if (unlikely (zeroinfnan (iy)))
+ {
+ if (2 * iy == 0)
+ return issignaling_inline (x) ? x + y : 1.0;
+ if (ix == asuint64 (1.0))
+ return issignaling_inline (y) ? x + y : 1.0;
+ if (2 * ix > 2 * asuint64 (INFINITY)
+ || 2 * iy > 2 * asuint64 (INFINITY))
+ return x + y;
+ if (2 * ix == 2 * asuint64 (1.0))
+ return 1.0;
+ if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
+ return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
+ return y * y;
+ }
+ if (unlikely (zeroinfnan (ix)))
+ {
+ double x2 = x * x;
+ if (ix >> 63 && checkint (iy) == 1)
+ {
+ x2 = -x2;
+ sign_bias = 1;
+ }
+#if WANT_SIMD_EXCEPT
+ if (2 * ix == 0 && iy >> 63)
+ return __math_divzero (sign_bias);
+#endif
+ /* Without the barrier some versions of clang hoist the 1/x2 and
+ thus division by zero exception can be signaled spuriously. */
+ return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
+ }
+ /* Here x and y are non-zero finite. */
+ if (ix >> 63)
+ {
+ /* Finite x < 0. */
+ int yint = checkint (iy);
+ if (yint == 0)
+#if WANT_SIMD_EXCEPT
+ return __math_invalid (x);
+#else
+ return __builtin_nan ("");
+#endif
+ if (yint == 1)
+ sign_bias = SignBias;
+ ix &= 0x7fffffffffffffff;
+ topx &= 0x7ff;
+ }
+ if ((topy & 0x7ff) - SmallPowY >= ThresPowY)
+ {
+ /* Note: sign_bias == 0 here because y is not odd. */
+ if (ix == asuint64 (1.0))
+ return 1.0;
+ /* |y| < 2^-65, x^y ~= 1 + y*log(x). */
+ if ((topy & 0x7ff) < SmallPowY)
+ return 1.0;
+#if WANT_SIMD_EXCEPT
+ return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
+ : __math_uflow (0);
+#else
+ return (ix > asuint64 (1.0)) == (topy < 0x800) ? INFINITY : 0;
+#endif
+ }
+ if (topx == 0)
+ {
+ /* Normalize subnormal x so exponent becomes negative. */
+ /* Without the barrier some versions of clang evaluate the mul
+ unconditionally causing spurious overflow exceptions. */
+ ix = asuint64 (opt_barrier_double (x) * 0x1p52);
+ ix &= 0x7fffffffffffffff;
+ ix -= 52ULL << 52;
+ }
+ }
+
+ double lo;
+ double hi = log_inline (ix, &lo);
+ double ehi = y * hi;
+ double elo = y * lo + fma (y, hi, -ehi);
+ return exp_inline (ehi, elo, sign_bias);
+}
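
The last few lines split y*log(x) into ehi + elo: ehi = y*hi, and fma (y, hi, -ehi) recovers the exact rounding error of that multiply, so exp_inline receives the product to nearly double-double precision. A standalone sketch of the fma-based exact product, illustrative only and not part of the patch:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double a = 1.0 + 0x1p-28, b = 1.0 + 0x1.8p-27;
  double hi = a * b;           /* rounded product. */
  double lo = fma (a, b, -hi); /* exact rounding error of a*b. */
  printf ("hi = %a\nlo = %a\n", hi, lo);
  /* hi + lo equals a*b exactly; lo is non-zero here because the exact
     product needs more than 53 mantissa bits. */
  return 0;
}
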
diff --git a/contrib/arm-optimized-routines/pl/math/include/mathlib.h b/contrib/arm-optimized-routines/pl/math/include/mathlib.h
new file mode 100644
index 000000000000..f886e7f8c07a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/include/mathlib.h
@@ -0,0 +1,206 @@
+/*
+ * Public API.
+ *
+ * Copyright (c) 2015-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _MATHLIB_H
+#define _MATHLIB_H
+
+float acosf (float);
+float acoshf (float);
+float asinf (float);
+float asinhf (float);
+float atan2f (float, float);
+float atanf (float);
+float atanhf (float);
+float cbrtf (float);
+float coshf (float);
+float cospif (float);
+float erfcf (float);
+float erff (float);
+float erfinvf (float);
+float exp10f (float);
+float expm1f (float);
+float log10f (float);
+float log1pf (float);
+float sinhf (float);
+float sinpif (float);
+float tanf (float);
+float tanhf (float);
+
+double acos (double);
+double acosh (double);
+double asin (double);
+double asinh (double);
+double atan (double);
+double atan2 (double, double);
+double atanh (double);
+double cbrt (double);
+double cosh (double);
+double cospi (double);
+double erfc (double);
+double erfinv (double);
+double exp10 (double);
+double expm1 (double);
+double log10 (double);
+double log1p (double);
+double sinh (double);
+double sinpi (double);
+double tanh (double);
+
+long double cospil (long double);
+long double erfinvl (long double);
+long double exp10l (long double);
+long double sinpil (long double);
+
+#if __aarch64__
+# if __GNUC__ >= 5
+typedef __Float32x4_t __f32x4_t;
+typedef __Float64x2_t __f64x2_t;
+# elif __clang_major__ * 100 + __clang_minor__ >= 305
+typedef __attribute__ ((__neon_vector_type__ (4))) float __f32x4_t;
+typedef __attribute__ ((__neon_vector_type__ (2))) double __f64x2_t;
+# else
+# error Unsupported compiler
+# endif
+
+# if __GNUC__ >= 9 || __clang_major__ >= 8
+# define __vpcs __attribute__ ((__aarch64_vector_pcs__))
+
+typedef struct __f32x4x2_t
+{
+ __f32x4_t val[2];
+} __f32x4x2_t;
+
+typedef struct __f64x2x2_t
+{
+ __f64x2_t val[2];
+} __f64x2x2_t;
+
+/* Vector functions following the vector PCS using ABI names. */
+__vpcs __f32x4_t _ZGVnN4v_acoshf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_acosh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_acosf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_acos (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_asinf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_asin (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_asinhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_asinh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_atanf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_atan (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4vv_atan2f (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t _ZGVnN2vv_atan2 (__f64x2_t, __f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
+__vpcs __f32x4x2_t _ZGVnN4v_cexpif (__f32x4_t);
+__vpcs __f64x2x2_t _ZGVnN2v_cexpi (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_cospif (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_cospi (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_erfc (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_erfinvf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_erfinv (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_exp10f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_exp10 (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_exp2 (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_expm1f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_expm1 (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4vv_hypotf (__f32x4_t, __f32x4_t);
+__vpcs __f64x2_t _ZGVnN2vv_hypot (__f64x2_t, __f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_log10f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_log10 (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_log1pf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_log1p (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_log2f (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_log2 (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_sinhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sinh (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_sinpif (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_sinpi (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_tanf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_tan (__f64x2_t);
+__vpcs __f32x4_t _ZGVnN4v_tanhf (__f32x4_t);
+__vpcs __f64x2_t _ZGVnN2v_tanh (__f64x2_t);
+__vpcs void _ZGVnN4vl4l4_sincosf (__f32x4_t, __f32x4_t *, __f32x4_t *);
+__vpcs void _ZGVnN2vl8l8_sincos (__f64x2_t, __f64x2_t *, __f64x2_t *);
+
+# endif
+
+# if WANT_SVE_MATH
+# include <arm_sve.h>
+svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t);
+svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t);
+svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_coshf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t);
+svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t);
+svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t);
+svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t);
+svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t);
+svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t);
+svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t);
+void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t);
+void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t);
+# endif
+
+#endif
+
+#endif
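
A hypothetical usage sketch for the vector entry points declared above, illustrative only and not part of the header: it assumes an AArch64 toolchain on which __f32x4_t is the same NEON vector type as float32x4_t (true for the GCC and clang paths above) and that the program is linked against this library.

#include <arm_neon.h>
#include <stdio.h>
#include "mathlib.h"

int
main (void)
{
  float in[4] = { 0.1f, 0.2f, 0.3f, 0.4f };
  float out[4];
  float32x4_t v = vld1q_f32 (in);      /* assumes float32x4_t == __f32x4_t. */
  float32x4_t r = _ZGVnN4v_expm1f (v); /* vector expm1f over four lanes. */
  vst1q_f32 (out, r);
  printf ("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
  return 0;
}
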
diff --git a/contrib/arm-optimized-routines/pl/math/include/pl_test.h b/contrib/arm-optimized-routines/pl/math/include/pl_test.h
new file mode 100644
index 000000000000..3a3407e337b8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/include/pl_test.h
@@ -0,0 +1,24 @@
+/*
+ * PL macros to aid testing. This version of this file is used for building the
+ * routine, not the tests. Separate definitions are found in test/pl_test.h
+ * which emit test parameters.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
+ */
+
+/* Emit max ULP threshold - silenced for building the routine. */
+#define PL_TEST_ULP(f, l)
+
+/* Emit routine name if e == 1 and f is expected to correctly trigger fenv
+ exceptions. e allows declaration to be emitted conditionally upon certain
+ build flags - defer expansion by one pass to allow those flags to be expanded
+ properly. */
+#define PL_TEST_EXPECT_FENV(f, e)
+#define PL_TEST_EXPECT_FENV_ALWAYS(f)
+
+#define PL_TEST_INTERVAL(f, lo, hi, n)
+#define PL_TEST_SYM_INTERVAL(f, lo, hi, n)
+#define PL_TEST_INTERVAL_C(f, lo, hi, n, c)
+#define PL_TEST_SYM_INTERVAL_C(f, lo, hi, n, c)
+#define PL_TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)
diff --git a/contrib/arm-optimized-routines/pl/math/log.c b/contrib/arm-optimized-routines/pl/math/log.c
new file mode 100644
index 000000000000..40b0441d981d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log.c
@@ -0,0 +1,161 @@
+/*
+ * Double-precision log(x) function.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <float.h>
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+#define T __log_data.tab
+#define T2 __log_data.tab2
+#define B __log_data.poly1
+#define A __log_data.poly
+#define Ln2hi __log_data.ln2hi
+#define Ln2lo __log_data.ln2lo
+#define N (1 << LOG_TABLE_BITS)
+#define OFF 0x3fe6000000000000
+
+/* Top 16 bits of a double. */
+static inline uint32_t
+top16 (double x)
+{
+ return asuint64 (x) >> 48;
+}
+
+double
+optr_aor_log_f64 (double x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+ uint64_t ix, iz, tmp;
+ uint32_t top;
+ int k, i;
+
+ ix = asuint64 (x);
+ top = top16 (x);
+
+#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
+#define LO asuint64 (1.0 - 0x1p-5)
+#define HI asuint64 (1.0 + 0x1.1p-5)
+#elif LOG_POLY1_ORDER == 12
+#define LO asuint64 (1.0 - 0x1p-4)
+#define HI asuint64 (1.0 + 0x1.09p-4)
+#endif
+ if (unlikely (ix - LO < HI - LO))
+ {
+ /* Handle close to 1.0 inputs separately. */
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+ return 0;
+ r = x - 1.0;
+ r2 = r * r;
+ r3 = r * r2;
+#if LOG_POLY1_ORDER == 10
+ /* Worst-case error is around 0.516 ULP. */
+ y = r3
+ * (B[1] + r * B[2] + r2 * B[3]
+ + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
+ w = B[0] * r2; /* B[0] == -0.5. */
+ hi = r + w;
+ y += r - hi + w;
+ y += hi;
+#elif LOG_POLY1_ORDER == 11
+ /* Worst-case error is around 0.516 ULP. */
+ y = r3
+ * (B[1] + r * B[2]
+ + r2
+ * (B[3] + r * B[4] + r2 * B[5]
+ + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
+ w = B[0] * r2; /* B[0] == -0.5. */
+ hi = r + w;
+ y += r - hi + w;
+ y += hi;
+#elif LOG_POLY1_ORDER == 12
+ y = r3
+ * (B[1] + r * B[2] + r2 * B[3]
+ + r3
+ * (B[4] + r * B[5] + r2 * B[6]
+ + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+#if N <= 64
+ /* Worst-case error is around 0.532 ULP. */
+ w = B[0] * r2; /* B[0] == -0.5. */
+ hi = r + w;
+ y += r - hi + w;
+ y += hi;
+#else
+ /* Worst-case error is around 0.507 ULP. */
+ w = r * 0x1p27;
+ double_t rhi = r + w - w;
+ double_t rlo = r - rhi;
+ w = rhi * rhi * B[0]; /* B[0] == -0.5. */
+ hi = r + w;
+ lo = r - hi + w;
+ lo += B[0] * rlo * (rhi + r);
+ y += lo;
+ y += hi;
+#endif
+#endif
+ return eval_as_double (y);
+ }
+ if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+ {
+ /* x < 0x1p-1022 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzero (1);
+ if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
+ return x;
+ if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+ return __math_invalid (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint64 (x * 0x1p52);
+ ix -= 52ULL << 52;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
+ k = (int64_t) tmp >> 52; /* arithmetic shift */
+ iz = ix - (tmp & 0xfffULL << 52);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = asdouble (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ /* r ~= z/c - 1, |r| < 1/(2*N). */
+#if HAVE_FAST_FMA
+ /* rounding error: 0x1p-55/N. */
+ r = fma (z, invc, -1.0);
+#else
+ /* rounding error: 0x1p-55/N + 0x1p-66. */
+ r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+ kd = (double_t) k;
+
+ /* hi + lo = r + log(c) + k*Ln2. */
+ w = kd * Ln2hi + logc;
+ hi = w + r;
+ lo = w - hi + r + kd * Ln2lo;
+
+ /* log(x) = lo + (log1p(r) - r) + hi. */
+ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
+ /* Worst case error if |y| > 0x1p-5:
+ 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
+ Worst case error if |y| > 0x1p-4:
+ 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
+#if LOG_POLY_ORDER == 6
+ y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+#elif LOG_POLY_ORDER == 7
+ y = lo
+ + r2
+ * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ + r2 * r2 * (A[4] + r * A[5]))
+ + hi;
+#endif
+ return eval_as_double (y);
+}
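
The table path writes x = 2^k * z entirely with integer operations on the bit pattern: subtract OFF, read k from the arithmetically shifted result, and mask the exponent delta back out of ix. A standalone check of that reduction for one input, illustrative only and not part of the patch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  double x = 200.0;
  uint64_t ix, iz, tmp;
  memcpy (&ix, &x, sizeof ix);
  tmp = ix - 0x3fe6000000000000ULL;
  int k = (int64_t) tmp >> 52;      /* arithmetic shift. */
  iz = ix - (tmp & 0xfffULL << 52); /* z = x * 2^-k. */
  double z;
  memcpy (&z, &iz, sizeof z);
  printf ("x = %g = 2^%d * %a\n", x, k, z);
  /* z lands in [0x1.6p-1, 0x1.6p+0): here 200 = 2^8 * 0x1.9p-1. */
  return 0;
}
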
diff --git a/contrib/arm-optimized-routines/pl/math/log10_2u.c b/contrib/arm-optimized-routines/pl/math/log10_2u.c
new file mode 100644
index 000000000000..74828ea9ef3c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log10_2u.c
@@ -0,0 +1,150 @@
+/*
+ * Double-precision log10(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Polynomial coefficients and lookup tables. */
+#define T __log10_data.tab
+#define T2 __log10_data.tab2
+#define B __log10_data.poly1
+#define A __log10_data.poly
+#define Ln2hi __log10_data.ln2hi
+#define Ln2lo __log10_data.ln2lo
+#define InvLn10 __log10_data.invln10
+#define N (1 << LOG10_TABLE_BITS)
+#define OFF 0x3fe6000000000000
+#define LO asuint64 (1.0 - 0x1p-4)
+#define HI asuint64 (1.0 + 0x1.09p-4)
+
+/* Top 16 bits of a double. */
+static inline uint32_t
+top16 (double x)
+{
+ return asuint64 (x) >> 48;
+}
+
+/* Fast and low accuracy implementation of log10.
+ The implementation is similar to that of math/log, except that:
+ - Polynomials are computed for log10(1+r) with r on same intervals as log.
+ - Lookup parameters are scaled (at runtime) to switch from base e to base 10.
+ Many errors above 1.59 ulp are observed across the whole range of doubles.
+ The greatest observed error is 1.61 ulp, at around 0.965:
+ log10(0x1.dc8710333a29bp-1) got -0x1.fee26884905a6p-6
+ want -0x1.fee26884905a8p-6. */
+double
+log10 (double x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
+ uint64_t ix, iz, tmp;
+ uint32_t top;
+ int k, i;
+
+ ix = asuint64 (x);
+ top = top16 (x);
+
+ if (unlikely (ix - LO < HI - LO))
+ {
+ /* Handle close to 1.0 inputs separately. */
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
+ return 0;
+ r = x - 1.0;
+ r2 = r * r;
+ r3 = r * r2;
+ y = r3
+ * (B[1] + r * B[2] + r2 * B[3]
+ + r3
+ * (B[4] + r * B[5] + r2 * B[6]
+ + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
+ /* Worst-case error is around 0.507 ULP. */
+ w = r * 0x1p27;
+ double_t rhi = r + w - w;
+ double_t rlo = r - rhi;
+ w = rhi * rhi * B[0];
+ hi = r + w;
+ lo = r - hi + w;
+ lo += B[0] * rlo * (rhi + r);
+ y += lo;
+ y += hi;
+ /* Scale by 1/ln(10). Polynomial already contains scaling. */
+ y = y * InvLn10;
+
+ return eval_as_double (y);
+ }
+ if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
+ {
+ /* x < 0x1p-1022 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzero (1);
+ if (ix == asuint64 (INFINITY)) /* log10(inf) == inf. */
+ return x;
+ if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
+ return __math_invalid (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint64 (x * 0x1p52);
+ ix -= 52ULL << 52;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (52 - LOG10_TABLE_BITS)) % N;
+ k = (int64_t) tmp >> 52; /* arithmetic shift. */
+ iz = ix - (tmp & 0xfffULL << 52);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = asdouble (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ /* r ~= z/c - 1, |r| < 1/(2*N). */
+#if HAVE_FAST_FMA
+ /* rounding error: 0x1p-55/N. */
+ r = fma (z, invc, -1.0);
+#else
+ /* rounding error: 0x1p-55/N + 0x1p-66. */
+ r = (z - T2[i].chi - T2[i].clo) * invc;
+#endif
+ kd = (double_t) k;
+
+ /* w = log(c) + k*Ln2hi. */
+ w = kd * Ln2hi + logc;
+ hi = w + r;
+ lo = w - hi + r + kd * Ln2lo;
+
+ /* log10(x) = (w + r)/log(10) + (log10(1+r) - r/log(10)). */
+ r2 = r * r; /* rounding error: 0x1p-54/N^2. */
+
+ /* Scale by 1/ln(10). Polynomial already contains scaling. */
+ y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
+ y = y * InvLn10;
+
+ return eval_as_double (y);
+}
+
+// clang-format off
+#if USE_GLIBC_ABI
+strong_alias (log10, __log10_finite)
+hidden_alias (log10, __ieee754_log10)
+#if LDBL_MANT_DIG == 53
+long double
+log10l (long double x)
+{
+ return log10 (x);
+}
+#endif
+#endif
+// clang-format on
+
+PL_SIG (S, D, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (log10, 1.11)
+PL_TEST_INTERVAL (log10, 0, 0xffff000000000000, 10000)
+PL_TEST_INTERVAL (log10, 0x1p-4, 0x1p4, 40000)
+PL_TEST_INTERVAL (log10, 0, inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/log10_data.c b/contrib/arm-optimized-routines/pl/math/log10_data.c
new file mode 100644
index 000000000000..9976f19cd6df
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log10_data.c
@@ -0,0 +1,337 @@
+/*
+ * Data for log10.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << LOG10_TABLE_BITS)
+
+const struct log10_data __log10_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.invln10 = 0x1.bcb7b1526e50ep-2,
+.poly1 = {
+#if LOG10_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+#endif
+},
+.poly = {
+#if N == 128 && LOG10_POLY_ORDER == 6
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+#endif
+},
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = (double)log(c)
+ tab2[i].chi = (double)c
+ tab2[i].clo = (double)(c - (double)c)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+ 1) the rounding error in 0x1.8p9 + logc is 0,
+ 2) the rounding error in z - chi - clo is < 0x1p-66 and
+ 3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased. */
+.tab = {
+#if N == 128
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+#endif
+},
+#if !HAVE_FAST_FMA
+.tab2 = {
+#if N == 128
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+#endif
+},
+#endif /* !HAVE_FAST_FMA */
+};
diff --git a/contrib/arm-optimized-routines/pl/math/log10f.c b/contrib/arm-optimized-routines/pl/math/log10f.c
new file mode 100644
index 000000000000..5c80008e4e57
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log10f.c
@@ -0,0 +1,97 @@
+/*
+ * Single-precision log10 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Data associated with logf:
+
+ LOGF_TABLE_BITS = 4
+ LOGF_POLY_ORDER = 4
+
+ ULP error: 0.818 (nearest rounding).
+ Relative error: 1.957 * 2^-26 (before rounding). */
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define InvLn10 __logf_data.invln10
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+/* This naive implementation of log10f mimics that of log
+ then simply scales the result by 1/log(10) to switch from base e to
+ base 10. Hence, most computations are carried out in double precision.
+ Scaling before rounding to single precision is both faster and more accurate.
+
+ ULP error: 0.797 ulp (nearest rounding). */
+float
+log10f (float x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t z, r, r2, y, y0, invc, logc;
+ uint32_t ix, iz, tmp;
+ int k, i;
+
+ ix = asuint (x);
+#if WANT_ROUNDING
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (unlikely (ix == 0x3f800000))
+ return 0;
+#endif
+ if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+ {
+ /* x < 0x1p-126 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzerof (1);
+ if (ix == 0x7f800000) /* log(inf) == inf. */
+ return x;
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+ return __math_invalidf (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint (x * 0x1p23f);
+ ix -= 23 << 23;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+ k = (int32_t) tmp >> 23; /* arithmetic shift. */
+ iz = ix - (tmp & 0xff800000);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t) asfloat (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ r = z * invc - 1;
+ y0 = logc + (double_t) k * Ln2;
+
+ /* Pipelined polynomial evaluation to approximate log1p(r). */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ y = y * r2 + (y0 + r);
+
+ /* Multiply by 1/log(10). */
+ y = y * InvLn10;
+
+ return eval_as_float (y);
+}
+
+PL_SIG (S, F, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (log10f, 0.30)
+PL_TEST_INTERVAL (log10f, 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (log10f, 0x1p-127, 0x1p-26, 50000)
+PL_TEST_INTERVAL (log10f, 0x1p-26, 0x1p3, 50000)
+PL_TEST_INTERVAL (log10f, 0x1p-4, 0x1p4, 50000)
+PL_TEST_INTERVAL (log10f, 0, inf, 50000)
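
The routine above is logf's algorithm with one extra multiply: the base-change identity log10(x) = log(x) / ln(10), applied in double precision before the single rounding to float. A quick numerical illustration of the identity itself, not part of the patch:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 123.456;
  /* The two values agree to within a unit or two in the last place. */
  printf ("%.17g\n%.17g\n", log10 (x), log (x) * (1.0 / log (10.0)));
  return 0;
}
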
diff --git a/contrib/arm-optimized-routines/pl/math/log1p_2u.c b/contrib/arm-optimized-routines/pl/math/log1p_2u.c
new file mode 100644
index 000000000000..f9491ce52b44
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log1p_2u.c
@@ -0,0 +1,131 @@
+/*
+ * Double-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f64.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Ln2Hi 0x1.62e42fefa3800p-1
+#define Ln2Lo 0x1.ef35793c76730p-45
+#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)). */
+#define OneMHfRt2Top \
+ 0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)). */
+#define OneTop12 0x3ff
+#define BottomMask 0xffffffff
+#define OneMHfRt2 0x3fd2bec333018866
+#define Rt2MOne 0x3fda827999fcef32
+#define AbsMask 0x7fffffffffffffff
+#define ExpM63 0x3c00
+
+static inline double
+eval_poly (double f)
+{
+ double f2 = f * f;
+ double f4 = f2 * f2;
+ double f8 = f4 * f4;
+ return estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs);
+}
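
estrin_18_f64, presumably provided by poly_scalar_f64.h included above, pairs coefficients so that independent fmas can execute in parallel instead of forming one long Horner dependency chain. The same idea on a degree-3 polynomial, as a minimal standalone sketch that is illustrative only and not part of the patch:

#include <math.h>
#include <stdio.h>

/* Estrin evaluation of c0 + c1*x + c2*x^2 + c3*x^3: the two inner fmas
   are independent and can issue in parallel. */
static double
estrin_3 (double x, const double c[4])
{
  double x2 = x * x;
  double p01 = fma (x, c[1], c[0]); /* c0 + c1*x */
  double p23 = fma (x, c[3], c[2]); /* c2 + c3*x */
  return fma (x2, p23, p01);
}

int
main (void)
{
  const double c[4] = { 1.0, -0.5, 0.25, -0.125 };
  double x = 0.3;
  double horner = fma (x, fma (x, fma (x, c[3], c[2]), c[1]), c[0]);
  /* Same polynomial, so the results agree up to rounding. */
  printf ("estrin = %.17g\nhorner = %.17g\n", estrin_3 (x, c), horner);
  return 0;
}
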
+
+/* log1p approximation using polynomial on reduced interval. Largest
+ observed errors are near the lower boundary of the region where k
+ is 0.
+ Maximum measured error: 1.75ULP.
+ log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2
+ want -0x1.65fb8659a2f92p-2. */
+double
+log1p (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask;
+ uint32_t ia16 = ia >> 48;
+
+ /* Handle special cases first. */
+ if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
+ || ix == 0x8000000000000000))
+ {
+ if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
+ {
+ /* x == -0 => log1p(x) = -0.
+ x == Inf => log1p(x) = Inf. */
+ return x;
+ }
+ if (ix == 0xbff0000000000000)
+ {
+ /* x == -1 => log1p(x) = -Inf. */
+ return __math_divzero (-1);
+ }
+ if (ia16 >= 0x7ff0)
+ {
+ /* x == +/-NaN => log1p(x) = NaN. */
+ return __math_invalid (asdouble (ia));
+ }
+ /* x < -1 => log1p(x) = NaN.
+ x == -Inf => log1p(x) = NaN. */
+ return __math_invalid (x);
+ }
+
+ /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
+ is in [sqrt(2)/2, sqrt(2)]):
+ log1p(x) = k*log(2) + log1p(f).
+
+ f may not be representable exactly, so we need a correction term:
+ let m = round(1 + x), c = (1 + x) - m.
+ c << m: at very small x, log1p(x) ~ x, hence:
+ log(1+x) - log(m) ~ c/m.
+
+ We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
+
+ uint64_t sign = ix & ~AbsMask;
+ if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
+ {
+ if (unlikely (ia16 <= ExpM63))
+ {
+ /* If exponent of x <= -63 then shortcut the polynomial and avoid
+ underflow by just returning x, which is exactly rounded in this
+ region. */
+ return x;
+ }
+ /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
+ logic below, as k = 0 and f = x, which is therefore representable exactly.
+ All we need is to return the polynomial. */
+ return fma (x, eval_poly (x) * x, x);
+ }
+
+ /* Obtain correctly scaled k by manipulation in the exponent. */
+ double m = x + 1;
+ uint64_t mi = asuint64 (m);
+ uint32_t u = (mi >> 32) + OneMHfRt2Top;
+ int32_t k = (int32_t) (u >> 20) - OneTop12;
+
+ /* Correction term c/m. */
+ double cm = (x - (m - 1)) / m;
+
+ /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */
+ uint32_t utop = (u & 0x000fffff) + HfRt2Top;
+ uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
+ double f = asdouble (u_red) - 1;
+
+ /* Approximate log1p(x) on the reduced input using a polynomial. Because
+ log1p(0)=0 we choose an approximation of the form:
+ x + C0*x^2 + C1*x^3 + C2x^4 + ...
+ Hence approximation has the form f + f^2 * P(f)
+ where P(x) = C0 + C1*x + C2x^2 + ... */
+ double p = fma (f, eval_poly (f) * f, f);
+
+ double kd = k;
+ double y = fma (Ln2Lo, kd, cm);
+ return y + fma (Ln2Hi, kd, p);
+}
+
+PL_SIG (S, D, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (log1p, 1.26)
+PL_TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
+PL_TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
+PL_TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000)
+PL_TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000)
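
The correction term above relies on the fact that when 1 + x is rounded to the double m, the discarded low part c = (1 + x) - m can still be computed exactly as x - (m - 1) for moderate x, and c/m then supplies the first-order fix-up. A standalone demonstration, illustrative only and not part of the patch:

#include <stdio.h>

int
main (void)
{
  double x = 0x1p-30 + 0x1p-80; /* the low bits of x are lost in 1 + x. */
  double m = x + 1.0;
  double c = x - (m - 1.0);     /* exactly the part dropped by the rounding. */
  printf ("m = %a\nc = %a\n", m, c); /* c == 0x1p-80 here. */
  return 0;
}
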
diff --git a/contrib/arm-optimized-routines/pl/math/log1p_data.c b/contrib/arm-optimized-routines/pl/math/log1p_data.c
new file mode 100644
index 000000000000..6168a0c9a214
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log1p_data.c
@@ -0,0 +1,19 @@
+/*
+ * Data used in double-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Polynomial coefficients generated using Remez algorithm, see
+ log1p.sollya for details. */
+const struct log1p_data __log1p_data = {
+ .coeffs = {-0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+ 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+ -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+ 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+ -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+ 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+ -0x1.cfa7385bdb37ep-6}};
diff --git a/contrib/arm-optimized-routines/pl/math/log1pf_2u1.c b/contrib/arm-optimized-routines/pl/math/log1pf_2u1.c
new file mode 100644
index 000000000000..e99174853720
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log1pf_2u1.c
@@ -0,0 +1,161 @@
+/*
+ * Single-precision log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "poly_scalar_f32.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define Ln2 (0x1.62e43p-1f)
+#define SignMask (0x80000000)
+
+/* Biased exponent of the largest float m for which m^8 underflows. */
+#define M8UFLOW_BOUND_BEXP 112
+/* Biased exponent of the largest float for which we just return x. */
+#define TINY_BOUND_BEXP 103
+
+#define C(i) __log1pf_data.coeffs[i]
+
+static inline float
+eval_poly (float m, uint32_t e)
+{
+#ifdef LOG1PF_2U5
+
+ /* 2.5 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using
+ slightly modified Estrin scheme (no x^0 term, and x term is just x). */
+ float p_12 = fmaf (m, C (1), C (0));
+ float p_34 = fmaf (m, C (3), C (2));
+ float p_56 = fmaf (m, C (5), C (4));
+ float p_78 = fmaf (m, C (7), C (6));
+
+ float m2 = m * m;
+ float p_02 = fmaf (m2, p_12, m);
+ float p_36 = fmaf (m2, p_56, p_34);
+ float p_79 = fmaf (m2, C (8), p_78);
+
+ float m4 = m2 * m2;
+ float p_06 = fmaf (m4, p_36, p_02);
+
+ if (unlikely (e < M8UFLOW_BOUND_BEXP))
+ return p_06;
+
+ float m8 = m4 * m4;
+ return fmaf (m8, p_79, p_06);
+
+#elif defined(LOG1PF_1U3)
+
+ /* 1.3 ulp variant. Approximate log(1+m) on [-0.25, 0.5] using Horner
+ scheme. Our polynomial approximation for log1p has the form
+ x + C1 * x^2 + C2 * x^3 + C3 * x^4 + ...
+ Hence approximation has the form m + m^2 * P(m)
+ where P(x) = C1 + C2 * x + C3 * x^2 + ... . */
+ return fmaf (m, m * horner_8_f32 (m, __log1pf_data.coeffs), m);
+
+#else
+#error No log1pf approximation exists with the requested precision. Options are 13 or 25.
+#endif
+}
+
+static inline uint32_t
+biased_exponent (uint32_t ix)
+{
+ return (ix & 0x7f800000) >> 23;
+}
+
+/* log1pf approximation using polynomial on reduced interval. Worst-case error
+ when using Estrin is roughly 2.02 ULP:
+ log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
+float
+log1pf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & ~SignMask;
+ uint32_t ia12 = ia >> 20;
+ uint32_t e = biased_exponent (ix);
+
+ /* Handle special cases first. */
+ if (unlikely (ia12 >= 0x7f8 || ix >= 0xbf800000 || ix == 0x80000000
+ || e <= TINY_BOUND_BEXP))
+ {
+ if (ix == 0xff800000)
+ {
+ /* x == -Inf => log1pf(x) = NaN. */
+ return NAN;
+ }
+ if ((ix == 0x7f800000 || e <= TINY_BOUND_BEXP) && ia12 <= 0x7f8)
+ {
+ /* |x| < TinyBound => log1p(x) = x.
+ x == Inf => log1pf(x) = Inf. */
+ return x;
+ }
+ if (ix == 0xbf800000)
+ {
+ /* x == -1.0 => log1pf(x) = -Inf. */
+ return __math_divzerof (-1);
+ }
+ if (ia12 >= 0x7f8)
+ {
+ /* x == +/-NaN => log1pf(x) = NaN. */
+ return __math_invalidf (asfloat (ia));
+ }
+ /* x < -1.0 => log1pf(x) = NaN. */
+ return __math_invalidf (x);
+ }
+
+ /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+ is in [-0.25, 0.5]):
+ log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+ We approximate log1p(m) with a polynomial, then scale by
+ k*log(2). Instead of doing this directly, we use an intermediate
+ scale factor s = 4*k*log(2) to ensure the scale is representable
+ as a normalised fp32 number. */
+
+ if (ix <= 0x3f000000 || ia <= 0x3e800000)
+ {
+ /* If x is in [-0.25, 0.5] then we can shortcut all the logic
+ below, as k = 0 and m = x. All we need is to return the
+ polynomial. */
+ return eval_poly (x, e);
+ }
+
+ float m = x + 1.0f;
+
+ /* k is used to scale the input. 0x3f400000 is chosen as we are trying to
+ reduce x to the range [-0.25, 0.5]. Inside this range, k is 0.
+ Outside this range, if k is reinterpreted as (NOT CONVERTED TO) float:
+ let k = sign * 2^p where sign = -1 if x < 0
+ 1 otherwise
+ and p is a negative integer whose magnitude increases with the
+ magnitude of x. */
+ int k = (asuint (m) - 0x3f400000) & 0xff800000;
+
+ /* By using integer arithmetic, we obtain the necessary scaling by
+ subtracting the unbiased exponent of k from the exponent of x. */
+ float m_scale = asfloat (asuint (x) - k);
+
+ /* Scale up to ensure that the scale factor is representable as normalised
+ fp32 number (s in [2**-126,2**26]), and scale m down accordingly. */
+ float s = asfloat (asuint (4.0f) - k);
+ m_scale = m_scale + fmaf (0.25f, s, -1.0f);
+
+ float p = eval_poly (m_scale, biased_exponent (asuint (m_scale)));
+
+ /* The scale factor to be applied back at the end - by multiplying float(k)
+ by 2^-23 we get the unbiased exponent of k. */
+ float scale_back = (float) k * 0x1.0p-23f;
+
+ /* Apply the scaling back. */
+ return fmaf (scale_back, Ln2, p);
+}
+
+PL_SIG (S, F, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (log1pf, 1.52)
+PL_TEST_SYM_INTERVAL (log1pf, 0.0, 0x1p-23, 50000)
+PL_TEST_SYM_INTERVAL (log1pf, 0x1p-23, 0.001, 50000)
+PL_TEST_SYM_INTERVAL (log1pf, 0.001, 1.0, 50000)
+PL_TEST_SYM_INTERVAL (log1pf, 1.0, inf, 5000)
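
In the routine above, k is kept as exponent-field-aligned bits rather than as a small integer, and the final (float) k * 0x1p-23 recovers the unbiased exponent for the k*log(2) term. A standalone check of that bookkeeping for one input, illustrative only and not part of the patch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  float x = 5.0f; /* m = 1 + x = 6 = 0.75 * 2^3, so the reduction finds k == 3. */
  float m = x + 1.0f;
  uint32_t mi;
  memcpy (&mi, &m, sizeof mi);
  int k = (mi - 0x3f400000) & 0xff800000; /* exponent-aligned, as in log1pf. */
  printf ("k bits = %#x, exponent = %g\n", (unsigned) k, (float) k * 0x1p-23f);
  /* Prints k bits = 0x1800000, exponent = 3. */
  return 0;
}
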
diff --git a/contrib/arm-optimized-routines/pl/math/log1pf_data.c b/contrib/arm-optimized-routines/pl/math/log1pf_data.c
new file mode 100644
index 000000000000..8c92d5738fe8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log1pf_data.c
@@ -0,0 +1,14 @@
+/*
+ * Data used in single-precision log1p(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+
+/* Polynomial coefficients generated using floating-point minimax
+ algorithm, see tools/log1pf.sollya for details. */
+const struct log1pf_data __log1pf_data
+ = {.coeffs = {-0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
+ -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
+ -0x1.6f0d5ep-5f}};
diff --git a/contrib/arm-optimized-routines/pl/math/log_data.c b/contrib/arm-optimized-routines/pl/math/log_data.c
new file mode 100644
index 000000000000..34715e5036a3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/log_data.c
@@ -0,0 +1,511 @@
+/*
+ * Data for log.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << LOG_TABLE_BITS)
+
+const struct log_data __log_data = {
+.ln2hi = 0x1.62e42fefa3800p-1,
+.ln2lo = 0x1.ef35793c76730p-45,
+.poly1 = {
+#if LOG_POLY1_ORDER == 10
+// relative error: 0x1.32eccc6p-62
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.55555555554e5p-2,
+-0x1.0000000000af2p-2,
+0x1.9999999bbe436p-3,
+-0x1.55555537f9cdep-3,
+0x1.24922fc8127cfp-3,
+-0x1.0000b7d6bb612p-3,
+0x1.c806ee1ddbcafp-4,
+-0x1.972335a9c2d6ep-4,
+#elif LOG_POLY1_ORDER == 11
+// relative error: 0x1.52c8b708p-68
+// in -0x1p-5 0x1.1p-5 (|log(1+x)| > 0x1p-5 outside this interval)
+-0x1p-1,
+0x1.5555555555555p-2,
+-0x1.ffffffffffea9p-3,
+0x1.999999999c4d4p-3,
+-0x1.55555557f5541p-3,
+0x1.249248fbe33e4p-3,
+-0x1.ffffc9a3c825bp-4,
+0x1.c71e1f204435dp-4,
+-0x1.9a7f26377d06ep-4,
+0x1.71c30cf8f7364p-4,
+#elif LOG_POLY1_ORDER == 12
+// relative error: 0x1.c04d76cp-63
+// in -0x1p-4 0x1.09p-4 (|log(1+x)| > 0x1p-4 outside the interval)
+-0x1p-1,
+0x1.5555555555577p-2,
+-0x1.ffffffffffdcbp-3,
+0x1.999999995dd0cp-3,
+-0x1.55555556745a7p-3,
+0x1.24924a344de3p-3,
+-0x1.fffffa4423d65p-4,
+0x1.c7184282ad6cap-4,
+-0x1.999eb43b068ffp-4,
+0x1.78182f7afd085p-4,
+-0x1.5521375d145cdp-4,
+#endif
+},
+.poly = {
+#if N == 64 && LOG_POLY_ORDER == 7
+// relative error: 0x1.906eb8ap-58
+// abs error: 0x1.d2cad5a8p-67
+// in -0x1.fp-8 0x1.fp-8
+-0x1.0000000000027p-1,
+0x1.555555555556ap-2,
+-0x1.fffffff0440bap-3,
+0x1.99999991906c3p-3,
+-0x1.555c8d7e8201ep-3,
+0x1.24978c59151fap-3,
+#elif N == 128 && LOG_POLY_ORDER == 6
+// relative error: 0x1.926199e8p-56
+// abs error: 0x1.882ff33p-65
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.555555551305bp-2,
+-0x1.fffffffeb459p-3,
+0x1.999b324f10111p-3,
+-0x1.55575e506c89fp-3,
+#elif N == 128 && LOG_POLY_ORDER == 7
+// relative error: 0x1.649fc4bp-64
+// abs error: 0x1.c3b5769p-74
+// in -0x1.fp-9 0x1.fp-9
+-0x1.0000000000001p-1,
+0x1.5555555555556p-2,
+-0x1.fffffffea1a8p-3,
+0x1.99999998e9139p-3,
+-0x1.555776801b968p-3,
+0x1.2493c29331a5cp-3,
+#endif
+},
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
+into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = (double)log(c)
+ tab2[i].chi = (double)c
+   tab2[i].clo = (double)(c - (double)chi)
+
+where c is near the center of the subinterval and is chosen by trying +-2^29
+floating point invc candidates around 1/center and selecting one for which
+
+ 1) the rounding error in 0x1.8p9 + logc is 0,
+ 2) the rounding error in z - chi - clo is < 0x1p-66 and
+ 3) the rounding error in (double)log(c) is minimized (< 0x1p-66).
+
+Note: 1) ensures that k*ln2hi + logc can be computed without rounding error,
+2) ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to
+a single rounding error when there is no fast fma for z*invc - 1, 3) ensures
+that logc + poly(z/c - 1) has small error, however near x == 1 when
+|log(x)| < 0x1p-4, this is not enough so that is special cased. */
+.tab = {
+#if N == 64
+{0x1.7242886495cd8p+0, -0x1.79e267bdfe000p-2},
+{0x1.6e1f769340dc9p+0, -0x1.6e60ee0ecb000p-2},
+{0x1.6a13ccc8f195cp+0, -0x1.63002fdbf6000p-2},
+{0x1.661ec72e86f3ap+0, -0x1.57bf76c597000p-2},
+{0x1.623fa6c447b16p+0, -0x1.4c9e07f0d2000p-2},
+{0x1.5e75bbca31702p+0, -0x1.419b42f027000p-2},
+{0x1.5ac05655adb10p+0, -0x1.36b67660e6000p-2},
+{0x1.571ed3e940191p+0, -0x1.2bef0839e4800p-2},
+{0x1.539094ac0fbbfp+0, -0x1.21445727cb000p-2},
+{0x1.5015007e7fc42p+0, -0x1.16b5ca3c3d000p-2},
+{0x1.4cab877c31cf9p+0, -0x1.0c42d3805f800p-2},
+{0x1.49539e76a88d3p+0, -0x1.01eae61b60800p-2},
+{0x1.460cbc12211dap+0, -0x1.ef5adb9fb0000p-3},
+{0x1.42d6624debe3ap+0, -0x1.db13daab99000p-3},
+{0x1.3fb0144f0d462p+0, -0x1.c6ffbe896e000p-3},
+{0x1.3c995a1f9a9b4p+0, -0x1.b31d84722d000p-3},
+{0x1.3991c23952500p+0, -0x1.9f6c3cf6eb000p-3},
+{0x1.3698df35eaa14p+0, -0x1.8beafe7f13000p-3},
+{0x1.33ae463091760p+0, -0x1.7898db878d000p-3},
+{0x1.30d190aae3d72p+0, -0x1.6574efe4ec000p-3},
+{0x1.2e025c9203c89p+0, -0x1.527e620845000p-3},
+{0x1.2b404a7244988p+0, -0x1.3fb457d798000p-3},
+{0x1.288b01dc19544p+0, -0x1.2d1615a077000p-3},
+{0x1.25e2268085f69p+0, -0x1.1aa2b431e5000p-3},
+{0x1.23456812abb74p+0, -0x1.08598f1d2b000p-3},
+{0x1.20b4703174157p+0, -0x1.ec738fee40000p-4},
+{0x1.1e2ef308b4e9bp+0, -0x1.c885768862000p-4},
+{0x1.1bb4a36b70a3fp+0, -0x1.a4e75b6a46000p-4},
+{0x1.194538e960658p+0, -0x1.8197efba9a000p-4},
+{0x1.16e0692a10ac8p+0, -0x1.5e95ad734e000p-4},
+{0x1.1485f1ba1568bp+0, -0x1.3bdf67117c000p-4},
+{0x1.12358e123ed6fp+0, -0x1.1973b744f0000p-4},
+{0x1.0fef01de37c8dp+0, -0x1.eea33446bc000p-5},
+{0x1.0db20b82be414p+0, -0x1.aaef4ab304000p-5},
+{0x1.0b7e6f67f69b3p+0, -0x1.67c962fd2c000p-5},
+{0x1.0953f342fc108p+0, -0x1.252f29acf8000p-5},
+{0x1.0732604ec956bp+0, -0x1.c63d19e9c0000p-6},
+{0x1.051980117f9b0p+0, -0x1.432ab6a388000p-6},
+{0x1.03091aa6810f1p+0, -0x1.8244357f50000p-7},
+{0x1.01010152cf066p+0, -0x1.0080a711c0000p-8},
+{0x1.fc07ef6b6e30bp-1, 0x1.fe03018e80000p-8},
+{0x1.f4465aa1024afp-1, 0x1.7b91986450000p-6},
+{0x1.ecc07a8fd3f5ep-1, 0x1.39e88608c8000p-5},
+{0x1.e573ad856b537p-1, 0x1.b42dc6e624000p-5},
+{0x1.de5d6dc7b8057p-1, 0x1.165372ec20000p-4},
+{0x1.d77b6498bddf7p-1, 0x1.51b07a0170000p-4},
+{0x1.d0cb580315c0fp-1, 0x1.8c3465c7ea000p-4},
+{0x1.ca4b30d1cf449p-1, 0x1.c5e544a290000p-4},
+{0x1.c3f8ef4810d8ep-1, 0x1.fec91aa0a6000p-4},
+{0x1.bdd2b8b311f44p-1, 0x1.1b72acdc5c000p-3},
+{0x1.b7d6c2eeac054p-1, 0x1.371fc65a98000p-3},
+{0x1.b20363474c8f5p-1, 0x1.526e61c1aa000p-3},
+{0x1.ac570165eeab1p-1, 0x1.6d60ffc240000p-3},
+{0x1.a6d019f331df4p-1, 0x1.87fa08a013000p-3},
+{0x1.a16d3ebc9e3c3p-1, 0x1.a23bc630c3000p-3},
+{0x1.9c2d14567ef45p-1, 0x1.bc286a3512000p-3},
+{0x1.970e4efae9169p-1, 0x1.d5c2195697000p-3},
+{0x1.920fb3bd0b802p-1, 0x1.ef0ae132d3000p-3},
+{0x1.8d3018b58699ap-1, 0x1.040259974e000p-2},
+{0x1.886e5ff170ee6p-1, 0x1.1058bd40e2000p-2},
+{0x1.83c977ad35d27p-1, 0x1.1c898c1137800p-2},
+{0x1.7f405ed16c520p-1, 0x1.2895a3e65b000p-2},
+{0x1.7ad220d0335c4p-1, 0x1.347dd8f6bd000p-2},
+{0x1.767dce53474fdp-1, 0x1.4043083cb3800p-2},
+#elif N == 128
+{0x1.734f0c3e0de9fp+0, -0x1.7cc7f79e69000p-2},
+{0x1.713786a2ce91fp+0, -0x1.76feec20d0000p-2},
+{0x1.6f26008fab5a0p+0, -0x1.713e31351e000p-2},
+{0x1.6d1a61f138c7dp+0, -0x1.6b85b38287800p-2},
+{0x1.6b1490bc5b4d1p+0, -0x1.65d5590807800p-2},
+{0x1.69147332f0cbap+0, -0x1.602d076180000p-2},
+{0x1.6719f18224223p+0, -0x1.5a8ca86909000p-2},
+{0x1.6524f99a51ed9p+0, -0x1.54f4356035000p-2},
+{0x1.63356aa8f24c4p+0, -0x1.4f637c36b4000p-2},
+{0x1.614b36b9ddc14p+0, -0x1.49da7fda85000p-2},
+{0x1.5f66452c65c4cp+0, -0x1.445923989a800p-2},
+{0x1.5d867b5912c4fp+0, -0x1.3edf439b0b800p-2},
+{0x1.5babccb5b90dep+0, -0x1.396ce448f7000p-2},
+{0x1.59d61f2d91a78p+0, -0x1.3401e17bda000p-2},
+{0x1.5805612465687p+0, -0x1.2e9e2ef468000p-2},
+{0x1.56397cee76bd3p+0, -0x1.2941b3830e000p-2},
+{0x1.54725e2a77f93p+0, -0x1.23ec58cda8800p-2},
+{0x1.52aff42064583p+0, -0x1.1e9e129279000p-2},
+{0x1.50f22dbb2bddfp+0, -0x1.1956d2b48f800p-2},
+{0x1.4f38f4734ded7p+0, -0x1.141679ab9f800p-2},
+{0x1.4d843cfde2840p+0, -0x1.0edd094ef9800p-2},
+{0x1.4bd3ec078a3c8p+0, -0x1.09aa518db1000p-2},
+{0x1.4a27fc3e0258ap+0, -0x1.047e65263b800p-2},
+{0x1.4880524d48434p+0, -0x1.feb224586f000p-3},
+{0x1.46dce1b192d0bp+0, -0x1.f474a7517b000p-3},
+{0x1.453d9d3391854p+0, -0x1.ea4443d103000p-3},
+{0x1.43a2744b4845ap+0, -0x1.e020d44e9b000p-3},
+{0x1.420b54115f8fbp+0, -0x1.d60a22977f000p-3},
+{0x1.40782da3ef4b1p+0, -0x1.cc00104959000p-3},
+{0x1.3ee8f5d57fe8fp+0, -0x1.c202956891000p-3},
+{0x1.3d5d9a00b4ce9p+0, -0x1.b81178d811000p-3},
+{0x1.3bd60c010c12bp+0, -0x1.ae2c9ccd3d000p-3},
+{0x1.3a5242b75dab8p+0, -0x1.a45402e129000p-3},
+{0x1.38d22cd9fd002p+0, -0x1.9a877681df000p-3},
+{0x1.3755bc5847a1cp+0, -0x1.90c6d69483000p-3},
+{0x1.35dce49ad36e2p+0, -0x1.87120a645c000p-3},
+{0x1.34679984dd440p+0, -0x1.7d68fb4143000p-3},
+{0x1.32f5cceffcb24p+0, -0x1.73cb83c627000p-3},
+{0x1.3187775a10d49p+0, -0x1.6a39a9b376000p-3},
+{0x1.301c8373e3990p+0, -0x1.60b3154b7a000p-3},
+{0x1.2eb4ebb95f841p+0, -0x1.5737d76243000p-3},
+{0x1.2d50a0219a9d1p+0, -0x1.4dc7b8fc23000p-3},
+{0x1.2bef9a8b7fd2ap+0, -0x1.4462c51d20000p-3},
+{0x1.2a91c7a0c1babp+0, -0x1.3b08abc830000p-3},
+{0x1.293726014b530p+0, -0x1.31b996b490000p-3},
+{0x1.27dfa5757a1f5p+0, -0x1.2875490a44000p-3},
+{0x1.268b39b1d3bbfp+0, -0x1.1f3b9f879a000p-3},
+{0x1.2539d838ff5bdp+0, -0x1.160c8252ca000p-3},
+{0x1.23eb7aac9083bp+0, -0x1.0ce7f57f72000p-3},
+{0x1.22a012ba940b6p+0, -0x1.03cdc49fea000p-3},
+{0x1.2157996cc4132p+0, -0x1.f57bdbc4b8000p-4},
+{0x1.201201dd2fc9bp+0, -0x1.e370896404000p-4},
+{0x1.1ecf4494d480bp+0, -0x1.d17983ef94000p-4},
+{0x1.1d8f5528f6569p+0, -0x1.bf9674ed8a000p-4},
+{0x1.1c52311577e7cp+0, -0x1.adc79202f6000p-4},
+{0x1.1b17c74cb26e9p+0, -0x1.9c0c3e7288000p-4},
+{0x1.19e010c2c1ab6p+0, -0x1.8a646b372c000p-4},
+{0x1.18ab07bb670bdp+0, -0x1.78d01b3ac0000p-4},
+{0x1.1778a25efbcb6p+0, -0x1.674f145380000p-4},
+{0x1.1648d354c31dap+0, -0x1.55e0e6d878000p-4},
+{0x1.151b990275fddp+0, -0x1.4485cdea1e000p-4},
+{0x1.13f0ea432d24cp+0, -0x1.333d94d6aa000p-4},
+{0x1.12c8b7210f9dap+0, -0x1.22079f8c56000p-4},
+{0x1.11a3028ecb531p+0, -0x1.10e4698622000p-4},
+{0x1.107fbda8434afp+0, -0x1.ffa6c6ad20000p-5},
+{0x1.0f5ee0f4e6bb3p+0, -0x1.dda8d4a774000p-5},
+{0x1.0e4065d2a9fcep+0, -0x1.bbcece4850000p-5},
+{0x1.0d244632ca521p+0, -0x1.9a1894012c000p-5},
+{0x1.0c0a77ce2981ap+0, -0x1.788583302c000p-5},
+{0x1.0af2f83c636d1p+0, -0x1.5715e67d68000p-5},
+{0x1.09ddb98a01339p+0, -0x1.35c8a49658000p-5},
+{0x1.08cabaf52e7dfp+0, -0x1.149e364154000p-5},
+{0x1.07b9f2f4e28fbp+0, -0x1.e72c082eb8000p-6},
+{0x1.06ab58c358f19p+0, -0x1.a55f152528000p-6},
+{0x1.059eea5ecf92cp+0, -0x1.63d62cf818000p-6},
+{0x1.04949cdd12c90p+0, -0x1.228fb8caa0000p-6},
+{0x1.038c6c6f0ada9p+0, -0x1.c317b20f90000p-7},
+{0x1.02865137932a9p+0, -0x1.419355daa0000p-7},
+{0x1.0182427ea7348p+0, -0x1.81203c2ec0000p-8},
+{0x1.008040614b195p+0, -0x1.0040979240000p-9},
+{0x1.fe01ff726fa1ap-1, 0x1.feff384900000p-9},
+{0x1.fa11cc261ea74p-1, 0x1.7dc41353d0000p-7},
+{0x1.f6310b081992ep-1, 0x1.3cea3c4c28000p-6},
+{0x1.f25f63ceeadcdp-1, 0x1.b9fc114890000p-6},
+{0x1.ee9c8039113e7p-1, 0x1.1b0d8ce110000p-5},
+{0x1.eae8078cbb1abp-1, 0x1.58a5bd001c000p-5},
+{0x1.e741aa29d0c9bp-1, 0x1.95c8340d88000p-5},
+{0x1.e3a91830a99b5p-1, 0x1.d276aef578000p-5},
+{0x1.e01e009609a56p-1, 0x1.07598e598c000p-4},
+{0x1.dca01e577bb98p-1, 0x1.253f5e30d2000p-4},
+{0x1.d92f20b7c9103p-1, 0x1.42edd8b380000p-4},
+{0x1.d5cac66fb5ccep-1, 0x1.606598757c000p-4},
+{0x1.d272caa5ede9dp-1, 0x1.7da76356a0000p-4},
+{0x1.cf26e3e6b2ccdp-1, 0x1.9ab434e1c6000p-4},
+{0x1.cbe6da2a77902p-1, 0x1.b78c7bb0d6000p-4},
+{0x1.c8b266d37086dp-1, 0x1.d431332e72000p-4},
+{0x1.c5894bd5d5804p-1, 0x1.f0a3171de6000p-4},
+{0x1.c26b533bb9f8cp-1, 0x1.067152b914000p-3},
+{0x1.bf583eeece73fp-1, 0x1.147858292b000p-3},
+{0x1.bc4fd75db96c1p-1, 0x1.2266ecdca3000p-3},
+{0x1.b951e0c864a28p-1, 0x1.303d7a6c55000p-3},
+{0x1.b65e2c5ef3e2cp-1, 0x1.3dfc33c331000p-3},
+{0x1.b374867c9888bp-1, 0x1.4ba366b7a8000p-3},
+{0x1.b094b211d304ap-1, 0x1.5933928d1f000p-3},
+{0x1.adbe885f2ef7ep-1, 0x1.66acd2418f000p-3},
+{0x1.aaf1d31603da2p-1, 0x1.740f8ec669000p-3},
+{0x1.a82e63fd358a7p-1, 0x1.815c0f51af000p-3},
+{0x1.a5740ef09738bp-1, 0x1.8e92954f68000p-3},
+{0x1.a2c2a90ab4b27p-1, 0x1.9bb3602f84000p-3},
+{0x1.a01a01393f2d1p-1, 0x1.a8bed1c2c0000p-3},
+{0x1.9d79f24db3c1bp-1, 0x1.b5b515c01d000p-3},
+{0x1.9ae2505c7b190p-1, 0x1.c2967ccbcc000p-3},
+{0x1.9852ef297ce2fp-1, 0x1.cf635d5486000p-3},
+{0x1.95cbaeea44b75p-1, 0x1.dc1bd3446c000p-3},
+{0x1.934c69de74838p-1, 0x1.e8c01b8cfe000p-3},
+{0x1.90d4f2f6752e6p-1, 0x1.f5509c0179000p-3},
+{0x1.8e6528effd79dp-1, 0x1.00e6c121fb800p-2},
+{0x1.8bfce9fcc007cp-1, 0x1.071b80e93d000p-2},
+{0x1.899c0dabec30ep-1, 0x1.0d46b9e867000p-2},
+{0x1.87427aa2317fbp-1, 0x1.13687334bd000p-2},
+{0x1.84f00acb39a08p-1, 0x1.1980d67234800p-2},
+{0x1.82a49e8653e55p-1, 0x1.1f8ffe0cc8000p-2},
+{0x1.8060195f40260p-1, 0x1.2595fd7636800p-2},
+{0x1.7e22563e0a329p-1, 0x1.2b9300914a800p-2},
+{0x1.7beb377dcb5adp-1, 0x1.3187210436000p-2},
+{0x1.79baa679725c2p-1, 0x1.377266dec1800p-2},
+{0x1.77907f2170657p-1, 0x1.3d54ffbaf3000p-2},
+{0x1.756cadbd6130cp-1, 0x1.432eee32fe000p-2},
+#endif
+},
+#if !HAVE_FAST_FMA
+.tab2 = {
+#if N == 64
+{0x1.61ffff94c4fecp-1, -0x1.9fe4fc998f325p-56},
+{0x1.66000020377ddp-1, 0x1.e804c7a9519f2p-55},
+{0x1.6a00004c41678p-1, 0x1.902c675d9ecfep-55},
+{0x1.6dffff7384f87p-1, -0x1.2fd6b95e55043p-56},
+{0x1.720000b37216ep-1, 0x1.802bc8d437043p-55},
+{0x1.75ffffbeb3c9dp-1, 0x1.6047ad0a0d4e4p-57},
+{0x1.7a0000628daep-1, -0x1.e00434b49313dp-56},
+{0x1.7dffffd7abd1ap-1, -0x1.6015f8a083576p-56},
+{0x1.81ffffdf40c54p-1, 0x1.7f54bf76a42c9p-57},
+{0x1.860000f334e11p-1, 0x1.60054cb5344d7p-56},
+{0x1.8a0001238aca7p-1, 0x1.c03c9bd132f55p-57},
+{0x1.8dffffb81d212p-1, -0x1.001e519f2764fp-55},
+{0x1.92000086adc7cp-1, 0x1.1fe40f88f49c6p-55},
+{0x1.960000135d8eap-1, -0x1.f832268dc3095p-55},
+{0x1.99ffff9435acp-1, 0x1.7031d8b835edcp-56},
+{0x1.9e00003478565p-1, -0x1.0030b221ce3eep-58},
+{0x1.a20000b592948p-1, 0x1.8fd2f1dbd4639p-55},
+{0x1.a600000ad0bcfp-1, 0x1.901d6a974e6bep-55},
+{0x1.a9ffff55953a5p-1, 0x1.a07556192db98p-57},
+{0x1.adffff29ce03dp-1, -0x1.fff0717ec71c2p-56},
+{0x1.b1ffff34f3ac8p-1, 0x1.8005573de89d1p-57},
+{0x1.b60000894c55bp-1, -0x1.ff2fb51b044c7p-57},
+{0x1.b9fffef45ec7dp-1, -0x1.9ff7c4e8730fp-56},
+{0x1.be0000cda7b2ap-1, 0x1.57d058dbf3c1dp-55},
+{0x1.c1ffff2c57917p-1, 0x1.7e66d7e48dbc9p-58},
+{0x1.c60000ea5b82ap-1, -0x1.47f5e132ed4bep-55},
+{0x1.ca0001121ae98p-1, -0x1.40958c8d5e00ap-58},
+{0x1.ce0000f9241cbp-1, -0x1.7da063caa81c8p-59},
+{0x1.d1fffe8be95a4p-1, -0x1.82e3a411afcd9p-59},
+{0x1.d5ffff035932bp-1, -0x1.00f901b3fe87dp-58},
+{0x1.d9fffe8b54ba7p-1, 0x1.ffef55d6e3a4p-55},
+{0x1.de0000ad95d19p-1, 0x1.5feb2efd4c7c7p-55},
+{0x1.e1fffe925ce47p-1, 0x1.c8085484eaf08p-55},
+{0x1.e5fffe3ddf853p-1, -0x1.fd5ed02c5cadp-60},
+{0x1.e9fffed0a0e5fp-1, -0x1.a80aaef411586p-55},
+{0x1.ee00008f82eep-1, -0x1.b000aeaf97276p-55},
+{0x1.f20000a22d2f4p-1, -0x1.8f8906e13eba3p-56},
+{0x1.f5fffee35b57dp-1, 0x1.1fdd33b2d3714p-57},
+{0x1.fa00014eec3a6p-1, -0x1.3ee0b7a18c1a5p-58},
+{0x1.fdffff5daa89fp-1, -0x1.c1e24c8e3b503p-58},
+{0x1.0200005b93349p+0, -0x1.50197fe6bedcap-54},
+{0x1.05ffff9d597acp+0, 0x1.20160d062d0dcp-55},
+{0x1.0a00005687a63p+0, -0x1.27f3f9307696ep-54},
+{0x1.0dffff779164ep+0, 0x1.b7eb40bb9c4f4p-54},
+{0x1.12000044a0aa8p+0, 0x1.efbc914d512c4p-55},
+{0x1.16000069685bcp+0, -0x1.c0bea3eb2d82cp-57},
+{0x1.1a000093f0d78p+0, 0x1.1fecbf1e8c52p-54},
+{0x1.1dffffb2b1457p+0, -0x1.3fc91365637d6p-55},
+{0x1.2200008824a1p+0, -0x1.dff7e9feb578ap-54},
+{0x1.25ffffeef953p+0, -0x1.b00a61ec912f7p-55},
+{0x1.2a0000a1e7783p+0, 0x1.60048318b0483p-56},
+{0x1.2e0000853d4c7p+0, -0x1.77fbedf2c8cf3p-54},
+{0x1.320000324c55bp+0, 0x1.f81983997354fp-54},
+{0x1.360000594f796p+0, -0x1.cfe4beff900a9p-54},
+{0x1.3a0000a4c1c0fp+0, 0x1.07dbb2e268d0ep-54},
+{0x1.3e0000751c61bp+0, 0x1.80583ed1c566ep-56},
+{0x1.42000069e8a9fp+0, 0x1.f01f1edf82045p-54},
+{0x1.460000b5a1e34p+0, -0x1.dfdf0cf45c14ap-55},
+{0x1.4a0000187e513p+0, 0x1.401306b83a98dp-55},
+{0x1.4dffff3ba420bp+0, 0x1.9fc6539a6454ep-56},
+{0x1.51fffffe391c9p+0, -0x1.601ef3353ac83p-54},
+{0x1.560000e342455p+0, 0x1.3fb7fac8ac151p-55},
+{0x1.59ffffc39676fp+0, 0x1.4fe7dd6659cc2p-55},
+{0x1.5dfffff10ef42p+0, -0x1.48154cb592bcbp-54},
+#elif N == 128
+{0x1.61000014fb66bp-1, 0x1.e026c91425b3cp-56},
+{0x1.63000034db495p-1, 0x1.dbfea48005d41p-55},
+{0x1.650000d94d478p-1, 0x1.e7fa786d6a5b7p-55},
+{0x1.67000074e6fadp-1, 0x1.1fcea6b54254cp-57},
+{0x1.68ffffedf0faep-1, -0x1.c7e274c590efdp-56},
+{0x1.6b0000763c5bcp-1, -0x1.ac16848dcda01p-55},
+{0x1.6d0001e5cc1f6p-1, 0x1.33f1c9d499311p-55},
+{0x1.6efffeb05f63ep-1, -0x1.e80041ae22d53p-56},
+{0x1.710000e86978p-1, 0x1.bff6671097952p-56},
+{0x1.72ffffc67e912p-1, 0x1.c00e226bd8724p-55},
+{0x1.74fffdf81116ap-1, -0x1.e02916ef101d2p-57},
+{0x1.770000f679c9p-1, -0x1.7fc71cd549c74p-57},
+{0x1.78ffffa7ec835p-1, 0x1.1bec19ef50483p-55},
+{0x1.7affffe20c2e6p-1, -0x1.07e1729cc6465p-56},
+{0x1.7cfffed3fc9p-1, -0x1.08072087b8b1cp-55},
+{0x1.7efffe9261a76p-1, 0x1.dc0286d9df9aep-55},
+{0x1.81000049ca3e8p-1, 0x1.97fd251e54c33p-55},
+{0x1.8300017932c8fp-1, -0x1.afee9b630f381p-55},
+{0x1.850000633739cp-1, 0x1.9bfbf6b6535bcp-55},
+{0x1.87000204289c6p-1, -0x1.bbf65f3117b75p-55},
+{0x1.88fffebf57904p-1, -0x1.9006ea23dcb57p-55},
+{0x1.8b00022bc04dfp-1, -0x1.d00df38e04b0ap-56},
+{0x1.8cfffe50c1b8ap-1, -0x1.8007146ff9f05p-55},
+{0x1.8effffc918e43p-1, 0x1.3817bd07a7038p-55},
+{0x1.910001efa5fc7p-1, 0x1.93e9176dfb403p-55},
+{0x1.9300013467bb9p-1, 0x1.f804e4b980276p-56},
+{0x1.94fffe6ee076fp-1, -0x1.f7ef0d9ff622ep-55},
+{0x1.96fffde3c12d1p-1, -0x1.082aa962638bap-56},
+{0x1.98ffff4458a0dp-1, -0x1.7801b9164a8efp-55},
+{0x1.9afffdd982e3ep-1, -0x1.740e08a5a9337p-55},
+{0x1.9cfffed49fb66p-1, 0x1.fce08c19bep-60},
+{0x1.9f00020f19c51p-1, -0x1.a3faa27885b0ap-55},
+{0x1.a10001145b006p-1, 0x1.4ff489958da56p-56},
+{0x1.a300007bbf6fap-1, 0x1.cbeab8a2b6d18p-55},
+{0x1.a500010971d79p-1, 0x1.8fecadd78793p-55},
+{0x1.a70001df52e48p-1, -0x1.f41763dd8abdbp-55},
+{0x1.a90001c593352p-1, -0x1.ebf0284c27612p-55},
+{0x1.ab0002a4f3e4bp-1, -0x1.9fd043cff3f5fp-57},
+{0x1.acfffd7ae1ed1p-1, -0x1.23ee7129070b4p-55},
+{0x1.aefffee510478p-1, 0x1.a063ee00edea3p-57},
+{0x1.b0fffdb650d5bp-1, 0x1.a06c8381f0ab9p-58},
+{0x1.b2ffffeaaca57p-1, -0x1.9011e74233c1dp-56},
+{0x1.b4fffd995badcp-1, -0x1.9ff1068862a9fp-56},
+{0x1.b7000249e659cp-1, 0x1.aff45d0864f3ep-55},
+{0x1.b8ffff987164p-1, 0x1.cfe7796c2c3f9p-56},
+{0x1.bafffd204cb4fp-1, -0x1.3ff27eef22bc4p-57},
+{0x1.bcfffd2415c45p-1, -0x1.cffb7ee3bea21p-57},
+{0x1.beffff86309dfp-1, -0x1.14103972e0b5cp-55},
+{0x1.c0fffe1b57653p-1, 0x1.bc16494b76a19p-55},
+{0x1.c2ffff1fa57e3p-1, -0x1.4feef8d30c6edp-57},
+{0x1.c4fffdcbfe424p-1, -0x1.43f68bcec4775p-55},
+{0x1.c6fffed54b9f7p-1, 0x1.47ea3f053e0ecp-55},
+{0x1.c8fffeb998fd5p-1, 0x1.383068df992f1p-56},
+{0x1.cb0002125219ap-1, -0x1.8fd8e64180e04p-57},
+{0x1.ccfffdd94469cp-1, 0x1.e7ebe1cc7ea72p-55},
+{0x1.cefffeafdc476p-1, 0x1.ebe39ad9f88fep-55},
+{0x1.d1000169af82bp-1, 0x1.57d91a8b95a71p-56},
+{0x1.d30000d0ff71dp-1, 0x1.9c1906970c7dap-55},
+{0x1.d4fffea790fc4p-1, -0x1.80e37c558fe0cp-58},
+{0x1.d70002edc87e5p-1, -0x1.f80d64dc10f44p-56},
+{0x1.d900021dc82aap-1, -0x1.47c8f94fd5c5cp-56},
+{0x1.dafffd86b0283p-1, 0x1.c7f1dc521617ep-55},
+{0x1.dd000296c4739p-1, 0x1.8019eb2ffb153p-55},
+{0x1.defffe54490f5p-1, 0x1.e00d2c652cc89p-57},
+{0x1.e0fffcdabf694p-1, -0x1.f8340202d69d2p-56},
+{0x1.e2fffdb52c8ddp-1, 0x1.b00c1ca1b0864p-56},
+{0x1.e4ffff24216efp-1, 0x1.2ffa8b094ab51p-56},
+{0x1.e6fffe88a5e11p-1, -0x1.7f673b1efbe59p-58},
+{0x1.e9000119eff0dp-1, -0x1.4808d5e0bc801p-55},
+{0x1.eafffdfa51744p-1, 0x1.80006d54320b5p-56},
+{0x1.ed0001a127fa1p-1, -0x1.002f860565c92p-58},
+{0x1.ef00007babcc4p-1, -0x1.540445d35e611p-55},
+{0x1.f0ffff57a8d02p-1, -0x1.ffb3139ef9105p-59},
+{0x1.f30001ee58ac7p-1, 0x1.a81acf2731155p-55},
+{0x1.f4ffff5823494p-1, 0x1.a3f41d4d7c743p-55},
+{0x1.f6ffffca94c6bp-1, -0x1.202f41c987875p-57},
+{0x1.f8fffe1f9c441p-1, 0x1.77dd1f477e74bp-56},
+{0x1.fafffd2e0e37ep-1, -0x1.f01199a7ca331p-57},
+{0x1.fd0001c77e49ep-1, 0x1.181ee4bceacb1p-56},
+{0x1.feffff7e0c331p-1, -0x1.e05370170875ap-57},
+{0x1.00ffff465606ep+0, -0x1.a7ead491c0adap-55},
+{0x1.02ffff3867a58p+0, -0x1.77f69c3fcb2ep-54},
+{0x1.04ffffdfc0d17p+0, 0x1.7bffe34cb945bp-54},
+{0x1.0700003cd4d82p+0, 0x1.20083c0e456cbp-55},
+{0x1.08ffff9f2cbe8p+0, -0x1.dffdfbe37751ap-57},
+{0x1.0b000010cda65p+0, -0x1.13f7faee626ebp-54},
+{0x1.0d00001a4d338p+0, 0x1.07dfa79489ff7p-55},
+{0x1.0effffadafdfdp+0, -0x1.7040570d66bcp-56},
+{0x1.110000bbafd96p+0, 0x1.e80d4846d0b62p-55},
+{0x1.12ffffae5f45dp+0, 0x1.dbffa64fd36efp-54},
+{0x1.150000dd59ad9p+0, 0x1.a0077701250aep-54},
+{0x1.170000f21559ap+0, 0x1.dfdf9e2e3deeep-55},
+{0x1.18ffffc275426p+0, 0x1.10030dc3b7273p-54},
+{0x1.1b000123d3c59p+0, 0x1.97f7980030188p-54},
+{0x1.1cffff8299eb7p+0, -0x1.5f932ab9f8c67p-57},
+{0x1.1effff48ad4p+0, 0x1.37fbf9da75bebp-54},
+{0x1.210000c8b86a4p+0, 0x1.f806b91fd5b22p-54},
+{0x1.2300003854303p+0, 0x1.3ffc2eb9fbf33p-54},
+{0x1.24fffffbcf684p+0, 0x1.601e77e2e2e72p-56},
+{0x1.26ffff52921d9p+0, 0x1.ffcbb767f0c61p-56},
+{0x1.2900014933a3cp+0, -0x1.202ca3c02412bp-56},
+{0x1.2b00014556313p+0, -0x1.2808233f21f02p-54},
+{0x1.2cfffebfe523bp+0, -0x1.8ff7e384fdcf2p-55},
+{0x1.2f0000bb8ad96p+0, -0x1.5ff51503041c5p-55},
+{0x1.30ffffb7ae2afp+0, -0x1.10071885e289dp-55},
+{0x1.32ffffeac5f7fp+0, -0x1.1ff5d3fb7b715p-54},
+{0x1.350000ca66756p+0, 0x1.57f82228b82bdp-54},
+{0x1.3700011fbf721p+0, 0x1.000bac40dd5ccp-55},
+{0x1.38ffff9592fb9p+0, -0x1.43f9d2db2a751p-54},
+{0x1.3b00004ddd242p+0, 0x1.57f6b707638e1p-55},
+{0x1.3cffff5b2c957p+0, 0x1.a023a10bf1231p-56},
+{0x1.3efffeab0b418p+0, 0x1.87f6d66b152bp-54},
+{0x1.410001532aff4p+0, 0x1.7f8375f198524p-57},
+{0x1.4300017478b29p+0, 0x1.301e672dc5143p-55},
+{0x1.44fffe795b463p+0, 0x1.9ff69b8b2895ap-55},
+{0x1.46fffe80475ep+0, -0x1.5c0b19bc2f254p-54},
+{0x1.48fffef6fc1e7p+0, 0x1.b4009f23a2a72p-54},
+{0x1.4afffe5bea704p+0, -0x1.4ffb7bf0d7d45p-54},
+{0x1.4d000171027dep+0, -0x1.9c06471dc6a3dp-54},
+{0x1.4f0000ff03ee2p+0, 0x1.77f890b85531cp-54},
+{0x1.5100012dc4bd1p+0, 0x1.004657166a436p-57},
+{0x1.530001605277ap+0, -0x1.6bfcece233209p-54},
+{0x1.54fffecdb704cp+0, -0x1.902720505a1d7p-55},
+{0x1.56fffef5f54a9p+0, 0x1.bbfe60ec96412p-54},
+{0x1.5900017e61012p+0, 0x1.87ec581afef9p-55},
+{0x1.5b00003c93e92p+0, -0x1.f41080abf0ccp-54},
+{0x1.5d0001d4919bcp+0, -0x1.8812afb254729p-54},
+{0x1.5efffe7b87a89p+0, -0x1.47eb780ed6904p-54},
+#endif
+},
+#endif /* !HAVE_FAST_FMA */
+};
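The identity behind the table layout described in the comment above can be checked directly against the standard libm log; a standalone sketch with values chosen only for illustration (not taken from the table):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* Split x = 2^k * z with z near 1, pick a c near z (here c == z for
     simplicity) and recombine using log(x) = k*ln2 + log(c) + log(z/c).  */
  double x = 10.0;
  int k = 3;             /* 10 = 2^3 * 1.25 */
  double z = x * 0x1p-3; /* 1.25 */
  double c = 1.25;
  double recombined = k * log (2.0) + log (c) + log (z / c);
  printf ("%.17g\n%.17g\n", log (x), recombined); /* agree up to the rounding
                                                     of the individual terms */
  return 0;
}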
diff --git a/contrib/arm-optimized-routines/pl/math/logf.c b/contrib/arm-optimized-routines/pl/math/logf.c
new file mode 100644
index 000000000000..17a74ed6d28f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/logf.c
@@ -0,0 +1,75 @@
+/*
+ * Single-precision log function.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include "math_config.h"
+
+/*
+LOGF_TABLE_BITS = 4
+LOGF_POLY_ORDER = 4
+
+ULP error: 0.818 (nearest rounding.)
+Relative error: 1.957 * 2^-26 (before rounding.)
+*/
+
+#define T __logf_data.tab
+#define A __logf_data.poly
+#define Ln2 __logf_data.ln2
+#define N (1 << LOGF_TABLE_BITS)
+#define OFF 0x3f330000
+
+float
+optr_aor_log_f32 (float x)
+{
+ /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
+ double_t z, r, r2, y, y0, invc, logc;
+ uint32_t ix, iz, tmp;
+ int k, i;
+
+ ix = asuint (x);
+#if WANT_ROUNDING
+ /* Fix sign of zero with downward rounding when x==1. */
+ if (unlikely (ix == 0x3f800000))
+ return 0;
+#endif
+ if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
+ {
+ /* x < 0x1p-126 or inf or nan. */
+ if (ix * 2 == 0)
+ return __math_divzerof (1);
+ if (ix == 0x7f800000) /* log(inf) == inf. */
+ return x;
+ if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
+ return __math_invalidf (x);
+ /* x is subnormal, normalize it. */
+ ix = asuint (x * 0x1p23f);
+ ix -= 23 << 23;
+ }
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = ix - OFF;
+ i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
+ k = (int32_t) tmp >> 23; /* arithmetic shift */
+ iz = ix - (tmp & 0x1ff << 23);
+ invc = T[i].invc;
+ logc = T[i].logc;
+ z = (double_t) asfloat (iz);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
+ r = z * invc - 1;
+ y0 = logc + (double_t) k * Ln2;
+
+ /* Pipelined polynomial evaluation to approximate log1p(r). */
+ r2 = r * r;
+ y = A[1] * r + A[2];
+ y = A[0] * r2 + y;
+ y = y * r2 + (y0 + r);
+ return eval_as_float (y);
+}
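The reduction above can be exercised in isolation. The sketch below (standalone, with OFF = 0x3f330000 and LOGF_TABLE_BITS = 4 hard-coded from the definitions above) decomposes x = 4.0f into k = 2, z = 1.0f and table index 9, which selects the {0x1p+0, 0x0p+0} entry of the table in logf_data.c below:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  float x = 4.0f;
  uint32_t ix;
  memcpy (&ix, &x, sizeof ix);        /* same effect as asuint (x) */
  uint32_t tmp = ix - 0x3f330000;
  int i = (tmp >> (23 - 4)) % 16;     /* table index */
  int k = (int32_t) tmp >> 23;        /* exponent, arithmetic shift */
  uint32_t iz = ix - (tmp & (0x1ffu << 23));
  float z;
  memcpy (&z, &iz, sizeof z);
  printf ("i=%d k=%d z=%a\n", i, k, z); /* i=9 k=2 z=0x1p+0 */
  return 0;
}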
diff --git a/contrib/arm-optimized-routines/pl/math/logf_data.c b/contrib/arm-optimized-routines/pl/math/logf_data.c
new file mode 100644
index 000000000000..97d9eb8d0097
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/logf_data.c
@@ -0,0 +1,36 @@
+/*
+ * Data definition for logf and log10f.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct logf_data __logf_data = {
+ .tab =
+ {
+ {0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2},
+ {0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2},
+ {0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2},
+ {0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3},
+ {0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3},
+ {0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3},
+ {0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4},
+ {0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4},
+ {0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5},
+ {0x1p+0, 0x0p+0},
+ {0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5},
+ {0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4},
+ {0x1.b2036576afce6p-1, 0x1.526e57720db08p-3},
+ {0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3},
+ {0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2},
+ {0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2},
+ },
+ .ln2 = 0x1.62e42fefa39efp-1,
+ .invln10 = 0x1.bcb7b1526e50ep-2,
+ .poly = {
+ -0x1.00ea348b88334p-2,
+ 0x1.5575b0be00b6ap-2,
+ -0x1.ffffef20a4123p-2,
+ }};
diff --git a/contrib/arm-optimized-routines/pl/math/math_config.h b/contrib/arm-optimized-routines/pl/math/math_config.h
new file mode 100644
index 000000000000..c3dd8f2db8c7
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/math_config.h
@@ -0,0 +1,624 @@
+/*
+ * Configuration for math routines.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _MATH_CONFIG_H
+#define _MATH_CONFIG_H
+
+#include <math.h>
+#include <stdint.h>
+
+#ifndef WANT_ROUNDING
+/* If defined to 1, return correct results for special cases in non-nearest
+ rounding modes (logf (1.0f) returns 0.0f with FE_DOWNWARD rather than
+ -0.0f). This may be set to 0 if there is no fenv support or if math
+ functions only get called in round to nearest mode. */
+# define WANT_ROUNDING 1
+#endif
+#ifndef WANT_ERRNO
+/* If defined to 1, set errno in math functions according to ISO C. Many math
+ libraries do not set errno, so this is 0 by default. It may need to be
+ set to 1 if math.h has (math_errhandling & MATH_ERRNO) != 0. */
+# define WANT_ERRNO 0
+#endif
+#ifndef WANT_SIMD_EXCEPT
+/* If defined to 1, trigger fp exceptions in vector routines, consistently with
+ behaviour expected from the corresponding scalar routine. */
+# define WANT_SIMD_EXCEPT 0
+#endif
+
+/* Compiler can inline round as a single instruction. */
+#ifndef HAVE_FAST_ROUND
+# if __aarch64__
+# define HAVE_FAST_ROUND 1
+# else
+# define HAVE_FAST_ROUND 0
+# endif
+#endif
+
+/* Compiler can inline lround, but not (long)round(x). */
+#ifndef HAVE_FAST_LROUND
+# if __aarch64__ && (100 * __GNUC__ + __GNUC_MINOR__) >= 408 \
+ && __NO_MATH_ERRNO__
+# define HAVE_FAST_LROUND 1
+# else
+# define HAVE_FAST_LROUND 0
+# endif
+#endif
+
+/* Compiler can inline fma as a single instruction. */
+#ifndef HAVE_FAST_FMA
+# if defined FP_FAST_FMA || __aarch64__
+# define HAVE_FAST_FMA 1
+# else
+# define HAVE_FAST_FMA 0
+# endif
+#endif
+
+/* Provide *_finite symbols and some of the glibc hidden symbols
+ so libmathlib can be used with binaries compiled against glibc
+ to interpose math functions with both static and dynamic linking. */
+#ifndef USE_GLIBC_ABI
+# if __GNUC__
+# define USE_GLIBC_ABI 1
+# else
+# define USE_GLIBC_ABI 0
+# endif
+#endif
+
+/* Optionally used extensions. */
+#ifdef __GNUC__
+# define HIDDEN __attribute__ ((__visibility__ ("hidden")))
+# define NOINLINE __attribute__ ((noinline))
+# define UNUSED __attribute__ ((unused))
+# define likely(x) __builtin_expect (!!(x), 1)
+# define unlikely(x) __builtin_expect (x, 0)
+# if __GNUC__ >= 9
+# define attribute_copy(f) __attribute__ ((copy (f)))
+# else
+# define attribute_copy(f)
+# endif
+# define strong_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f))) attribute_copy (f);
+# define hidden_alias(f, a) \
+ extern __typeof (f) a __attribute__ ((alias (#f), visibility ("hidden"))) \
+ attribute_copy (f);
+#else
+# define HIDDEN
+# define NOINLINE
+# define UNUSED
+# define likely(x) (x)
+# define unlikely(x) (x)
+#endif
+
+/* Return ptr but hide its value from the compiler so accesses through it
+ cannot be optimized based on the contents. */
+#define ptr_barrier(ptr) \
+ ({ \
+ __typeof (ptr) __ptr = (ptr); \
+ __asm("" : "+r"(__ptr)); \
+ __ptr; \
+ })
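A sketch of the intended use, with a hypothetical data table (nothing here is taken from this patch): routing the table address through ptr_barrier stops the compiler from folding loads whose index or contents it can prove at compile time.

struct hypothetical_data { double c0, c1; };
static const struct hypothetical_data hypothetical_consts = { 1.0, 0.5 };

static double
hypothetical_eval (double x)
{
  /* The loads of c0 and c1 must go through the pointer as written.  */
  const struct hypothetical_data *d = ptr_barrier (&hypothetical_consts);
  return d->c0 + d->c1 * x;
}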
+
+/* Symbol renames to avoid libc conflicts. */
+#define __math_oflowf arm_math_oflowf
+#define __math_uflowf arm_math_uflowf
+#define __math_may_uflowf arm_math_may_uflowf
+#define __math_divzerof arm_math_divzerof
+#define __math_oflow arm_math_oflow
+#define __math_uflow arm_math_uflow
+#define __math_may_uflow arm_math_may_uflow
+#define __math_divzero arm_math_divzero
+#define __math_invalidf arm_math_invalidf
+#define __math_invalid arm_math_invalid
+#define __math_check_oflow arm_math_check_oflow
+#define __math_check_uflow arm_math_check_uflow
+#define __math_check_oflowf arm_math_check_oflowf
+#define __math_check_uflowf arm_math_check_uflowf
+
+#if HAVE_FAST_ROUND
+/* When set, the roundtoint and converttoint functions are provided with
+ the semantics documented below. */
+# define TOINT_INTRINSICS 1
+
+/* Round x to nearest int in all rounding modes, ties have to be rounded
+ consistently with converttoint so the results match. If the result
+ would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
+static inline double_t
+roundtoint (double_t x)
+{
+ return round (x);
+}
+
+/* Convert x to nearest int in all rounding modes, ties have to be rounded
+   consistently with roundtoint. If the result is not representable in an
+ int32_t then the semantics is unspecified. */
+static inline int32_t
+converttoint (double_t x)
+{
+# if HAVE_FAST_LROUND
+ return lround (x);
+# else
+ return (long) round (x);
+# endif
+}
+#endif
+
+static inline uint32_t
+asuint (float f)
+{
+ union
+ {
+ float f;
+ uint32_t i;
+ } u = { f };
+ return u.i;
+}
+
+static inline float
+asfloat (uint32_t i)
+{
+ union
+ {
+ uint32_t i;
+ float f;
+ } u = { i };
+ return u.f;
+}
+
+static inline uint64_t
+asuint64 (double f)
+{
+ union
+ {
+ double f;
+ uint64_t i;
+ } u = { f };
+ return u.i;
+}
+
+static inline double
+asdouble (uint64_t i)
+{
+ union
+ {
+ uint64_t i;
+ double f;
+ } u = { i };
+ return u.f;
+}
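The union-based helpers above are the well-defined way in C to reinterpret bits between floating-point and integer types (unlike pointer casts, which can violate strict aliasing). A standalone demonstration of the same idea, mirroring asuint/asfloat on a single value:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  union { float f; uint32_t i; } u = { 1.0f };
  printf ("0x%08x\n", (unsigned) u.i); /* 0x3f800000: sign 0, biased exponent 127, fraction 0 */
  u.i += 1u << 23;                     /* add one to the exponent field */
  printf ("%g\n", u.f);                /* prints 2: the value doubles */
  return 0;
}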
+
+#ifndef IEEE_754_2008_SNAN
+# define IEEE_754_2008_SNAN 1
+#endif
+static inline int
+issignalingf_inline (float x)
+{
+ uint32_t ix = asuint (x);
+ if (!IEEE_754_2008_SNAN)
+ return (ix & 0x7fc00000) == 0x7fc00000;
+ return 2 * (ix ^ 0x00400000) > 2u * 0x7fc00000;
+}
+
+static inline int
+issignaling_inline (double x)
+{
+ uint64_t ix = asuint64 (x);
+ if (!IEEE_754_2008_SNAN)
+ return (ix & 0x7ff8000000000000) == 0x7ff8000000000000;
+ return 2 * (ix ^ 0x0008000000000000) > 2 * 0x7ff8000000000000ULL;
+}
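Concretely, for single precision the quiet bit is bit 22, and the expression shifts the sign bit out by doubling before comparing: a quiet NaN such as 0x7fc00000 gives 2 * (0x7fc00000 ^ 0x00400000) = 0xff000000, which is not greater than 2u * 0x7fc00000 = 0xff800000, while a signaling NaN such as 0x7f800001 gives 2 * 0x7fc00001 = 0xff800002, which is; infinities (0x7f800000) also fall on the not-greater side, so only signaling NaNs satisfy the test.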
+
+#if __aarch64__ && __GNUC__
+/* Prevent the optimization of a floating-point expression. */
+static inline float
+opt_barrier_float (float x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+ return x;
+}
+static inline double
+opt_barrier_double (double x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+ return x;
+}
+/* Force the evaluation of a floating-point expression for its side-effect. */
+static inline void
+force_eval_float (float x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+}
+static inline void
+force_eval_double (double x)
+{
+ __asm__ __volatile__ ("" : "+w" (x));
+}
+#else
+static inline float
+opt_barrier_float (float x)
+{
+ volatile float y = x;
+ return y;
+}
+static inline double
+opt_barrier_double (double x)
+{
+ volatile double y = x;
+ return y;
+}
+static inline void
+force_eval_float (float x)
+{
+ volatile float y UNUSED = x;
+}
+static inline void
+force_eval_double (double x)
+{
+ volatile double y UNUSED = x;
+}
+#endif
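The two families of helpers above are typically used together: opt_barrier_* stops the compiler from constant-folding an operand, and force_eval_* stops it from discarding a computation performed only for its floating-point side effect. A hedged sketch using the helpers defined above (the constant is illustrative, not taken from any routine in this patch):

static inline void
hypothetical_raise_underflow (void)
{
  /* The product underflows to zero; the barriers ensure it is evaluated at
     run time so the underflow and inexact flags are actually raised.  */
  force_eval_float (opt_barrier_float (0x1p-149f) * 0x1p-149f);
}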
+
+/* Evaluate an expression as the specified type, normally a type
+ cast should be enough, but compilers implement non-standard
+ excess-precision handling, so when FLT_EVAL_METHOD != 0 then
+ these functions may need to be customized. */
+static inline float
+eval_as_float (float x)
+{
+ return x;
+}
+static inline double
+eval_as_double (double x)
+{
+ return x;
+}
+
+/* Error handling tail calls for special cases, with a sign argument.
+ The sign of the return value is set if the argument is non-zero. */
+
+/* The result overflows. */
+HIDDEN float __math_oflowf (uint32_t);
+/* The result underflows to 0 in nearest rounding mode. */
+HIDDEN float __math_uflowf (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only. */
+HIDDEN float __math_may_uflowf (uint32_t);
+/* Division by zero. */
+HIDDEN float __math_divzerof (uint32_t);
+/* The result overflows. */
+HIDDEN double __math_oflow (uint32_t);
+/* The result underflows to 0 in nearest rounding mode. */
+HIDDEN double __math_uflow (uint32_t);
+/* The result underflows to 0 in some directed rounding mode only. */
+HIDDEN double __math_may_uflow (uint32_t);
+/* Division by zero. */
+HIDDEN double __math_divzero (uint32_t);
+
+/* Error handling using input checking. */
+
+/* Invalid input unless it is a quiet NaN. */
+HIDDEN float __math_invalidf (float);
+/* Invalid input unless it is a quiet NaN. */
+HIDDEN double __math_invalid (double);
+
+/* Error handling using output checking, only for errno setting. */
+
+/* Check if the result overflowed to infinity. */
+HIDDEN double __math_check_oflow (double);
+/* Check if the result underflowed to 0. */
+HIDDEN double __math_check_uflow (double);
+
+/* Check if the result overflowed to infinity. */
+static inline double
+check_oflow (double x)
+{
+ return WANT_ERRNO ? __math_check_oflow (x) : x;
+}
+
+/* Check if the result underflowed to 0. */
+static inline double
+check_uflow (double x)
+{
+ return WANT_ERRNO ? __math_check_uflow (x) : x;
+}
+
+/* Check if the result overflowed to infinity. */
+HIDDEN float __math_check_oflowf (float);
+/* Check if the result underflowed to 0. */
+HIDDEN float __math_check_uflowf (float);
+
+/* Check if the result overflowed to infinity. */
+static inline float
+check_oflowf (float x)
+{
+ return WANT_ERRNO ? __math_check_oflowf (x) : x;
+}
+
+/* Check if the result underflowed to 0. */
+static inline float
+check_uflowf (float x)
+{
+ return WANT_ERRNO ? __math_check_uflowf (x) : x;
+}
+
+extern const struct erff_data
+{
+ struct
+ {
+ float erf, scale;
+ } tab[513];
+} __erff_data HIDDEN;
+
+extern const struct sv_erff_data
+{
+ float erf[513];
+ float scale[513];
+} __sv_erff_data HIDDEN;
+
+extern const struct erfcf_data
+{
+ struct
+ {
+ float erfc, scale;
+ } tab[645];
+} __erfcf_data HIDDEN;
+
+/* Data for logf and log10f. */
+#define LOGF_TABLE_BITS 4
+#define LOGF_POLY_ORDER 4
+extern const struct logf_data
+{
+ struct
+ {
+ double invc, logc;
+ } tab[1 << LOGF_TABLE_BITS];
+ double ln2;
+ double invln10;
+ double poly[LOGF_POLY_ORDER - 1]; /* First order coefficient is 1. */
+} __logf_data HIDDEN;
+
+/* Data for low accuracy log10 (with 1/ln(10) included in coefficients). */
+#define LOG10_TABLE_BITS 7
+#define LOG10_POLY_ORDER 6
+#define LOG10_POLY1_ORDER 12
+extern const struct log10_data
+{
+ double ln2hi;
+ double ln2lo;
+ double invln10;
+ double poly[LOG10_POLY_ORDER - 1]; /* First coefficient is 1/log(10). */
+ double poly1[LOG10_POLY1_ORDER - 1];
+ struct
+ {
+ double invc, logc;
+ } tab[1 << LOG10_TABLE_BITS];
+#if !HAVE_FAST_FMA
+ struct
+ {
+ double chi, clo;
+ } tab2[1 << LOG10_TABLE_BITS];
+#endif
+} __log10_data HIDDEN;
+
+#define EXP_TABLE_BITS 7
+#define EXP_POLY_ORDER 5
+/* Use polynomial that is optimized for a wider input range. This may be
+ needed for good precision in non-nearest rounding and !TOINT_INTRINSICS. */
+#define EXP_POLY_WIDE 0
+/* Use close to nearest rounding toint when !TOINT_INTRINSICS. This may be
+   needed for good precision in non-nearest rounding and !EXP_POLY_WIDE. */
+#define EXP_USE_TOINT_NARROW 0
+#define EXP2_POLY_ORDER 5
+#define EXP2_POLY_WIDE 0
+extern const struct exp_data
+{
+ double invln2N;
+ double shift;
+ double negln2hiN;
+ double negln2loN;
+ double poly[4]; /* Last four coefficients. */
+ double exp2_shift;
+ double exp2_poly[EXP2_POLY_ORDER];
+ uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
+} __exp_data HIDDEN;
+
+/* Copied from math/v_exp.h for use in vector exp_tail. */
+#define V_EXP_TAIL_TABLE_BITS 8
+extern const uint64_t __v_exp_tail_data[1 << V_EXP_TAIL_TABLE_BITS] HIDDEN;
+
+/* Copied from math/v_exp.h for use in vector exp2. */
+#define V_EXP_TABLE_BITS 7
+extern const uint64_t __v_exp_data[1 << V_EXP_TABLE_BITS] HIDDEN;
+
+extern const struct erf_data
+{
+ struct
+ {
+ double erf, scale;
+ } tab[769];
+} __erf_data HIDDEN;
+
+extern const struct sv_erf_data
+{
+ double erf[769];
+ double scale[769];
+} __sv_erf_data HIDDEN;
+
+extern const struct erfc_data
+{
+ struct
+ {
+ double erfc, scale;
+ } tab[3488];
+} __erfc_data HIDDEN;
+
+#define ATAN_POLY_NCOEFFS 20
+extern const struct atan_poly_data
+{
+ double poly[ATAN_POLY_NCOEFFS];
+} __atan_poly_data HIDDEN;
+
+#define ATANF_POLY_NCOEFFS 8
+extern const struct atanf_poly_data
+{
+ float poly[ATANF_POLY_NCOEFFS];
+} __atanf_poly_data HIDDEN;
+
+#define ASINHF_NCOEFFS 8
+extern const struct asinhf_data
+{
+ float coeffs[ASINHF_NCOEFFS];
+} __asinhf_data HIDDEN;
+
+#define LOG_TABLE_BITS 7
+#define LOG_POLY_ORDER 6
+#define LOG_POLY1_ORDER 12
+extern const struct log_data
+{
+ double ln2hi;
+ double ln2lo;
+ double poly[LOG_POLY_ORDER - 1]; /* First coefficient is 1. */
+ double poly1[LOG_POLY1_ORDER - 1];
+ struct
+ {
+ double invc, logc;
+ } tab[1 << LOG_TABLE_BITS];
+#if !HAVE_FAST_FMA
+ struct
+ {
+ double chi, clo;
+ } tab2[1 << LOG_TABLE_BITS];
+#endif
+} __log_data HIDDEN;
+
+#define ASINH_NCOEFFS 18
+extern const struct asinh_data
+{
+ double poly[ASINH_NCOEFFS];
+} __asinh_data HIDDEN;
+
+#define LOG1P_NCOEFFS 19
+extern const struct log1p_data
+{
+ double coeffs[LOG1P_NCOEFFS];
+} __log1p_data HIDDEN;
+
+#define LOG1PF_2U5
+#define LOG1PF_NCOEFFS 9
+extern const struct log1pf_data
+{
+ float coeffs[LOG1PF_NCOEFFS];
+} __log1pf_data HIDDEN;
+
+#define TANF_P_POLY_NCOEFFS 6
+/* cotan approach needs order 3 on [0, pi/4] to reach <3.5ulps. */
+#define TANF_Q_POLY_NCOEFFS 4
+extern const struct tanf_poly_data
+{
+ float poly_tan[TANF_P_POLY_NCOEFFS];
+ float poly_cotan[TANF_Q_POLY_NCOEFFS];
+} __tanf_poly_data HIDDEN;
+
+#define V_LOG2_TABLE_BITS 7
+extern const struct v_log2_data
+{
+ double poly[5];
+ double invln2;
+ struct
+ {
+ double invc, log2c;
+ } table[1 << V_LOG2_TABLE_BITS];
+} __v_log2_data HIDDEN;
+
+#define V_LOG10_TABLE_BITS 7
+extern const struct v_log10_data
+{
+ double poly[5];
+ double invln10, log10_2;
+ struct
+ {
+ double invc, log10c;
+ } table[1 << V_LOG10_TABLE_BITS];
+} __v_log10_data HIDDEN;
+
+/* Some data for SVE powf's internal exp and log. */
+#define V_POWF_EXP2_TABLE_BITS 5
+#define V_POWF_EXP2_N (1 << V_POWF_EXP2_TABLE_BITS)
+#define V_POWF_LOG2_TABLE_BITS 5
+#define V_POWF_LOG2_N (1 << V_POWF_LOG2_TABLE_BITS)
+extern const struct v_powf_data
+{
+ double invc[V_POWF_LOG2_N];
+ double logc[V_POWF_LOG2_N];
+ uint64_t scale[V_POWF_EXP2_N];
+} __v_powf_data HIDDEN;
+
+#define V_LOG_POLY_ORDER 6
+#define V_LOG_TABLE_BITS 7
+extern const struct v_log_data
+{
+ /* Shared data for vector log and log-derived routines (e.g. asinh). */
+ double poly[V_LOG_POLY_ORDER - 1];
+ double ln2;
+ struct
+ {
+ double invc, logc;
+ } table[1 << V_LOG_TABLE_BITS];
+} __v_log_data HIDDEN;
+
+#define EXPM1F_POLY_ORDER 5
+extern const float __expm1f_poly[EXPM1F_POLY_ORDER] HIDDEN;
+
+#define EXPF_TABLE_BITS 5
+#define EXPF_POLY_ORDER 3
+extern const struct expf_data
+{
+ uint64_t tab[1 << EXPF_TABLE_BITS];
+ double invln2_scaled;
+ double poly_scaled[EXPF_POLY_ORDER];
+} __expf_data HIDDEN;
+
+#define EXPM1_POLY_ORDER 11
+extern const double __expm1_poly[EXPM1_POLY_ORDER] HIDDEN;
+
+extern const struct cbrtf_data
+{
+ float poly[4];
+ float table[5];
+} __cbrtf_data HIDDEN;
+
+extern const struct cbrt_data
+{
+ double poly[4];
+ double table[5];
+} __cbrt_data HIDDEN;
+
+#define ASINF_POLY_ORDER 4
+extern const float __asinf_poly[ASINF_POLY_ORDER + 1] HIDDEN;
+
+#define ASIN_POLY_ORDER 11
+extern const double __asin_poly[ASIN_POLY_ORDER + 1] HIDDEN;
+
+/* Some data for AdvSIMD and SVE pow's internal exp and log. */
+#define V_POW_EXP_TABLE_BITS 8
+extern const struct v_pow_exp_data
+{
+ double poly[3];
+ double n_over_ln2, ln2_over_n_hi, ln2_over_n_lo, shift;
+ uint64_t sbits[1 << V_POW_EXP_TABLE_BITS];
+} __v_pow_exp_data HIDDEN;
+
+#define V_POW_LOG_TABLE_BITS 7
+extern const struct v_pow_log_data
+{
+ double poly[7]; /* First coefficient is 1. */
+ double ln2_hi, ln2_lo;
+ double invc[1 << V_POW_LOG_TABLE_BITS];
+ double logc[1 << V_POW_LOG_TABLE_BITS];
+ double logctail[1 << V_POW_LOG_TABLE_BITS];
+} __v_pow_log_data HIDDEN;
+
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/math_err.c b/contrib/arm-optimized-routines/pl/math/math_err.c
new file mode 100644
index 000000000000..74db54a5b2cd
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/math_err.c
@@ -0,0 +1,78 @@
+/*
+ * Double-precision math error handling.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+# include <errno.h>
+/* NOINLINE reduces code size and avoids making math functions non-leaf
+ when the error handling is inlined. */
+NOINLINE static double
+with_errno (double y, int e)
+{
+ errno = e;
+ return y;
+}
+#else
+# define with_errno(x, e) (x)
+#endif
+
+/* NOINLINE reduces code size. */
+NOINLINE static double
+xflow (uint32_t sign, double y)
+{
+ y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
+ return with_errno (y, ERANGE);
+}
+
+HIDDEN double
+__math_uflow (uint32_t sign)
+{
+ return xflow (sign, 0x1p-767);
+}
+
+/* Underflows to zero in some non-nearest rounding mode, setting errno
+ is valid even if the result is non-zero, but in the subnormal range. */
+HIDDEN double
+__math_may_uflow (uint32_t sign)
+{
+ return xflow (sign, 0x1.8p-538);
+}
+
+HIDDEN double
+__math_oflow (uint32_t sign)
+{
+ return xflow (sign, 0x1p769);
+}
+
+HIDDEN double
+__math_divzero (uint32_t sign)
+{
+ double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
+ return with_errno (y, ERANGE);
+}
+
+HIDDEN double
+__math_invalid (double x)
+{
+ double y = (x - x) / (x - x);
+ return isnan (x) ? y : with_errno (y, EDOM);
+}
+
+/* Check result and set errno if necessary. */
+
+HIDDEN double
+__math_check_uflow (double y)
+{
+ return y == 0.0 ? with_errno (y, ERANGE) : y;
+}
+
+HIDDEN double
+__math_check_oflow (double y)
+{
+ return isinf (y) ? with_errno (y, ERANGE) : y;
+}
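The thresholds above are chosen so that squaring them forces the intended double-precision result in xflow: 0x1p769 squared is 2^1538, far above the largest finite double (just below 2^1024), so the product is infinity and errno is set to ERANGE; 0x1p-767 squared is 2^-1534, far below the smallest subnormal 2^-1074, so it is zero in every rounding mode; and 0x1.8p-538 squared is 1.125 * 2^-1075, which (for the positive case) rounds to the smallest subnormal under round-to-nearest but to zero when rounding toward zero, matching the may-underflow comment.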
diff --git a/contrib/arm-optimized-routines/pl/math/math_errf.c b/contrib/arm-optimized-routines/pl/math/math_errf.c
new file mode 100644
index 000000000000..2b8c6bd25753
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/math_errf.c
@@ -0,0 +1,78 @@
+/*
+ * Single-precision math error handling.
+ *
+ * Copyright (c) 2017-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#if WANT_ERRNO
+# include <errno.h>
+/* NOINLINE reduces code size and avoids making math functions non-leaf
+ when the error handling is inlined. */
+NOINLINE static float
+with_errnof (float y, int e)
+{
+ errno = e;
+ return y;
+}
+#else
+# define with_errnof(x, e) (x)
+#endif
+
+/* NOINLINE reduces code size. */
+NOINLINE static float
+xflowf (uint32_t sign, float y)
+{
+ y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
+ return with_errnof (y, ERANGE);
+}
+
+HIDDEN float
+__math_uflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1p-95f);
+}
+
+/* Underflows to zero in some non-nearest rounding mode, setting errno
+ is valid even if the result is non-zero, but in the subnormal range. */
+HIDDEN float
+__math_may_uflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1.4p-75f);
+}
+
+HIDDEN float
+__math_oflowf (uint32_t sign)
+{
+ return xflowf (sign, 0x1p97f);
+}
+
+HIDDEN float
+__math_divzerof (uint32_t sign)
+{
+ float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
+ return with_errnof (y, ERANGE);
+}
+
+HIDDEN float
+__math_invalidf (float x)
+{
+ float y = (x - x) / (x - x);
+ return isnan (x) ? y : with_errnof (y, EDOM);
+}
+
+/* Check result and set errno if necessary. */
+
+HIDDEN float
+__math_check_uflowf (float y)
+{
+ return y == 0.0f ? with_errnof (y, ERANGE) : y;
+}
+
+HIDDEN float
+__math_check_oflowf (float y)
+{
+ return isinf (y) ? with_errnof (y, ERANGE) : y;
+}
diff --git a/contrib/arm-optimized-routines/pl/math/pl_sig.h b/contrib/arm-optimized-routines/pl/math/pl_sig.h
new file mode 100644
index 000000000000..52d988f0e1ce
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/pl_sig.h
@@ -0,0 +1,59 @@
+/*
+ * PL macros for emitting various ulp/bench entries based on function signature
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
+#define V_NAME_D1(fun) _ZGVnN2v_##fun
+#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
+#define V_NAME_D2(fun) _ZGVnN2vv_##fun
+
+#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
+#define SV_NAME_D1(fun) _ZGVsMxv_##fun
+#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
+#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
+
+#define PL_DECL_SF1(fun) float fun##f (float);
+#define PL_DECL_SF2(fun) float fun##f (float, float);
+#define PL_DECL_SD1(fun) double fun (double);
+#define PL_DECL_SD2(fun) double fun (double, double);
+
+#if WANT_VMATH
+# define PL_DECL_VF1(fun) \
+ VPCS_ATTR float32x4_t V_NAME_F1 (fun##f) (float32x4_t);
+# define PL_DECL_VF2(fun) \
+ VPCS_ATTR float32x4_t V_NAME_F2 (fun##f) (float32x4_t, float32x4_t);
+# define PL_DECL_VD1(fun) VPCS_ATTR float64x2_t V_NAME_D1 (fun) (float64x2_t);
+# define PL_DECL_VD2(fun) \
+ VPCS_ATTR float64x2_t V_NAME_D2 (fun) (float64x2_t, float64x2_t);
+#else
+# define PL_DECL_VF1(fun)
+# define PL_DECL_VF2(fun)
+# define PL_DECL_VD1(fun)
+# define PL_DECL_VD2(fun)
+#endif
+
+#if WANT_SVE_MATH
+# define PL_DECL_SVF1(fun) \
+ svfloat32_t SV_NAME_F1 (fun) (svfloat32_t, svbool_t);
+# define PL_DECL_SVF2(fun) \
+ svfloat32_t SV_NAME_F2 (fun) (svfloat32_t, svfloat32_t, svbool_t);
+# define PL_DECL_SVD1(fun) \
+ svfloat64_t SV_NAME_D1 (fun) (svfloat64_t, svbool_t);
+# define PL_DECL_SVD2(fun) \
+ svfloat64_t SV_NAME_D2 (fun) (svfloat64_t, svfloat64_t, svbool_t);
+#else
+# define PL_DECL_SVF1(fun)
+# define PL_DECL_SVF2(fun)
+# define PL_DECL_SVD1(fun)
+# define PL_DECL_SVD2(fun)
+#endif
+
+/* For building the routines, emit function prototype from PL_SIG. This
+ ensures that the correct signature has been chosen (wrong one will be a
+ compile error). PL_SIG is defined differently by various components of the
+ build system to emit entries in the wrappers and entries for mathbench and
+ ulp. */
+#define PL_SIG(v, t, a, f, ...) PL_DECL_##v##t##a (f)
diff --git a/contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h b/contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h
new file mode 100644
index 000000000000..438e153dff90
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_advsimd_f32.h
@@ -0,0 +1,24 @@
+/*
+ * Helpers for evaluating polynomials on single-precision AdvSIMD input, using
+ * various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_POLY_ADVSIMD_F32_H
+#define PL_MATH_POLY_ADVSIMD_F32_H
+
+#include <arm_neon.h>
+
+/* Wrap AdvSIMD f32 helpers: evaluation of some scheme/order has form:
+ v_[scheme]_[order]_f32. */
+#define VTYPE float32x4_t
+#define FMA(x, y, z) vfmaq_f32 (z, x, y)
+#define VWRAP(f) v_##f##_f32
+#include "poly_generic.h"
+#undef VWRAP
+#undef FMA
+#undef VTYPE
+
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h b/contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h
new file mode 100644
index 000000000000..7ea249a91225
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_advsimd_f64.h
@@ -0,0 +1,24 @@
+/*
+ * Helpers for evaluating polynomials on double-precision AdvSIMD input, using
+ * various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_POLY_ADVSIMD_F64_H
+#define PL_MATH_POLY_ADVSIMD_F64_H
+
+#include <arm_neon.h>
+
+/* Wrap AdvSIMD f64 helpers: evaluation of some scheme/order has form:
+ v_[scheme]_[order]_f64. */
+#define VTYPE float64x2_t
+#define FMA(x, y, z) vfmaq_f64 (z, x, y)
+#define VWRAP(f) v_##f##_f64
+#include "poly_generic.h"
+#undef VWRAP
+#undef FMA
+#undef VTYPE
+
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/poly_generic.h b/contrib/arm-optimized-routines/pl/math/poly_generic.h
new file mode 100644
index 000000000000..3fc25f8762f2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_generic.h
@@ -0,0 +1,277 @@
+/*
+ * Generic helpers for evaluating polynomials with various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef VTYPE
+# error Cannot use poly_generic without defining VTYPE
+#endif
+#ifndef VWRAP
+# error Cannot use poly_generic without defining VWRAP
+#endif
+#ifndef FMA
+# error Cannot use poly_generic without defining FMA
+#endif
+
+static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
+ const VTYPE *poly)
+{
+ /* At order 3, Estrin and Pairwise Horner are identical. */
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ VTYPE p23 = FMA (poly[3], x, poly[2]);
+ return FMA (p23, x2, p01);
+}
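Written out for the cubic c0 + c1 x + c2 x^2 + c3 x^3, both schemes group the terms as (c0 + c1 x) + x^2 (c2 + c3 x): two independent fused multiply-adds followed by one combining fused multiply-add, which is why Estrin and pairwise Horner coincide at this order.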
+
+static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
+ const VTYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
+ return FMA (poly[4], x4, p03);
+}
+static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
+ const VTYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
+ VTYPE p45 = FMA (poly[5], x, poly[4]);
+ return FMA (p45, x4, p03);
+}
+static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
+ const VTYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
+ VTYPE p45 = FMA (poly[5], x, poly[4]);
+ VTYPE p46 = FMA (poly[6], x2, p45);
+ return FMA (p46, x4, p03);
+}
+static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
+ const VTYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
+ VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4);
+ return FMA (p47, x4, p03);
+}
+static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ VTYPE p89 = FMA (poly[9], x, poly[8]);
+ return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ VTYPE p89 = FMA (poly[9], x, poly[8]);
+ VTYPE p8_10 = FMA (poly[10], x2, p89);
+ return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
+ return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8,
+ VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8,
+ VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8,
+ VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ const VTYPE *poly)
+{
+ return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8,
+ VWRAP (estrin_7) (x, x2, x4, poly));
+}
+static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ VTYPE x16, const VTYPE *poly)
+{
+ return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
+}
+static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ VTYPE x16, const VTYPE *poly)
+{
+ VTYPE p16_17 = FMA (poly[17], x, poly[16]);
+ return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
+}
+static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ VTYPE x16, const VTYPE *poly)
+{
+ VTYPE p16_17 = FMA (poly[17], x, poly[16]);
+ VTYPE p16_18 = FMA (poly[18], x2, p16_17);
+ return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
+}
+static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
+ VTYPE x16, const VTYPE *poly)
+{
+ VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
+ return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
+}
+
+static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly)
+{
+ VTYPE p = FMA (poly[2], x, poly[1]);
+ return FMA (x, p, poly[0]);
+}
+static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
+{
+ VTYPE p = FMA (poly[3], x, poly[2]);
+ p = FMA (x, p, poly[1]);
+ p = FMA (x, p, poly[0]);
+ return p;
+}
+static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
+{
+ VTYPE p = FMA (poly[4], x, poly[3]);
+ p = FMA (x, p, poly[2]);
+ p = FMA (x, p, poly[1]);
+ p = FMA (x, p, poly[0]);
+ return p;
+}
+static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
+{
+ return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]);
+}
+
+static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ VTYPE p23 = FMA (poly[3], x, poly[2]);
+ VTYPE p;
+ p = FMA (x2, poly[4], p23);
+ p = FMA (x2, p, p01);
+ return p;
+}
+static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ VTYPE p23 = FMA (poly[3], x, poly[2]);
+ VTYPE p45 = FMA (poly[5], x, poly[4]);
+ VTYPE p;
+ p = FMA (x2, p45, p23);
+ p = FMA (x2, p, p01);
+ return p;
+}
+static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p26, p01);
+}
+static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p27, p01);
+}
+static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p28, p01);
+}
+static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p29, p01);
+}
+static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_10, p01);
+}
+static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_11, p01);
+}
+static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_12, p01);
+}
+static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_13, p01);
+}
+static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_14, p01);
+}
+static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_15, p01);
+}
+static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_16, p01);
+}
+static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_17, p01);
+}
+static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
+{
+ VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2);
+ VTYPE p01 = FMA (poly[1], x, poly[0]);
+ return FMA (x2, p2_18, p01);
+}
diff --git a/contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h b/contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h
new file mode 100644
index 000000000000..a9b1c5544494
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_scalar_f32.h
@@ -0,0 +1,24 @@
+/*
+ * Helpers for evaluating polynomials on single-precision scalar input, using
+ * various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_POLY_SCALAR_F32_H
+#define PL_MATH_POLY_SCALAR_F32_H
+
+#include <math.h>
+
+/* Wrap scalar f32 helpers: evaluation of some scheme/order has form:
+ [scheme]_[order]_f32. */
+#define VTYPE float
+#define FMA fmaf
+#define VWRAP(f) f##_f32
+#include "poly_generic.h"
+#undef VWRAP
+#undef FMA
+#undef VTYPE
+
+#endif
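
The three #defines above drive token pasting in poly_generic.h, so a single generic body yields the whole _f32 family. For example, VWRAP (pw_horner_5) with these definitions expands to roughly the following (an illustrative expansion, not a separate source file; it needs <math.h> for fmaf):

#include <math.h>

static inline float
pw_horner_5_f32 (float x, float x2, const float *poly)
{
  float p01 = fmaf (poly[1], x, poly[0]);
  float p23 = fmaf (poly[3], x, poly[2]);
  float p45 = fmaf (poly[5], x, poly[4]);
  float p = fmaf (x2, p45, p23);
  return fmaf (x2, p, p01);
}

The f64 header below does the same with VTYPE double, FMA fma and a _f64 suffix.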
diff --git a/contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h b/contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h
new file mode 100644
index 000000000000..207dccee30ad
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_scalar_f64.h
@@ -0,0 +1,24 @@
+/*
+ * Helpers for evaluating polynomials on double-precision scalar input, using
+ * various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_POLY_SCALAR_F64_H
+#define PL_MATH_POLY_SCALAR_F64_H
+
+#include <math.h>
+
+/* Wrap scalar f64 helpers: evaluation of some scheme/order has form:
+ [scheme]_[order]_f64. */
+#define VTYPE double
+#define FMA fma
+#define VWRAP(f) f##_f64
+#include "poly_generic.h"
+#undef VWRAP
+#undef FMA
+#undef VTYPE
+
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/poly_sve_f32.h b/contrib/arm-optimized-routines/pl/math/poly_sve_f32.h
new file mode 100644
index 000000000000..a97e2ced027a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_sve_f32.h
@@ -0,0 +1,26 @@
+/*
+ * Helpers for evaluating polynomials on single-precision SVE input, using
+ * various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_POLY_SVE_F32_H
+#define PL_MATH_POLY_SVE_F32_H
+
+#include <arm_sve.h>
+
+/* Wrap SVE f32 helpers: evaluation of some scheme/order has form:
+ sv_[scheme]_[order]_f32_x. */
+#define VTYPE svfloat32_t
+#define STYPE float
+#define VWRAP(f) sv_##f##_f32_x
+#define DUP svdup_f32
+#include "poly_sve_generic.h"
+#undef DUP
+#undef VWRAP
+#undef STYPE
+#undef VTYPE
+
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/poly_sve_f64.h b/contrib/arm-optimized-routines/pl/math/poly_sve_f64.h
new file mode 100644
index 000000000000..5fb14b3c1700
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_sve_f64.h
@@ -0,0 +1,26 @@
+/*
+ * Helpers for evaluating polynomials on double-precision SVE input, using
+ * various schemes.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_POLY_SVE_F64_H
+#define PL_MATH_POLY_SVE_F64_H
+
+#include <arm_sve.h>
+
+/* Wrap SVE f64 helpers: evaluation of some scheme/order has form:
+ sv_[scheme]_[order]_f64_x. */
+#define VTYPE svfloat64_t
+#define STYPE double
+#define VWRAP(f) sv_##f##_f64_x
+#define DUP svdup_f64
+#include "poly_sve_generic.h"
+#undef DUP
+#undef VWRAP
+#undef STYPE
+#undef VTYPE
+
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/poly_sve_generic.h b/contrib/arm-optimized-routines/pl/math/poly_sve_generic.h
new file mode 100644
index 000000000000..b568e4cddff3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/poly_sve_generic.h
@@ -0,0 +1,301 @@
+/*
+ * Helpers for evaluating polynomials with various schemes - specific to SVE
+ * but precision-agnostic.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef VTYPE
+# error Cannot use poly_generic without defining VTYPE
+#endif
+#ifndef STYPE
+# error Cannot use poly_generic without defining STYPE
+#endif
+#ifndef VWRAP
+# error Cannot use poly_generic without defining VWRAP
+#endif
+#ifndef DUP
+# error Cannot use poly_generic without defining DUP
+#endif
+
+static inline VTYPE VWRAP (pairwise_poly_3) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ /* At order 3, Estrin and Pairwise Horner are identical. */
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]);
+ return svmla_x (pg, p01, p23, x2);
+}
+
+static inline VTYPE VWRAP (estrin_4) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
+ const STYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
+ return svmla_x (pg, p03, x4, poly[4]);
+}
+static inline VTYPE VWRAP (estrin_5) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
+ const STYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
+ VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]);
+ return svmla_x (pg, p03, p45, x4);
+}
+static inline VTYPE VWRAP (estrin_6) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
+ const STYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
+ VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]);
+ VTYPE p46 = svmla_x (pg, p45, x, poly[6]);
+ return svmla_x (pg, p03, p46, x4);
+}
+static inline VTYPE VWRAP (estrin_7) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
+ const STYPE *poly)
+{
+ VTYPE p03 = VWRAP (pairwise_poly_3) (pg, x, x2, poly);
+ VTYPE p47 = VWRAP (pairwise_poly_3) (pg, x, x2, poly + 4);
+ return svmla_x (pg, p03, p47, x4);
+}
+static inline VTYPE VWRAP (estrin_8) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
+ VTYPE x8, const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), x8, poly[8]);
+}
+static inline VTYPE VWRAP (estrin_9) (svbool_t pg, VTYPE x, VTYPE x2, VTYPE x4,
+ VTYPE x8, const STYPE *poly)
+{
+ VTYPE p89 = svmla_x (pg, DUP (poly[8]), x, poly[9]);
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p89, x8);
+}
+static inline VTYPE VWRAP (estrin_10) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, const STYPE *poly)
+{
+ VTYPE p89 = svmla_x (pg, DUP (poly[8]), x, poly[9]);
+ VTYPE p8_10 = svmla_x (pg, p89, x2, poly[10]);
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p8_10, x8);
+}
+static inline VTYPE VWRAP (estrin_11) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, const STYPE *poly)
+{
+ VTYPE p8_11 = VWRAP (pairwise_poly_3) (pg, x, x2, poly + 8);
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly), p8_11, x8);
+}
+static inline VTYPE VWRAP (estrin_12) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
+ VWRAP (estrin_4) (pg, x, x2, x4, poly + 8), x8);
+}
+static inline VTYPE VWRAP (estrin_13) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
+ VWRAP (estrin_5) (pg, x, x2, x4, poly + 8), x8);
+}
+static inline VTYPE VWRAP (estrin_14) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
+ VWRAP (estrin_6) (pg, x, x2, x4, poly + 8), x8);
+}
+static inline VTYPE VWRAP (estrin_15) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_7) (pg, x, x2, x4, poly),
+ VWRAP (estrin_7) (pg, x, x2, x4, poly + 8), x8);
+}
+static inline VTYPE VWRAP (estrin_16) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, VTYPE x16,
+ const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), x16,
+ poly[16]);
+}
+static inline VTYPE VWRAP (estrin_17) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, VTYPE x16,
+ const STYPE *poly)
+{
+ VTYPE p16_17 = svmla_x (pg, DUP (poly[16]), x, poly[17]);
+ return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), p16_17,
+ x16);
+}
+static inline VTYPE VWRAP (estrin_18) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, VTYPE x16,
+ const STYPE *poly)
+{
+ VTYPE p16_17 = svmla_x (pg, DUP (poly[16]), x, poly[17]);
+ VTYPE p16_18 = svmla_x (pg, p16_17, x2, poly[18]);
+ return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly), p16_18,
+ x16);
+}
+static inline VTYPE VWRAP (estrin_19) (svbool_t pg, VTYPE x, VTYPE x2,
+ VTYPE x4, VTYPE x8, VTYPE x16,
+ const STYPE *poly)
+{
+ return svmla_x (pg, VWRAP (estrin_15) (pg, x, x2, x4, x8, poly),
+ VWRAP (pairwise_poly_3) (pg, x, x2, poly + 16), x16);
+}
+
+static inline VTYPE VWRAP (horner_3) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ VTYPE p = svmla_x (pg, DUP (poly[2]), x, poly[3]);
+ p = svmad_x (pg, x, p, poly[1]);
+ p = svmad_x (pg, x, p, poly[0]);
+ return p;
+}
+static inline VTYPE VWRAP (horner_4) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ VTYPE p = svmla_x (pg, DUP (poly[3]), x, poly[4]);
+ p = svmad_x (pg, x, p, poly[2]);
+ p = svmad_x (pg, x, p, poly[1]);
+ p = svmad_x (pg, x, p, poly[0]);
+ return p;
+}
+static inline VTYPE VWRAP (horner_5) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_4) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_6) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_5) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_7) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_6) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_8) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_7) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE VWRAP (horner_9) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_8) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE
+VWRAP (horner_10) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_9) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE
+VWRAP (horner_11) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_10) (pg, x, poly + 1), poly[0]);
+}
+static inline VTYPE
+VWRAP (horner_12) (svbool_t pg, VTYPE x, const STYPE *poly)
+{
+ return svmad_x (pg, x, VWRAP (horner_11) (pg, x, poly + 1), poly[0]);
+}
+
+static inline VTYPE VWRAP (pw_horner_4) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]);
+ VTYPE p;
+ p = svmla_x (pg, p23, x2, poly[4]);
+ p = svmla_x (pg, p01, x2, p);
+ return p;
+}
+static inline VTYPE VWRAP (pw_horner_5) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ VTYPE p23 = svmla_x (pg, DUP (poly[2]), x, poly[3]);
+ VTYPE p45 = svmla_x (pg, DUP (poly[4]), x, poly[5]);
+ VTYPE p;
+ p = svmla_x (pg, p23, x2, p45);
+ p = svmla_x (pg, p01, x2, p);
+ return p;
+}
+static inline VTYPE VWRAP (pw_horner_6) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p26 = VWRAP (pw_horner_4) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p26);
+}
+static inline VTYPE VWRAP (pw_horner_7) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p27 = VWRAP (pw_horner_5) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p27);
+}
+static inline VTYPE VWRAP (pw_horner_8) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p28 = VWRAP (pw_horner_6) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p28);
+}
+static inline VTYPE VWRAP (pw_horner_9) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p29 = VWRAP (pw_horner_7) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p29);
+}
+static inline VTYPE VWRAP (pw_horner_10) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_10 = VWRAP (pw_horner_8) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_10);
+}
+static inline VTYPE VWRAP (pw_horner_11) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_11 = VWRAP (pw_horner_9) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_11);
+}
+static inline VTYPE VWRAP (pw_horner_12) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_12 = VWRAP (pw_horner_10) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_12);
+}
+static inline VTYPE VWRAP (pw_horner_13) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_13 = VWRAP (pw_horner_11) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_13);
+}
+static inline VTYPE VWRAP (pw_horner_14) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_14 = VWRAP (pw_horner_12) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_14);
+}
+static inline VTYPE VWRAP (pw_horner_15) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_15 = VWRAP (pw_horner_13) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_15);
+}
+static inline VTYPE VWRAP (pw_horner_16) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_16 = VWRAP (pw_horner_14) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_16);
+}
+static inline VTYPE VWRAP (pw_horner_17) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_17 = VWRAP (pw_horner_15) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_17);
+}
+static inline VTYPE VWRAP (pw_horner_18) (svbool_t pg, VTYPE x, VTYPE x2,
+ const STYPE *poly)
+{
+ VTYPE p2_18 = VWRAP (pw_horner_16) (pg, x, x2, poly + 2);
+ VTYPE p01 = svmla_x (pg, DUP (poly[0]), x, poly[1]);
+ return svmla_x (pg, p01, x2, p2_18);
+}
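
A minimal usage sketch (not part of the patch) showing how the generated SVE helpers are meant to be called from a strip-mined loop. It assumes an SVE-enabled toolchain (e.g. -march=armv8-a+sve) and that the pl/math headers added by this patch are on the include path; eval_poly and its 5-entry coefficient array are hypothetical names used only for illustration.

#include <stddef.h>
#include <arm_sve.h>
#include "poly_sve_f64.h"

/* Evaluate p(x) = coeffs[0] + coeffs[1]*x + ... + coeffs[4]*x^4 over a
   buffer, one whole vector at a time, with the tail handled by the
   predicate.  */
void
eval_poly (double *dst, const double *src, size_t n, const double *coeffs)
{
  for (size_t i = 0; i < n; i += svcntd ())
    {
      svbool_t pg = svwhilelt_b64 (i, n);
      svfloat64_t x = svld1 (pg, src + i);
      svfloat64_t x2 = svmul_x (pg, x, x);
      svfloat64_t y = sv_pw_horner_4_f64_x (pg, x, x2, coeffs);
      svst1 (pg, dst + i, y);
    }
}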
diff --git a/contrib/arm-optimized-routines/pl/math/sinh_3u.c b/contrib/arm-optimized-routines/pl/math/sinh_3u.c
new file mode 100644
index 000000000000..1d86629ee2a3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sinh_3u.c
@@ -0,0 +1,63 @@
+/*
+ * Double-precision sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define Half 0x3fe0000000000000
+#define OFlowBound \
+ 0x40862e42fefa39f0 /* 0x1.62e42fefa39fp+9, above which using expm1 results \
+ in NaN. */
+
+double
+__exp_dd (double, double);
+
+/* Approximation for double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ sinh(0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+ want 0x1.ab34e59d678d9p-2. */
+double
+sinh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iax = ix & AbsMask;
+ double ax = asdouble (iax);
+ uint64_t sign = ix & ~AbsMask;
+ double halfsign = asdouble (Half | sign);
+
+ if (unlikely (iax >= OFlowBound))
+ {
+ /* Special values and overflow. */
+ if (unlikely (iax > 0x7ff0000000000000))
+ return __math_invalid (x);
+ /* expm1 overflows a little before sinh. We have to fill this
+ gap by using a different algorithm, in this case we use a
+ double-precision exp helper. For large x sinh(x) is dominated
+ by exp(x), however we cannot compute exp without overflow
+ either. We use the identity: exp(a) = (exp(a / 2)) ^ 2
+ to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0
+ ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0. */
+ double e = __exp_dd (ax / 2, 0);
+ return (e * halfsign) * e;
+ }
+
+ /* Use expm1 to retain acceptable precision for small numbers.
+ Let t = e^(|x|) - 1. */
+ double t = expm1 (ax);
+ /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0
+ (t + t / (t + 1)) / -2 for x < 0. */
+ return (t + t / (t + 1)) * halfsign;
+}
+
+PL_SIG (S, D, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (sinh, 2.08)
+PL_TEST_SYM_INTERVAL (sinh, 0, 0x1p-51, 100)
+PL_TEST_SYM_INTERVAL (sinh, 0x1p-51, 0x1.62e42fefa39fp+9, 100000)
+PL_TEST_SYM_INTERVAL (sinh, 0x1.62e42fefa39fp+9, inf, 1000)
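
A quick standalone check (not part of the patch) of the expm1-based identity used above. With t = expm1(|x|) = e^|x| - 1, (t + t / (t + 1)) / 2 equals (e^|x| - e^-|x|) / 2 in real arithmetic, so the printed values should agree with libm sinh to within a few ULP.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double xs[] = { 0x1p-30, 0.25, 1.0, 10.0, 100.0 };
  for (unsigned i = 0; i < sizeof xs / sizeof xs[0]; i++)
    {
      /* t = e^x - 1, so (t + t / (t + 1)) / 2 == (e^x - e^-x) / 2.  */
      double t = expm1 (xs[i]);
      double via_expm1 = (t + t / (t + 1)) * 0.5;
      printf ("x=%-6g identity=%.17g libm=%.17g\n", xs[i], via_expm1,
              sinh (xs[i]));
    }
  return 0;
}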
diff --git a/contrib/arm-optimized-routines/pl/math/sinhf_2u3.c b/contrib/arm-optimized-routines/pl/math/sinhf_2u3.c
new file mode 100644
index 000000000000..aa7aadcf67c5
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sinhf_2u3.c
@@ -0,0 +1,73 @@
+/*
+ * Single-precision sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define Expm1OFlowLimit \
+ 0x42b17218 /* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f \
+ overflows. */
+#define OFlowLimit \
+ 0x42b2d4fd /* 0x1.65a9fap+6, minimum positive value for which sinhf should \
+ overflow. */
+
+float
+optr_aor_exp_f32 (float);
+
+/* Approximation for single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The maximum error is 2.26 ULP:
+ sinhf(0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4. */
+float
+sinhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ float ax = asfloat (iax);
+ uint32_t sign = ix & ~AbsMask;
+ float halfsign = asfloat (Half | sign);
+
+ if (unlikely (iax >= Expm1OFlowLimit))
+ {
+ /* Special values and overflow. */
+ if (iax >= 0x7fc00001 || iax == 0x7f800000)
+ return x;
+ if (iax >= 0x7f800000)
+ return __math_invalidf (x);
+ if (iax >= OFlowLimit)
+ return __math_oflowf (sign);
+
+ /* expm1f overflows a little before sinhf, (~88.7 vs ~89.4). We have to
+ fill this gap by using a different algorithm, in this case we use a
+ double-precision exp helper. For large x sinh(x) is dominated by exp(x),
+ however we cannot compute exp without overflow either. We use the
+ identity:
+ exp(a) = (exp(a / 2)) ^ 2.
+ to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2 for x > 0
+ ~= (exp(|x| / 2)) ^ 2 / -2 for x < 0.
+ Greatest error in this region is 1.89 ULP:
+ sinhf(0x1.65898cp+6) got 0x1.f00aep+127 want 0x1.f00adcp+127. */
+ float e = optr_aor_exp_f32 (ax / 2);
+ return (e * halfsign) * e;
+ }
+
+ /* Use expm1f to retain acceptable precision for small numbers.
+ Let t = e^(|x|) - 1. */
+ float t = expm1f (ax);
+ /* Then sinh(x) = (t + t / (t + 1)) / 2 for x > 0
+ (t + t / (t + 1)) / -2 for x < 0. */
+ return (t + t / (t + 1)) * halfsign;
+}
+
+PL_SIG (S, F, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (sinhf, 1.76)
+PL_TEST_SYM_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000)
+PL_TEST_SYM_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100)
+PL_TEST_SYM_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100)
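
A small sketch (not part of the patch) of the overflow-avoidance trick described in the comment above: near the sinhf overflow bound, expf(x) is already infinite, but squaring expf(x/2) with the final halving folded into one factor stays finite.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* 89.0 is between the expm1f/expf overflow bound (~88.7) and the sinhf
     overflow bound (~89.4).  */
  float x = 89.0f;
  float halfsign = 0.5f; /* copysignf (0.5f, x) in general.  */
  float e = expf (x / 2);
  printf ("expf (x) / 2   = %g\n", expf (x) / 2); /* inf: expf overflows.  */
  printf ("(e * half) * e = %g\n", (e * halfsign) * e);
  printf ("sinhf (x)      = %g\n", sinhf (x));
  return 0;
}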
diff --git a/contrib/arm-optimized-routines/pl/math/sinpi_3u.c b/contrib/arm-optimized-routines/pl/math/sinpi_3u.c
new file mode 100644
index 000000000000..a04a352a62e6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sinpi_3u.c
@@ -0,0 +1,90 @@
+/*
+ * Double-precision scalar sinpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define _GNU_SOURCE
+#include <math.h>
+#include "mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_scalar_f64.h"
+
+/* Taylor series coefficients for sin(pi * x).
+ The C2 coefficient (originally ~=5.16771278) has been split into two parts:
+ C2_hi = 4, C2_lo = C2 - C2_hi (~=1.16771278).
+ This change in magnitude reduces floating point rounding errors.
+ C2_hi is then reintroduced after the polynomial approximation. */
+static const double poly[]
+ = { 0x1.921fb54442d184p1, -0x1.2aef39896f94bp0, 0x1.466bc6775ab16p1,
+ -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+ 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16, 0x1.af86ae521260bp-21,
+ -0x1.012a9870eeb7dp-25 };
+
+#define Shift 0x1.8p+52
+
+/* Approximation for scalar double-precision sinpi(x).
+ Maximum error: 3.03 ULP:
+ sinpi(0x1.a90da2818f8b5p+7) got 0x1.fe358f255a4b3p-1
+ want 0x1.fe358f255a4b6p-1. */
+double
+sinpi (double x)
+{
+ if (isinf (x))
+ return __math_invalid (x);
+
+ double r = asdouble (asuint64 (x) & ~0x8000000000000000);
+ uint64_t sign = asuint64 (x) & 0x8000000000000000;
+
+ /* Edge cases for which sinpi should be exactly 0 (integers). 0x1p53 is the
+ limit above which double precision cannot represent a fractional part. */
+ if (r >= 0x1p53)
+ return 0;
+
+ /* If x is an integer, return 0. */
+ uint64_t m = (uint64_t) r;
+ if (r == m)
+ return 0;
+
+ /* For very small inputs, squaring r causes underflow.
+ Values below this threshold can be approximated via sinpi(x) ≈ pi*x. */
+ if (r < 0x1p-63)
+ return M_PI * x;
+
+ /* Any non-integer value >= 0x1p51 will be an integer + 0.5.
+ These values should return exactly 1 or -1. */
+ if (r >= 0x1p51)
+ {
+ uint64_t iy = ((m & 1) << 63) ^ asuint64 (1.0);
+ return asdouble (sign ^ iy);
+ }
+
+ /* n = rint(|x|). */
+ double n = r + Shift;
+ sign ^= (asuint64 (n) << 63);
+ n = n - Shift;
+
+ /* r = |x| - n (range reduction into -1/2 .. 1/2). */
+ r = r - n;
+
+ /* y = sin(pi * r). */
+ double r2 = r * r;
+ double y = horner_9_f64 (r2, poly);
+ y = y * r;
+
+ /* Reintroduce C2_hi. */
+ y = fma (-4 * r2, r, y);
+
+ /* Copy sign of x to sin(|x|). */
+ return asdouble (asuint64 (y) ^ sign);
+}
+
+PL_SIG (S, D, 1, sinpi, -0.9, 0.9)
+PL_TEST_ULP (sinpi, 2.53)
+PL_TEST_SYM_INTERVAL (sinpi, 0, 0x1p-63, 5000)
+PL_TEST_SYM_INTERVAL (sinpi, 0x1p-63, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (sinpi, 0.5, 0x1p51, 10000)
+PL_TEST_SYM_INTERVAL (sinpi, 0x1p51, inf, 10000)
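
A standalone sketch (not part of the patch) of the Shift trick used above for range reduction: adding and subtracting 0x1.8p52 rounds |x| to the nearest integer (in the default round-to-nearest mode), and bit 0 of the biased value carries the parity that decides whether sin(pi * x) needs its sign flipped.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t
asu64 (double d)
{
  uint64_t u;
  memcpy (&u, &d, sizeof u);
  return u;
}

int
main (void)
{
  const double shift = 0x1.8p+52;
  double r = 3.7;            /* |x|, assumed well below 2^51.  */
  double biased = r + shift; /* round(r) lands in the low mantissa bits.  */
  double n = biased - shift; /* round-to-nearest of r: 4.0 here.  */
  uint64_t parity = asu64 (biased) & 1;
  printf ("n = %g, parity = %llu, reduced = %g\n", n,
          (unsigned long long) parity, r - n);
  return 0;
}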
diff --git a/contrib/arm-optimized-routines/pl/math/sinpif_2u5.c b/contrib/arm-optimized-routines/pl/math/sinpif_2u5.c
new file mode 100644
index 000000000000..af9ca0573b37
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sinpif_2u5.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision scalar sinpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Taylor series coefficients for sin(pi * x). */
+#define C0 0x1.921fb6p1f
+#define C1 -0x1.4abbcep2f
+#define C2 0x1.466bc6p1f
+#define C3 -0x1.32d2ccp-1f
+#define C4 0x1.50783p-4f
+#define C5 -0x1.e30750p-8f
+
+#define Shift 0x1.0p+23f
+
+/* Approximation for scalar single-precision sinpi(x) - sinpif.
+ Maximum error: 2.48 ULP:
+ sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1
+ want 0x1.fa8c02p-1. */
+float
+sinpif (float x)
+{
+ if (isinf (x))
+ return __math_invalidf (x);
+
+ float r = asfloat (asuint (x) & ~0x80000000);
+ uint32_t sign = asuint (x) & 0x80000000;
+
+ /* Edge cases for which sinpif should be exactly 0 (integers). 0x1p23 is the
+ limit above which single precision cannot represent a fractional part. */
+ if (r >= 0x1p23f)
+ return 0;
+
+ int32_t m = roundf (r);
+ if (m == r)
+ return 0;
+
+ /* For very small inputs, squaring r causes underflow.
+ Values below this threshold can be approximated via sinpi(x) ~= pi*x. */
+ if (r < 0x1p-31f)
+ return C0 * x;
+
+ /* Any non-integer value >= 0x1p22f will be an integer + 0.5.
+ These values should return exactly 1 or -1. */
+ if (r >= 0x1p22f)
+ {
+ uint32_t iy = ((m & 1) << 31) ^ asuint (-1.0f);
+ return asfloat (sign ^ iy);
+ }
+
+ /* n = rint(|x|). */
+ float n = r + Shift;
+ sign ^= (asuint (n) << 31);
+ n = n - Shift;
+
+ /* r = |x| - n (range reduction into -1/2 .. 1/2). */
+ r = r - n;
+
+ /* y = sin(pi * r). */
+ float r2 = r * r;
+ float y = fmaf (C5, r2, C4);
+ y = fmaf (y, r2, C3);
+ y = fmaf (y, r2, C2);
+ y = fmaf (y, r2, C1);
+ y = fmaf (y, r2, C0);
+
+ /* Copy sign of x to sin(|x|). */
+ return asfloat (asuint (y * r) ^ sign);
+}
+
+PL_SIG (S, F, 1, sinpi, -0.9, 0.9)
+PL_TEST_ULP (sinpif, 1.99)
+PL_TEST_SYM_INTERVAL (sinpif, 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (sinpif, 0x1p-31, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (sinpif, 0.5, 0x1p22f, 10000)
+PL_TEST_SYM_INTERVAL (sinpif, 0x1p22f, inf, 10000)
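
The C0..C5 macros above closely track, though they are not necessarily bit-identical to, the Taylor terms of sin(pi * x), i.e. (-1)^k * pi^(2k+1) / (2k+1)!. A small sketch (not part of the patch) that prints those terms for comparison:

#include <math.h>
#include <stdio.h>

int
main (void)
{
  /* sin(pi*x) = sum_k (-1)^k * pi^(2k+1) / (2k+1)! * x^(2k+1).  */
  const double pi = 0x1.921fb54442d18p+1;
  double fact = 1.0; /* (2k+1)!, accumulated incrementally.  */
  for (int k = 0; k <= 5; k++)
    {
      int n = 2 * k + 1;
      if (k > 0)
        fact *= (double) (n - 1) * n;
      double term = ((k % 2) ? -1.0 : 1.0) * pow (pi, n) / fact;
      printf ("C%d ~ %a (%.8g)\n", k, term, term);
    }
  return 0;
}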
diff --git a/contrib/arm-optimized-routines/pl/math/sv_acos_2u.c b/contrib/arm-optimized-routines/pl/math/sv_acos_2u.c
new file mode 100644
index 000000000000..e06db6cae6af
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_acos_2u.c
@@ -0,0 +1,91 @@
+/*
+ * Double-precision SVE acos(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64_t poly[12];
+ float64_t pi, pi_over_2;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+ on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */
+ .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5,
+ 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
+ 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8,
+ 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, },
+ .pi = 0x1.921fb54442d18p+1,
+ .pi_over_2 = 0x1.921fb54442d18p+0,
+};
+
+/* Double-precision SVE implementation of vector acos(x).
+
+ For |x| in [0, 0.5], use an order 11 polynomial P such that the final
+ approximation of asin is an odd polynomial:
+
+ acos(x) ~ pi/2 - (x + x^3 P(x^2)).
+
+ The largest observed error in this region is 1.18 ulps,
+ _ZGVsMxv_acos (0x1.fbc5fe28ee9e3p-2) got 0x1.0d4d0f55667f6p+0
+ want 0x1.0d4d0f55667f7p+0.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 1.52 ulps,
+ _ZGVsMxv_acos (0x1.24024271a500ap-1) got 0x1.ed82df4243f0dp-1
+ want 0x1.ed82df4243f0bp-1. */
+svfloat64_t SV_NAME_D1 (acos) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+ svfloat64_t ax = svabs_x (pg, x);
+
+ svbool_t a_gt_half = svacgt (pg, x, 0.5);
+
+ /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ svfloat64_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5),
+ svmul_x (pg, x, x));
+ svfloat64_t z = svsqrt_m (ax, a_gt_half, z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ svfloat64_t z4 = svmul_x (pg, z2, z2);
+ svfloat64_t z8 = svmul_x (pg, z4, z4);
+ svfloat64_t z16 = svmul_x (pg, z8, z8);
+ svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly);
+
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+
+ /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5
+ = 2 Q(|x|) , for 0.5 < x < 1.0
+ = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */
+ svfloat64_t y
+ = svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (p), sign));
+
+ svbool_t is_neg = svcmplt (pg, x, 0.0);
+ svfloat64_t off = svdup_f64_z (is_neg, d->pi);
+ svfloat64_t mul = svsel (a_gt_half, sv_f64 (2.0), sv_f64 (-1.0));
+ svfloat64_t add = svsel (a_gt_half, off, sv_f64 (d->pi_over_2));
+
+ return svmla_x (pg, add, mul, y);
+}
+
+PL_SIG (SV, D, 1, acos, -1.0, 1.0)
+PL_TEST_ULP (SV_NAME_D1 (acos), 1.02)
+PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0, 0.5, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acos), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acos), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acos), -0, -inf, 20000)
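
A scalar reference sketch (not part of the patch) of the two-interval acos reconstruction described in the comment above, with libm asin standing in for the polynomial Q. It ignores special cases and is only meant to make the pi/2 - Q, 2 Q and pi - 2 Q branches concrete.

#include <math.h>
#include <stdio.h>

static double
acos_ref (double x)
{
  double ax = fabs (x);
  if (ax < 0.5)
    /* acos(x) = pi/2 - asin(x); asin plays the role of x + x^3 P(x^2).  */
    return 0x1.921fb54442d18p+0 - asin (x);
  /* |x| >= 0.5: z = (1 - |x|) / 2, Q(|x|) = asin(sqrt(z)).
     acos(x) = 2 Q(|x|) for x > 0, pi - 2 Q(|x|) for x < 0.  */
  double q = asin (sqrt ((1 - ax) / 2));
  return x > 0 ? 2 * q : 0x1.921fb54442d18p+1 - 2 * q;
}

int
main (void)
{
  double xs[] = { -0.9, -0.3, 0.1, 0.7 };
  for (unsigned i = 0; i < 4; i++)
    printf ("x=%+.1f ref=%.17g libm=%.17g\n", xs[i], acos_ref (xs[i]),
            acos (xs[i]));
  return 0;
}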
diff --git a/contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c b/contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c
new file mode 100644
index 000000000000..7ac59ceedfbd
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_acosf_1u4.c
@@ -0,0 +1,84 @@
+/*
+ * Single-precision SVE acos(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32_t poly[5];
+ float32_t pi, pi_over_2;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on
+ [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */
+ .poly = { 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6,
+ 0x1.3af7d8p-5, },
+ .pi = 0x1.921fb6p+1f,
+ .pi_over_2 = 0x1.921fb6p+0f,
+};
+
+/* Single-precision SVE implementation of vector acos(x).
+
+ For |x| in [0, 0.5], use order 4 polynomial P such that the final
+ approximation of asin is an odd polynomial:
+
+ acos(x) ~ pi/2 - (x + x^3 P(x^2)).
+
+ The largest observed error in this region is 1.16 ulps,
+ _ZGVsMxv_acosf(0x1.ffbeccp-2) got 0x1.0c27f8p+0
+ want 0x1.0c27f6p+0.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 1.32 ulps,
+ _ZGVsMxv_acosf (0x1.15ba56p-1) got 0x1.feb33p-1
+ want 0x1.feb32ep-1. */
+svfloat32_t SV_NAME_F1 (acos) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000);
+ svfloat32_t ax = svabs_x (pg, x);
+ svbool_t a_gt_half = svacgt (pg, x, 0.5);
+
+ /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ svfloat32_t z2 = svsel (a_gt_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5),
+ svmul_x (pg, x, x));
+ svfloat32_t z = svsqrt_m (ax, a_gt_half, z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ svfloat32_t p = sv_horner_4_f32_x (pg, z2, d->poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+
+ /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5
+ = 2 Q(|x|) , for 0.5 < x < 1.0
+ = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */
+ svfloat32_t y
+ = svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (p), sign));
+
+ svbool_t is_neg = svcmplt (pg, x, 0.0);
+ svfloat32_t off = svdup_f32_z (is_neg, d->pi);
+ svfloat32_t mul = svsel (a_gt_half, sv_f32 (2.0), sv_f32 (-1.0));
+ svfloat32_t add = svsel (a_gt_half, off, sv_f32 (d->pi_over_2));
+
+ return svmla_x (pg, add, mul, y);
+}
+
+PL_SIG (SV, F, 1, acos, -1.0, 1.0)
+PL_TEST_ULP (SV_NAME_F1 (acos), 0.82)
+PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0, 0.5, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (acos), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (acos), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (SV_NAME_F1 (acos), -0, -inf, 20000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c
new file mode 100644
index 000000000000..faf351331464
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_acosh_3u5.c
@@ -0,0 +1,50 @@
+/*
+ * Double-precision SVE acosh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define WANT_SV_LOG1P_K0_SHORTCUT 1
+#include "sv_log1p_inline.h"
+
+#define BigBoundTop 0x5fe /* top12 (asuint64 (0x1p511)). */
+#define OneTop 0x3ff
+
+static NOINLINE svfloat64_t
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (acosh, x, y, special);
+}
+
+/* SVE approximation for double-precision acosh, based on log1p.
+ The largest observed error is 3.19 ULP in the region where the
+ argument to log1p falls in the k=0 interval, i.e. x close to 1:
+ SV_NAME_D1 (acosh)(0x1.1e4388d4ca821p+0) got 0x1.ed23399f5137p-2
+ want 0x1.ed23399f51373p-2. */
+svfloat64_t SV_NAME_D1 (acosh) (svfloat64_t x, const svbool_t pg)
+{
+ svuint64_t itop = svlsr_x (pg, svreinterpret_u64 (x), 52);
+ /* (itop - OneTop) >= (BigBoundTop - OneTop). */
+ svbool_t special = svcmpge (pg, svsub_x (pg, itop, OneTop), sv_u64 (0x1ff));
+
+ svfloat64_t xm1 = svsub_x (pg, x, 1);
+ svfloat64_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1));
+ svfloat64_t y = sv_log1p_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
+
+ /* Fall back to scalar routine for special lanes. */
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, y, special);
+
+ return y;
+}
+
+PL_SIG (SV, D, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (acosh), 2.69)
+PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 1, 0x1p511, 90000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 0x1p511, inf, 10000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acosh), 0, 1, 1000)
+PL_TEST_INTERVAL (SV_NAME_D1 (acosh), -0, -inf, 10000)
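
A quick standalone check (not part of the patch) of the log1p-based identity this routine relies on: acosh(x) = log(x + sqrt(x^2 - 1)) = log1p((x - 1) + sqrt((x - 1) * (x + 1))), which keeps precision for x close to 1, where the argument of a plain log would be 1 plus a tiny quantity.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double xs[] = { 1.0000001, 1.5, 10.0, 1e6 };
  for (unsigned i = 0; i < 4; i++)
    {
      double xm1 = xs[i] - 1;
      /* acosh(x) = log1p((x - 1) + sqrt((x - 1) * (x + 1))).  */
      double y = log1p (xm1 + sqrt (xm1 * (xs[i] + 1)));
      printf ("x=%-10g identity=%.17g libm=%.17g\n", xs[i], y, acosh (xs[i]));
    }
  return 0;
}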
diff --git a/contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c b/contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c
new file mode 100644
index 000000000000..f527083af40a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_acoshf_2u8.c
@@ -0,0 +1,47 @@
+/*
+ * Single-precision SVE acosh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define One 0x3f800000
+#define Thres 0x20000000 /* asuint(0x1p64) - One. */
+
+#include "sv_log1pf_inline.h"
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (acoshf, x, y, special);
+}
+
+/* Single-precision SVE acosh(x) routine. Implements the same algorithm as
+ vector acoshf and log1p.
+
+ Maximum error is 2.78 ULPs:
+ SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4
+ want 0x1.f45b3cp-4. */
+svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
+{
+ svuint32_t ix = svreinterpret_u32 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
+
+ svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
+ svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
+ svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, y, special);
+ return y;
+}
+
+PL_SIG (SV, F, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (acosh), 2.29)
+PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500)
+PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000)
+PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000)
+PL_TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_asin_3u.c b/contrib/arm-optimized-routines/pl/math/sv_asin_3u.c
new file mode 100644
index 000000000000..c3dd37b145ae
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_asin_3u.c
@@ -0,0 +1,84 @@
+/*
+ * Double-precision SVE asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64_t poly[12];
+ float64_t pi_over_2f;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+ on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */
+ .poly = { 0x1.555555555554ep-3, 0x1.3333333337233p-4,
+ 0x1.6db6db67f6d9fp-5, 0x1.f1c71fbd29fbbp-6,
+ 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6,
+ 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7,
+ 0x1.fd1151acb6bedp-8, 0x1.087182f799c1dp-6,
+ -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, },
+ .pi_over_2f = 0x1.921fb54442d18p+0,
+};
+
+#define P(i) sv_f64 (d->poly[i])
+
+/* Double-precision SVE implementation of vector asin(x).
+
+ For |x| in [0, 0.5], use an order 11 polynomial P such that the final
+ approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+ The largest observed error in this region is 0.52 ulps,
+ _ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2
+ want 0x1.ec13757305f26p-2.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 2.69 ulps,
+ _ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
+ want 0x1.110d7e85fdd53p-1. */
+svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+ svfloat64_t ax = svabs_x (pg, x);
+ svbool_t a_ge_half = svacge (pg, x, 0.5);
+
+ /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+ z = x ^ 2 and y = |x| , if |x| < 0.5
+ z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */
+ svfloat64_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f64 (0.5), ax, 0.5),
+ svmul_x (pg, x, x));
+ svfloat64_t z = svsqrt_m (ax, a_ge_half, z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ svfloat64_t z4 = svmul_x (pg, z2, z2);
+ svfloat64_t z8 = svmul_x (pg, z4, z4);
+ svfloat64_t z16 = svmul_x (pg, z8, z8);
+ svfloat64_t p = sv_estrin_11_f64_x (pg, z2, z4, z8, z16, d->poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+
+ /* asin(|x|) = Q(|x|) , for |x| < 0.5
+ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */
+ svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (-2.0), d->pi_over_2f);
+
+ /* Copy sign. */
+ return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
+}
+
+PL_SIG (SV, D, 1, asin, -1.0, 1.0)
+PL_TEST_ULP (SV_NAME_D1 (asin), 2.19)
+PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0, 0.5, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (asin), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (asin), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (SV_NAME_D1 (asin), -0, -inf, 20000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c
new file mode 100644
index 000000000000..8e9edc2439f5
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_asinf_2u5.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision SVE asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32_t poly[5];
+ float32_t pi_over_2f;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on
+ [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */
+ .poly = { 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6,
+ 0x1.3af7d8p-5, },
+ .pi_over_2f = 0x1.921fb6p+0f,
+};
+
+/* Single-precision SVE implementation of vector asin(x).
+
+ For |x| in [0, 0.5], use order 4 polynomial P such that the final
+ approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+ The largest observed error in this region is 0.83 ulps,
+ _ZGVsMxv_asinf (0x1.ea00f4p-2) got 0x1.fef15ep-2
+ want 0x1.fef15cp-2.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 2.41 ulps,
+ _ZGVsMxv_asinf (-0x1.00203ep-1) got -0x1.0c3a64p-1
+ want -0x1.0c3a6p-1. */
+svfloat32_t SV_NAME_F1 (asin) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), 0x80000000);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svbool_t a_ge_half = svacge (pg, x, 0.5);
+
+ /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+ z = x ^ 2 and y = |x| , if |x| < 0.5
+ z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */
+ svfloat32_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f32 (0.5), ax, 0.5),
+ svmul_x (pg, x, x));
+ svfloat32_t z = svsqrt_m (ax, a_ge_half, z2);
+
+ /* Use a single polynomial approximation P for both intervals. */
+ svfloat32_t p = sv_horner_4_f32_x (pg, z2, d->poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
+
+ /* asin(|x|) = Q(|x|) , for |x| < 0.5
+ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */
+ svfloat32_t y = svmad_m (a_ge_half, p, sv_f32 (-2.0), d->pi_over_2f);
+
+ /* Copy sign. */
+ return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign));
+}
+
+PL_SIG (SV, F, 1, asin, -1.0, 1.0)
+PL_TEST_ULP (SV_NAME_F1 (asin), 1.91)
+PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0, 0.5, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (asin), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (asin), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (SV_NAME_F1 (asin), -0, -inf, 20000) \ No newline at end of file
diff --git a/contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c b/contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c
new file mode 100644
index 000000000000..711f0dfdbedc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_asinh_3u0.c
@@ -0,0 +1,129 @@
+/*
+ * Double-precision SVE asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define OneTop sv_u64 (0x3ff) /* top12(asuint64(1.0f)). */
+#define HugeBound sv_u64 (0x5fe) /* top12(asuint64(0x1p511)). */
+#define TinyBound (0x3e5) /* top12(asuint64(0x1p-26)). */
+#define SignMask (0x8000000000000000)
+
+/* Constants & data for log. */
+#define A(i) __v_log_data.poly[i]
+#define Ln2 (0x1.62e42fefa39efp-1)
+#define N (1 << V_LOG_TABLE_BITS)
+#define OFF (0x3fe6900900000000)
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (asinh, x, y, special);
+}
+
+static inline svfloat64_t
+__sv_log_inline (svfloat64_t x, const svbool_t pg)
+{
+ /* Double-precision SVE log, copied from pl/math/sv_log_2u5.c with some
+ cosmetic modification and special-cases removed. See that file for details
+ of the algorithm used. */
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t tmp = svsub_x (pg, ix, OFF);
+ svuint64_t i
+ = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
+ svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
+ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+ svfloat64_t z = svreinterpret_f64 (iz);
+ svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+ svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
+ svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), invc, z);
+ svfloat64_t kd = svcvt_f64_x (pg, k);
+ svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, Ln2);
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t y = svmla_x (pg, sv_f64 (A (2)), r, A (3));
+ svfloat64_t p = svmla_x (pg, sv_f64 (A (0)), r, A (1));
+ y = svmla_x (pg, y, r2, A (4));
+ y = svmla_x (pg, p, r2, y);
+ y = svmla_x (pg, hi, r2, y);
+ return y;
+}
+
+/* Double-precision implementation of SVE asinh(x).
+ asinh is very sensitive around 1, so it is impractical to devise a single
+ low-cost algorithm which is sufficiently accurate on a wide range of input.
+ Instead we use two different algorithms:
+   asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1)) if |x| >= 1
+ = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise
+ where log(x) is an optimized log approximation, and P(x) is a polynomial
+   shared with the scalar routine. The greatest observed error is 2.51 ULP, in
+ |x| >= 1:
+ _ZGVsMxv_asinh(0x1.170469d024505p+0) got 0x1.e3181c43b0f36p-1
+ want 0x1.e3181c43b0f39p-1. */
+svfloat64_t SV_NAME_D1 (asinh) (svfloat64_t x, const svbool_t pg)
+{
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t iax = svbic_x (pg, ix, SignMask);
+ svuint64_t sign = svand_x (pg, ix, SignMask);
+ svfloat64_t ax = svreinterpret_f64 (iax);
+ svuint64_t top12 = svlsr_x (pg, iax, 52);
+
+ svbool_t ge1 = svcmpge (pg, top12, OneTop);
+ svbool_t special = svcmpge (pg, top12, HugeBound);
+
+ /* Option 1: |x| >= 1.
+     Compute asinh(x) as log(x + sqrt(x^2 + 1)). */
+ svfloat64_t option_1 = sv_f64 (0);
+ if (likely (svptest_any (pg, ge1)))
+ {
+ svfloat64_t axax = svmul_x (pg, ax, ax);
+ option_1 = __sv_log_inline (
+ svadd_x (pg, ax, svsqrt_x (pg, svadd_x (pg, axax, 1))), pg);
+ }
+
+ /* Option 2: |x| < 1.
+ Compute asinh(x) using a polynomial.
+ The largest observed error in this region is 1.51 ULPs:
+ _ZGVsMxv_asinh(0x1.fe12bf8c616a2p-1) got 0x1.c1e649ee2681bp-1
+ want 0x1.c1e649ee2681dp-1. */
+ svfloat64_t option_2 = sv_f64 (0);
+ if (likely (svptest_any (pg, svnot_z (pg, ge1))))
+ {
+ svfloat64_t x2 = svmul_x (pg, ax, ax);
+ svfloat64_t z2 = svmul_x (pg, x2, x2);
+ svfloat64_t z4 = svmul_x (pg, z2, z2);
+ svfloat64_t z8 = svmul_x (pg, z4, z4);
+ svfloat64_t z16 = svmul_x (pg, z8, z8);
+ svfloat64_t p
+ = sv_estrin_17_f64_x (pg, x2, z2, z4, z8, z16, __asinh_data.poly);
+ option_2 = svmla_x (pg, ax, p, svmul_x (pg, x2, ax));
+ }
+
+ /* Choose the right option for each lane. */
+ svfloat64_t y = svsel (ge1, option_1, option_2);
+
+ /* Apply sign of x to y. */
+ y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, y, special);
+ return y;
+}
+
+PL_SIG (SV, D, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (asinh), 2.52)
+/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
+ Ensures the svsel is choosing the right option in all cases. */
+#define SV_ASINH_INTERVAL(lo, hi, n) \
+ PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 0.5) \
+ PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 2) \
+ PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (asinh), lo, hi, n, 0x1p600)
+SV_ASINH_INTERVAL (0, 0x1p-26, 50000)
+SV_ASINH_INTERVAL (0x1p-26, 1, 50000)
+SV_ASINH_INTERVAL (1, 0x1p511, 50000)
+SV_ASINH_INTERVAL (0x1p511, inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c
new file mode 100644
index 000000000000..1f1f6e5c846f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_asinhf_2u5.c
@@ -0,0 +1,55 @@
+/*
+ * Single-precision SVE asinh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "include/mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "sv_log1pf_inline.h"
+
+#define BigBound (0x5f800000) /* asuint(0x1p64). */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (asinhf, x, y, special);
+}
+
+/* Single-precision SVE asinh(x) routine. Implements the same algorithm as
+ vector asinhf and log1p.
+
+ Maximum error is 2.48 ULPs:
+ SV_NAME_F1 (asinh) (0x1.008864p-3) got 0x1.ffbbbcp-4
+ want 0x1.ffbbb8p-4. */
+svfloat32_t SV_NAME_F1 (asinh) (svfloat32_t x, const svbool_t pg)
+{
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+ svbool_t special = svcmpge (pg, iax, BigBound);
+
+ /* asinh(x) = log(x + sqrt(x * x + 1)).
+ For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */
+ svfloat32_t ax2 = svmul_x (pg, ax, ax);
+ svfloat32_t d = svadd_x (pg, svsqrt_x (pg, svadd_x (pg, ax2, 1.0f)), 1.0f);
+ svfloat32_t y
+ = sv_log1pf_inline (svadd_x (pg, ax, svdiv_x (pg, ax2, d)), pg);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y))),
+ special);
+ return svreinterpret_f32 (svorr_x (pg, sign, svreinterpret_u32 (y)));
+}
+
+PL_SIG (SV, F, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (asinh), 1.98)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0, 0x1p-12, 4000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p-12, 1.0, 20000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 1.0, 0x1p64, 20000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (asinh), 0x1p64, inf, 4000)
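
A quick standalone check (not part of the patch) of the positive-x formulation in the comment above: since sqrt(x^2 + 1) - 1 = x^2 / (1 + sqrt(x^2 + 1)), asinh(x) = log1p(x + x^2 / (1 + sqrt(x^2 + 1))), which stays well conditioned for small x.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  float xs[] = { 0x1p-12f, 0.5f, 3.0f, 1000.0f };
  for (unsigned i = 0; i < 4; i++)
    {
      float x2 = xs[i] * xs[i];
      /* sqrt(x^2 + 1) - 1 == x^2 / (1 + sqrt(x^2 + 1)), so for x >= 0
         asinh(x) = log1p(x + x^2 / (1 + sqrt(x^2 + 1))).  */
      float y = log1pf (xs[i] + x2 / (1.0f + sqrtf (x2 + 1.0f)));
      printf ("x=%-8g identity=%.9g libm=%.9g\n", xs[i], y, asinhf (xs[i]));
    }
  return 0;
}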
diff --git a/contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c
new file mode 100644
index 000000000000..00530a324a76
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_atan2_2u5.c
@@ -0,0 +1,116 @@
+/*
+ * Double-precision vector atan2(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+ float64_t poly[20];
+ float64_t pi_over_2;
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-1022, 1.0]. */
+ .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
+ 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
+ -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
+ 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
+ -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
+ 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
+ -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, },
+ .pi_over_2 = 0x1.921fb54442d18p+0,
+};
+
+/* Useful constants. */
+#define SignMask sv_u64 (0x8000000000000000)
+
+/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
+static svfloat64_t NOINLINE
+special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret,
+ const svbool_t cmp)
+{
+ return sv_call2_f64 (atan2, y, x, ret, cmp);
+}
+
+/* Returns a predicate indicating true if the input is the bit representation
+ of 0, infinity or nan. */
+static inline svbool_t
+zeroinfnan (svuint64_t i, const svbool_t pg)
+{
+ return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
+ sv_u64 (2 * asuint64 (INFINITY) - 1));
+}
+
+/* Fast implementation of SVE atan2. Errors are greatest when y and
+ x are reasonably close together. The greatest observed error is 2.28 ULP:
+ _ZGVsMxvv_atan2 (-0x1.5915b1498e82fp+732, 0x1.54d11ef838826p+732)
+ got -0x1.954f42f1fa841p-1 want -0x1.954f42f1fa843p-1. */
+svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg)
+{
+ const struct data *data_ptr = ptr_barrier (&data);
+
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t iy = svreinterpret_u64 (y);
+
+ svbool_t cmp_x = zeroinfnan (ix, pg);
+ svbool_t cmp_y = zeroinfnan (iy, pg);
+ svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+
+ svuint64_t sign_x = svand_x (pg, ix, SignMask);
+ svuint64_t sign_y = svand_x (pg, iy, SignMask);
+ svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svfloat64_t ay = svabs_x (pg, y);
+
+ svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
+ svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+
+ /* Set up z for call to atan. */
+ svfloat64_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+ svfloat64_t d = svsel (pred_aygtax, ay, ax);
+ svfloat64_t z = svdiv_x (pg, n, d);
+
+ /* Work out the correct shift. */
+ svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0));
+ shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+ shift = svmul_x (pg, shift, data_ptr->pi_over_2);
+
+ /* Use split Estrin scheme for P(z^2) with deg(P)=19. */
+ svfloat64_t z2 = svmul_x (pg, z, z);
+ svfloat64_t x2 = svmul_x (pg, z2, z2);
+ svfloat64_t x4 = svmul_x (pg, x2, x2);
+ svfloat64_t x8 = svmul_x (pg, x4, x4);
+
+ svfloat64_t ret = svmla_x (
+ pg, sv_estrin_7_f64_x (pg, z2, x2, x4, data_ptr->poly),
+ sv_estrin_11_f64_x (pg, z2, x2, x4, x8, data_ptr->poly + 8), x8);
+
+ /* y = shift + z + z^3 * P(z^2). */
+ svfloat64_t z3 = svmul_x (pg, z2, z);
+ ret = svmla_x (pg, z, z3, ret);
+
+ ret = svadd_m (pg, ret, shift);
+
+ /* Account for the sign of x and y. */
+ ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy));
+
+ if (unlikely (svptest_any (pg, cmp_xy)))
+ return special_case (y, x, ret, cmp_xy);
+
+ return ret;
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (SV, D, 2, atan2)
+PL_TEST_ULP (SV_NAME_D2 (atan2), 1.78)
+PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_D2 (atan2), 100, inf, 40000)
+PL_TEST_INTERVAL (SV_NAME_D2 (atan2), -0, -inf, 40000)
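
A scalar reference sketch (not part of the patch) of the quadrant logic above: z is a ratio of |y| and |x| (negated when |y| > |x|), the shift is a multiple of pi/2 selected from (x < 0) and (|y| > |x|), and the combined sign of x and y is applied last. Zeros, infinities and NaNs are ignored here, as the SVE routine sends those lanes to the scalar fallback.

#include <math.h>
#include <stdio.h>

static double
atan2_ref (double y, double x)
{
  const double pi_over_2 = 0x1.921fb54442d18p+0;
  double ax = fabs (x), ay = fabs (y);
  int aygtax = ay > ax;
  /* Divide the smaller magnitude by the larger so |z| <= 1, negating the
     numerator when |y| > |x| (mirrors n/d in the SVE code).  */
  double z = aygtax ? -ax / ay : ay / ax;
  /* shift/(pi/2) is -2 when x < 0, plus 1 when |y| > |x|.  */
  double shift = (x < 0 ? -2.0 : 0.0) + (aygtax ? 1.0 : 0.0);
  double ret = shift * pi_over_2 + atan (z);
  /* Account for the sign of x and y.  */
  return copysign (1.0, x) * copysign (1.0, y) < 0 ? -ret : ret;
}

int
main (void)
{
  printf ("ref=%.17g libm=%.17g\n", atan2_ref (-2, -3), atan2 (-2, -3));
  printf ("ref=%.17g libm=%.17g\n", atan2_ref (5, 1), atan2 (5, 1));
  printf ("ref=%.17g libm=%.17g\n", atan2_ref (2, -3), atan2 (2, -3));
  return 0;
}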
diff --git a/contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c b/contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c
new file mode 100644
index 000000000000..9ff73ecb74ba
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_atan2f_3u.c
@@ -0,0 +1,108 @@
+/*
+ * Single-precision vector atan2f(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+ float32_t poly[8];
+ float32_t pi_over_2;
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-128, 1.0]. */
+ .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
+ -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f },
+ .pi_over_2 = 0x1.921fb6p+0f,
+};
+
+#define SignMask sv_u32 (0x80000000)
+
+/* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */
+static inline svfloat32_t
+special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret,
+ const svbool_t cmp)
+{
+ return sv_call2_f32 (atan2f, y, x, ret, cmp);
+}
+
+/* Returns a predicate indicating true if the input is the bit representation
+ of 0, infinity or nan. */
+static inline svbool_t
+zeroinfnan (svuint32_t i, const svbool_t pg)
+{
+ return svcmpge (pg, svsub_x (pg, svlsl_x (pg, i, 1), 1),
+ sv_u32 (2 * 0x7f800000lu - 1));
+}
+
+/* Fast implementation of SVE atan2f based on atan(x) ~ shift + z + z^3 *
+ P(z^2) with reduction to [0,1] using z=1/x and shift = pi/2. Maximum
+ observed error is 2.95 ULP:
+ _ZGVsMxvv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
+ want 0x1.967f00p-1. */
+svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg)
+{
+ const struct data *data_ptr = ptr_barrier (&data);
+
+ svuint32_t ix = svreinterpret_u32 (x);
+ svuint32_t iy = svreinterpret_u32 (y);
+
+ svbool_t cmp_x = zeroinfnan (ix, pg);
+ svbool_t cmp_y = zeroinfnan (iy, pg);
+ svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y);
+
+ svuint32_t sign_x = svand_x (pg, ix, SignMask);
+ svuint32_t sign_y = svand_x (pg, iy, SignMask);
+ svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svfloat32_t ay = svabs_x (pg, y);
+
+ svbool_t pred_xlt0 = svcmplt (pg, x, 0.0);
+ svbool_t pred_aygtax = svcmpgt (pg, ay, ax);
+
+ /* Set up z for call to atan. */
+ svfloat32_t n = svsel (pred_aygtax, svneg_x (pg, ax), ay);
+ svfloat32_t d = svsel (pred_aygtax, ay, ax);
+ svfloat32_t z = svdiv_x (pg, n, d);
+
+ /* Work out the correct shift. */
+ svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0));
+ shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift);
+ shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2));
+
+ /* Use split Estrin scheme for P(z^2) with deg(P)=7. */
+ svfloat32_t z2 = svmul_x (pg, z, z);
+ svfloat32_t z4 = svmul_x (pg, z2, z2);
+ svfloat32_t z8 = svmul_x (pg, z4, z4);
+
+ svfloat32_t ret = sv_estrin_7_f32_x (pg, z2, z4, z8, data_ptr->poly);
+
+ /* ret = shift + z + z^3 * P(z^2). */
+ svfloat32_t z3 = svmul_x (pg, z2, z);
+ ret = svmla_x (pg, z, z3, ret);
+
+ ret = svadd_m (pg, ret, shift);
+
+ /* Account for the sign of x and y. */
+ ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy));
+
+ if (unlikely (svptest_any (pg, cmp_xy)))
+ return special_case (y, x, ret, cmp_xy);
+
+ return ret;
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (SV, F, 2, atan2)
+PL_TEST_ULP (SV_NAME_F2 (atan2), 2.45)
+PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_F2 (atan2), 100, inf, 40000)
+PL_TEST_INTERVAL (SV_NAME_F2 (atan2), -0, -inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c
new file mode 100644
index 000000000000..7ab486a4c9d2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_atan_2u5.c
@@ -0,0 +1,87 @@
+/*
+ * Double-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+ float64_t poly[20];
+ float64_t pi_over_2;
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-1022, 1.0]. */
+ .poly = { -0x1.5555555555555p-2, 0x1.99999999996c1p-3, -0x1.2492492478f88p-3,
+ 0x1.c71c71bc3951cp-4, -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4,
+ -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, -0x1.aebfe7b418581p-5,
+ 0x1.842dbe9b0d916p-5, -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5,
+ -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, -0x1.0051381722a59p-6,
+ 0x1.14e9dc19a4a4ep-7, -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10,
+ -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16, },
+ .pi_over_2 = 0x1.921fb54442d18p+0,
+};
+
+/* Useful constants. */
+#define SignMask (0x8000000000000000)
+
+/* Fast implementation of SVE atan.
+ Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=1/x and shift = pi/2. Largest errors are close to 1. The maximum observed
+ error is 2.27 ulps:
+ _ZGVsMxv_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+ want 0x1.9225645bdd7c3p-1. */
+svfloat64_t SV_NAME_D1 (atan) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* No need to trigger special case. Small cases, infs and nans
+ are supported by our approximation technique. */
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t sign = svand_x (pg, ix, SignMask);
+
+ /* Argument reduction:
+ y := arctan(x) for x < 1
+ y := pi/2 + arctan(-1/x) for x > 1
+ Hence, use z=-1/a if x>=1, otherwise z=a. */
+ svbool_t red = svacgt (pg, x, 1.0);
+  /* Avoid a dependency on abs(x) in the division (and comparison). */
+ svfloat64_t z = svsel (red, svdivr_x (pg, x, 1.0), x);
+ /* Use absolute value only when needed (odd powers of z). */
+ svfloat64_t az = svabs_x (pg, z);
+ az = svneg_m (az, red, az);
+
+ /* Use split Estrin scheme for P(z^2) with deg(P)=19. */
+ svfloat64_t z2 = svmul_x (pg, z, z);
+ svfloat64_t x2 = svmul_x (pg, z2, z2);
+ svfloat64_t x4 = svmul_x (pg, x2, x2);
+ svfloat64_t x8 = svmul_x (pg, x4, x4);
+
+ svfloat64_t y
+ = svmla_x (pg, sv_estrin_7_f64_x (pg, z2, x2, x4, d->poly),
+ sv_estrin_11_f64_x (pg, z2, x2, x4, x8, d->poly + 8), x8);
+
+ /* y = shift + z + z^3 * P(z^2). */
+ svfloat64_t z3 = svmul_x (pg, z2, az);
+ y = svmla_x (pg, az, z3, y);
+
+ /* Apply shift as indicated by `red` predicate. */
+ y = svadd_m (red, y, d->pi_over_2);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ y = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+
+ return y;
+}
+
+PL_SIG (SV, D, 1, atan, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_D1 (atan), 1.78)
+PL_TEST_INTERVAL (SV_NAME_D1 (atan), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (atan), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (atan), 100, inf, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (atan), -0, -inf, 40000)
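
The reduction above relies on the identity atan(x) = pi/2 + atan(-1/x) for x > 1. A minimal standalone check of that identity, using the scalar libm atan (illustration only):

#include <math.h>
#include <stdio.h>

int
main (void)
{
  const double pi_over_2 = 0x1.921fb54442d18p+0;
  for (double x = 1.5; x < 1e6; x *= 3.0)
    printf ("atan(%g) = %a, pi/2 + atan(-1/x) = %a\n", x, atan (x),
            pi_over_2 + atan (-1.0 / x));
  return 0;
}
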
diff --git a/contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c b/contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c
new file mode 100644
index 000000000000..4defb356e7f9
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_atanf_2u9.c
@@ -0,0 +1,76 @@
+/*
+ * Single-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+ float32_t poly[8];
+ float32_t pi_over_2;
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-128, 1.0]. */
+ .poly = { -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
+ -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 0x1.01fd88p-8f },
+ .pi_over_2 = 0x1.921fb6p+0f,
+};
+
+#define SignMask (0x80000000)
+
+/* Fast implementation of SVE atanf based on
+ atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=-1/x and shift = pi/2.
+ Largest observed error is 2.9 ULP, close to +/-1.0:
+ _ZGVsMxv_atanf (0x1.0468f6p+0) got -0x1.967f06p-1
+ want -0x1.967fp-1. */
+svfloat32_t SV_NAME_F1 (atan) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* No need to trigger special case. Small cases, infs and nans
+ are supported by our approximation technique. */
+ svuint32_t ix = svreinterpret_u32 (x);
+ svuint32_t sign = svand_x (pg, ix, SignMask);
+
+ /* Argument reduction:
+ y := arctan(x) for x < 1
+ y := pi/2 + arctan(-1/x) for x > 1
+ Hence, use z=-1/a if x>=1, otherwise z=a. */
+ svbool_t red = svacgt (pg, x, 1.0f);
+  /* Avoid a dependency on abs(x) in the division (and comparison). */

+ svfloat32_t z = svsel (red, svdiv_x (pg, sv_f32 (1.0f), x), x);
+ /* Use absolute value only when needed (odd powers of z). */
+ svfloat32_t az = svabs_x (pg, z);
+ az = svneg_m (az, red, az);
+
+ /* Use split Estrin scheme for P(z^2) with deg(P)=7. */
+ svfloat32_t z2 = svmul_x (pg, z, z);
+ svfloat32_t z4 = svmul_x (pg, z2, z2);
+ svfloat32_t z8 = svmul_x (pg, z4, z4);
+
+ svfloat32_t y = sv_estrin_7_f32_x (pg, z2, z4, z8, d->poly);
+
+ /* y = shift + z + z^3 * P(z^2). */
+ svfloat32_t z3 = svmul_x (pg, z2, az);
+ y = svmla_x (pg, az, z3, y);
+
+ /* Apply shift as indicated by 'red' predicate. */
+ y = svadd_m (red, y, sv_f32 (d->pi_over_2));
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
+
+PL_SIG (SV, F, 1, atan, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_F1 (atan), 2.9)
+PL_TEST_INTERVAL (SV_NAME_F1 (atan), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (atan), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (atan), 100, inf, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (atan), -0, -inf, 40000)
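
The split Estrin evaluation used above pairs coefficients with fused multiply-adds and then combines the pairs using the precomputed powers z4 and z8. A scalar sketch of the same scheme (c[] stands in for d->poly; fmaf plays the role of the predicated FMLA):

#include <math.h>

static float
estrin_7_sketch (float z2, const float c[8])
{
  float z4 = z2 * z2;
  float z8 = z4 * z4;
  /* Pair the coefficients, then combine the pairs with z4 and z8.  */
  float p01 = fmaf (c[1], z2, c[0]);
  float p23 = fmaf (c[3], z2, c[2]);
  float p45 = fmaf (c[5], z2, c[4]);
  float p67 = fmaf (c[7], z2, c[6]);
  float p03 = fmaf (p23, z4, p01);
  float p47 = fmaf (p67, z4, p45);
  return fmaf (p47, z8, p03);
}
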
diff --git a/contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c b/contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c
new file mode 100644
index 000000000000..dcc9350b4962
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_atanh_3u3.c
@@ -0,0 +1,60 @@
+/*
+ * Double-precision SVE atanh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define WANT_SV_LOG1P_K0_SHORTCUT 0
+#include "sv_log1p_inline.h"
+
+#define One (0x3ff0000000000000)
+#define Half (0x3fe0000000000000)
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (atanh, x, y, special);
+}
+
+/* SVE approximation for double-precision atanh, based on log1p.
+ The greatest observed error is 2.81 ULP:
+ _ZGVsMxv_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+ want 0x1.ffd8ff31b501cp-6. */
+svfloat64_t SV_NAME_D1 (atanh) (svfloat64_t x, const svbool_t pg)
+{
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t iax = svreinterpret_u64 (ax);
+ svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax);
+ svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, Half));
+
+  /* The input is special if |x| >= 1. */
+  svbool_t special = svacge (pg, x, 1.0);
+
+  /* The computation is based on the identity
+     (1+x)/(1-x) = 1 + 2x/(1-x). */
+ svfloat64_t y;
+ y = svadd_x (pg, ax, ax);
+ y = svdiv_x (pg, y, svsub_x (pg, sv_f64 (1), ax));
+ /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y). */
+ y = sv_log1p_inline (y, pg);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmul_x (pg, halfsign, y), special);
+ return svmul_x (pg, halfsign, y);
+}
+
+PL_SIG (SV, D, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (SV_NAME_D1 (atanh), 3.32)
+/* atanh is asymptotic at 1, which is the default control value - have to set
+ -c 0 specially to ensure fp exceptions are triggered correctly (choice of
+ control lane is irrelevant if fp exceptions are disabled). */
+PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 0, 0x1p-23, 10000, 0)
+PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 0x1p-23, 1, 90000, 0)
+PL_TEST_SYM_INTERVAL_C (SV_NAME_D1 (atanh), 1, inf, 100, 0)
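
A scalar sketch of the same identity, atanh(x) = 0.5 * log1p(2|x|/(1 - |x|)) for |x| < 1, with the sign of x folded back into the 0.5 factor just as the routine ORs the sign bit into Half (illustration only; |x| >= 1 is left to the special-case path):

#include <math.h>

static double
atanh_sketch (double x)              /* |x| < 1; specials not handled. */
{
  double ax = fabs (x);
  double y = log1p (2.0 * ax / (1.0 - ax));
  return copysign (0.5, x) * y;
}
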
diff --git a/contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c b/contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c
new file mode 100644
index 000000000000..413c60ce05da
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_atanhf_2u8.c
@@ -0,0 +1,56 @@
+/*
+ * Single-precision vector atanh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "sv_log1pf_inline.h"
+
+#define One (0x3f800000)
+#define Half (0x3f000000)
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (atanhf, x, y, special);
+}
+
+/* Approximation for vector single-precision atanh(x) using modified log1p.
+ The maximum error is 2.28 ULP:
+ _ZGVsMxv_atanhf(0x1.ff1194p-5) got 0x1.ffbbbcp-5
+ want 0x1.ffbbb6p-5. */
+svfloat32_t SV_NAME_F1 (atanh) (svfloat32_t x, const svbool_t pg)
+{
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+ svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, Half));
+ svbool_t special = svcmpge (pg, iax, One);
+
+  /* The computation is based on the identity
+     (1+x)/(1-x) = 1 + 2x/(1-x). */
+ svfloat32_t y = svadd_x (pg, ax, ax);
+ y = svdiv_x (pg, y, svsub_x (pg, sv_f32 (1), ax));
+ /* ln((1+x)/(1-x)) = ln(1+2x/(1-x)) = ln(1 + y). */
+ y = sv_log1pf_inline (y, pg);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmul_x (pg, halfsign, y), special);
+
+ return svmul_x (pg, halfsign, y);
+}
+
+PL_SIG (SV, F, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (SV_NAME_F1 (atanh), 2.59)
+/* atanh is asymptotic at 1, which is the default control value - have to set
+ -c 0 specially to ensure fp exceptions are triggered correctly (choice of
+ control lane is irrelevant if fp exceptions are disabled). */
+PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 0, 0x1p-12, 1000, 0)
+PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 0x1p-12, 1, 20000, 0)
+PL_TEST_SYM_INTERVAL_C (SV_NAME_F1 (atanh), 1, inf, 1000, 0)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c b/contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c
new file mode 100644
index 000000000000..192f1cd80d59
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cbrt_2u.c
@@ -0,0 +1,122 @@
+/*
+ * Double-precision SVE cbrt(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+const static struct data
+{
+ float64_t poly[4];
+ float64_t table[5];
+ float64_t one_third, two_thirds, shift;
+ int64_t exp_bias;
+ uint64_t tiny_bound, thresh;
+} data = {
+ /* Generated with FPMinimax in [0.5, 1]. */
+ .poly = { 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1,
+ 0x1.2c74eaa3ba428p-3, },
+ /* table[i] = 2^((i - 2) / 3). */
+ .table = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
+ 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0, },
+ .one_third = 0x1.5555555555555p-2,
+ .two_thirds = 0x1.5555555555555p-1,
+ .shift = 0x1.8p52,
+ .exp_bias = 1022,
+ .tiny_bound = 0x0010000000000000, /* Smallest normal. */
+ .thresh = 0x7fe0000000000000, /* asuint64 (infinity) - tiny_bound. */
+};
+
+#define MantissaMask 0x000fffffffffffff
+#define HalfExp 0x3fe0000000000000
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (cbrt, x, y, special);
+}
+
+static inline svfloat64_t
+shifted_lookup (const svbool_t pg, const float64_t *table, svint64_t i)
+{
+ return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
+}
+
+/* Approximation for double-precision vector cbrt(x), using low-order
+ polynomial and two Newton iterations. Greatest observed error is 1.79 ULP.
+ Errors repeat according to the exponent, for instance an error observed for
+ double value m * 2^e will be observed for any input m * 2^(e + 3*i), where i
+ is an integer.
+ _ZGVsMxv_cbrt (0x0.3fffb8d4413f3p-1022) got 0x1.965f53b0e5d97p-342
+ want 0x1.965f53b0e5d95p-342. */
+svfloat64_t SV_NAME_D1 (cbrt) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t iax = svreinterpret_u64 (ax);
+ svuint64_t sign = sveor_x (pg, svreinterpret_u64 (x), iax);
+
+ /* Subnormal, +/-0 and special values. */
+ svbool_t special = svcmpge (pg, svsub_x (pg, iax, d->tiny_bound), d->thresh);
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexp, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ svfloat64_t m = svreinterpret_f64 (svorr_x (
+ pg, svand_x (pg, svreinterpret_u64 (x), MantissaMask), HalfExp));
+ svint64_t e
+ = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, iax, 52)), d->exp_bias);
+
+ /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
+ for Newton iterations. */
+ svfloat64_t p
+ = sv_pairwise_poly_3_f64_x (pg, m, svmul_x (pg, m, m), d->poly);
+
+ /* Two iterations of Newton's method for iteratively approximating cbrt. */
+ svfloat64_t m_by_3 = svmul_x (pg, m, d->one_third);
+ svfloat64_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p,
+ d->two_thirds);
+ a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, a, a)), a, d->two_thirds);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+ is an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+ svfloat64_t eb3f = svmul_x (pg, svcvt_f64_x (pg, e), d->one_third);
+ svint64_t ey = svcvt_s64_x (pg, eb3f);
+ svint64_t em3 = svmls_x (pg, e, ey, 3);
+
+ svfloat64_t my = shifted_lookup (pg, d->table, em3);
+ my = svmul_x (pg, my, a);
+
+ /* Vector version of ldexp. */
+ svfloat64_t y = svscale_x (pg, my, ey);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign)),
+ special);
+
+ /* Copy sign. */
+ return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
+}
+
+PL_SIG (SV, D, 1, cbrt, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (cbrt), 1.30)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cbrt), 0, inf, 1000000)
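
The exponent assembly described in the comment above can be mirrored in scalar code. In this sketch (normal, finite, non-zero inputs only) frexp performs the m * 2^e split, cbrt of the mantissa stands in for the polynomial plus Newton steps, and the same 5-entry table supplies 2^(rem/3):

#include <math.h>

static const double cbrt_tab[5]      /* 2^((i - 2) / 3), as in d->table. */
    = { 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
        0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 };

static double
cbrt_scale_sketch (double x)
{
  int e;
  double m = frexp (fabs (x), &e);   /* |x| = m * 2^e, m in [0.5, 1). */
  double a = cbrt (m);               /* stands in for poly + Newton steps. */
  int ey = (int) lround (e / 3.0);   /* round(e / 3). */
  int rem = e - 3 * ey;              /* remainder, always in [-2, 2]. */
  return copysign (ldexp (a * cbrt_tab[rem + 2], ey), x);
}
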
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c b/contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c
new file mode 100644
index 000000000000..5b625f308827
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cbrtf_1u7.c
@@ -0,0 +1,116 @@
+/*
+ * Single-precision SVE cbrt(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+const static struct data
+{
+ float32_t poly[4];
+ float32_t table[5];
+ float32_t one_third, two_thirds;
+} data = {
+ /* Very rough approximation of cbrt(x) in [0.5, 1], generated with FPMinimax.
+ */
+ .poly = { 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1,
+ 0x1.2c74c2p-3, },
+ /* table[i] = 2^((i - 2) / 3). */
+ .table = { 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 },
+ .one_third = 0x1.555556p-2f,
+ .two_thirds = 0x1.555556p-1f,
+};
+
+#define SmallestNormal 0x00800000
+#define Thresh 0x7f000000 /* asuint(INFINITY) - SmallestNormal. */
+#define MantissaMask 0x007fffff
+#define HalfExp 0x3f000000
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (cbrtf, x, y, special);
+}
+
+static inline svfloat32_t
+shifted_lookup (const svbool_t pg, const float32_t *table, svint32_t i)
+{
+ return svld1_gather_index (pg, table, svadd_x (pg, i, 2));
+}
+
+/* Approximation for vector single-precision cbrt(x) using Newton iteration
+ with initial guess obtained by a low-order polynomial. Greatest error
+ is 1.64 ULP. This is observed for every value where the mantissa is
+ 0x1.85a2aa and the exponent is a multiple of 3, for example:
+ _ZGVsMxv_cbrtf (0x1.85a2aap+3) got 0x1.267936p+1
+ want 0x1.267932p+1. */
+svfloat32_t SV_NAME_F1 (cbrt) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+
+ /* Subnormal, +/-0 and special values. */
+ svbool_t special = svcmpge (pg, svsub_x (pg, iax, SmallestNormal), Thresh);
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexpf, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ svfloat32_t m = svreinterpret_f32 (svorr_x (
+ pg, svand_x (pg, svreinterpret_u32 (x), MantissaMask), HalfExp));
+ svint32_t e = svsub_x (pg, svreinterpret_s32 (svlsr_x (pg, iax, 23)), 126);
+
+ /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is,
+ the less accurate the next stage of the algorithm needs to be. An order-4
+ polynomial is enough for one Newton iteration. */
+ svfloat32_t p
+ = sv_pairwise_poly_3_f32_x (pg, m, svmul_x (pg, m, m), d->poly);
+
+ /* One iteration of Newton's method for iteratively approximating cbrt. */
+ svfloat32_t m_by_3 = svmul_x (pg, m, d->one_third);
+ svfloat32_t a = svmla_x (pg, svdiv_x (pg, m_by_3, svmul_x (pg, p, p)), p,
+ d->two_thirds);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+ is an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+ svfloat32_t ef = svmul_x (pg, svcvt_f32_x (pg, e), d->one_third);
+ svint32_t ey = svcvt_s32_x (pg, ef);
+ svint32_t em3 = svmls_x (pg, e, ey, 3);
+
+ svfloat32_t my = shifted_lookup (pg, d->table, em3);
+ my = svmul_x (pg, my, a);
+
+ /* Vector version of ldexpf. */
+ svfloat32_t y = svscale_x (pg, my, ey);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (
+ x, svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign)),
+ special);
+
+ /* Copy sign. */
+ return svreinterpret_f32 (svorr_x (pg, svreinterpret_u32 (y), sign));
+}
+
+PL_SIG (SV, F, 1, cbrt, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (cbrt), 1.15)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cbrt), 0, inf, 1000000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c
new file mode 100644
index 000000000000..920acfea5da0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cexpi_3u5.c
@@ -0,0 +1,45 @@
+/*
+ * Double-precision vector cexpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_sincos_common.h"
+#include "sv_math.h"
+#include "pl_test.h"
+
+static svfloat64x2_t NOINLINE
+special_case (svfloat64_t x, svbool_t special, svfloat64x2_t y)
+{
+ return svcreate2 (sv_call_f64 (sin, x, svget2 (y, 0), special),
+ sv_call_f64 (cos, x, svget2 (y, 1), special));
+}
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate polynomials.
+ Largest observed error is for sin, 3.22 ULP:
+ sv_cexpi_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
+ want -0x1.ffe9537d5dbb4p-3. */
+svfloat64x2_t
+_ZGVsMxv_cexpi (svfloat64_t x, svbool_t pg)
+{
+ const struct sv_sincos_data *d = ptr_barrier (&sv_sincos_data);
+ svbool_t special = check_ge_rangeval (pg, x, d);
+
+ svfloat64x2_t sc = sv_sincos_inline (pg, x, d);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, special, sc);
+ return sc;
+}
+
+PL_TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73)
+PL_TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73)
+#define SV_CEXPI_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n)
+SV_CEXPI_INTERVAL (0, 0x1p23, 500000)
+SV_CEXPI_INTERVAL (-0, -0x1p23, 500000)
+SV_CEXPI_INTERVAL (0x1p23, inf, 10000)
+SV_CEXPI_INTERVAL (-0x1p23, -inf, 10000)
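
A hypothetical caller might consume the interleaved return value as below. This assumes an SVE-enabled toolchain; the buffer layout and function name sincos_buffer are illustrative only:

#include <arm_sve.h>

svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t);

void
sincos_buffer (const double *theta, double *s, double *c, int n)
{
  for (int i = 0; i < n; i += (int) svcntd ())
    {
      svbool_t pg = svwhilelt_b64 (i, n);
      svfloat64x2_t sc = _ZGVsMxv_cexpi (svld1 (pg, theta + i), pg);
      svst1 (pg, s + i, svget2 (sc, 0));   /* sin lane-by-lane. */
      svst1 (pg, c + i, svget2 (sc, 1));   /* cos lane-by-lane. */
    }
}
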
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c b/contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c
new file mode 100644
index 000000000000..93f2f998cb38
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cexpif_1u8.c
@@ -0,0 +1,47 @@
+/*
+ * Single-precision vector cexpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_sincosf_common.h"
+#include "sv_math.h"
+#include "pl_test.h"
+
+static svfloat32x2_t NOINLINE
+special_case (svfloat32_t x, svbool_t special, svfloat32x2_t y)
+{
+ return svcreate2 (sv_call_f32 (sinf, x, svget2 (y, 0), special),
+ sv_call_f32 (cosf, x, svget2 (y, 1), special));
+}
+
+/* Single-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate low-order
+ polynomials.
+ Worst-case error for sin is 1.67 ULP:
+ v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
+ Worst-case error for cos is 1.81 ULP:
+ v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */
+svfloat32x2_t
+_ZGVsMxv_cexpif (svfloat32_t x, svbool_t pg)
+{
+ const struct sv_sincosf_data *d = ptr_barrier (&sv_sincosf_data);
+ svbool_t special = check_ge_rangeval (pg, x, d);
+
+ svfloat32x2_t sc = sv_sincosf_inline (pg, x, d);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, special, sc);
+ return sc;
+}
+
+PL_TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17)
+PL_TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31)
+#define SV_CEXPIF_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n)
+SV_CEXPIF_INTERVAL (0, 0x1p20, 500000)
+SV_CEXPIF_INTERVAL (-0, -0x1p20, 500000)
+SV_CEXPIF_INTERVAL (0x1p20, inf, 10000)
+SV_CEXPIF_INTERVAL (-0x1p20, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c
new file mode 100644
index 000000000000..76af3459b3f2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cos_2u5.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision SVE cos(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ double inv_pio2, pio2_1, pio2_2, pio2_3, shift;
+} data = {
+ /* Polynomial coefficients are hardwired in FTMAD instructions. */
+ .inv_pio2 = 0x1.45f306dc9c882p-1,
+ .pio2_1 = 0x1.921fb50000000p+0,
+ .pio2_2 = 0x1.110b460000000p-26,
+ .pio2_3 = 0x1.1a62633145c07p-54,
+ /* Original shift used in AdvSIMD cos,
+ plus a contribution to set the bit #0 of q
+ as expected by trigonometric instructions. */
+ .shift = 0x1.8000000000001p52
+};
+
+#define RangeVal 0x4160000000000000 /* asuint64 (0x1p23). */
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t oob)
+{
+ return sv_call_f64 (cos, x, y, oob);
+}
+
+/* A fast SVE implementation of cos based on trigonometric
+ instructions (FTMAD, FTSSEL, FTSMUL).
+ Maximum measured error: 2.108 ULPs.
+ SV_NAME_D1 (cos)(0x1.9b0ba158c98f3p+7) got -0x1.fddd4c65c7f07p-3
+ want -0x1.fddd4c65c7f05p-3. */
+svfloat64_t SV_NAME_D1 (cos) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t r = svabs_x (pg, x);
+ svbool_t oob = svcmpge (pg, svreinterpret_u64 (r), RangeVal);
+
+ /* Load some constants in quad-word chunks to minimise memory access. */
+ svbool_t ptrue = svptrue_b64 ();
+ svfloat64_t invpio2_and_pio2_1 = svld1rq (ptrue, &d->inv_pio2);
+ svfloat64_t pio2_23 = svld1rq (ptrue, &d->pio2_2);
+
+ /* n = rint(|x|/(pi/2)). */
+ svfloat64_t q = svmla_lane (sv_f64 (d->shift), r, invpio2_and_pio2_1, 0);
+ svfloat64_t n = svsub_x (pg, q, d->shift);
+
+ /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */
+ r = svmls_lane (r, n, invpio2_and_pio2_1, 1);
+ r = svmls_lane (r, n, pio2_23, 0);
+ r = svmls_lane (r, n, pio2_23, 1);
+
+ /* cos(r) poly approx. */
+ svfloat64_t r2 = svtsmul (r, svreinterpret_u64 (q));
+ svfloat64_t y = sv_f64 (0.0);
+ y = svtmad (y, r2, 7);
+ y = svtmad (y, r2, 6);
+ y = svtmad (y, r2, 5);
+ y = svtmad (y, r2, 4);
+ y = svtmad (y, r2, 3);
+ y = svtmad (y, r2, 2);
+ y = svtmad (y, r2, 1);
+ y = svtmad (y, r2, 0);
+
+ /* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */
+ svfloat64_t f = svtssel (r, svreinterpret_u64 (q));
+
+ if (unlikely (svptest_any (pg, oob)))
+ return special_case (x, svmul_x (svnot_z (pg, oob), y, f), oob);
+
+ /* Apply factor. */
+ return svmul_x (pg, f, y);
+}
+
+PL_SIG (SV, D, 1, cos, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_D1 (cos), 1.61)
+PL_TEST_INTERVAL (SV_NAME_D1 (cos), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (SV_NAME_D1 (cos), 0x1p-4, 0x1p4, 500000)
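
The step n = rint(|x|/(pi/2)) uses the usual add-then-subtract shift trick, and the low bits of the biased sum q double as the quadrant consumed by FTSMUL/FTSSEL/FTMAD. A scalar sketch of just the rounding part, using the plain 0x1.8p52 shift (the routine's shift additionally sets bit 0 of q):

static double
round_quadrant_sketch (double r)     /* r = |x|, assumed below RangeVal. */
{
  const double shift = 0x1.8p52;
  const double inv_pio2 = 0x1.45f306dc9c882p-1;
  double q = r * inv_pio2 + shift;   /* low mantissa bits now hold the quadrant. */
  return q - shift;                  /* n, an exact small integer. */
}
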
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c b/contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c
new file mode 100644
index 000000000000..4bdb0dd146bb
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cosf_2u1.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision SVE cos(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float neg_pio2_1, neg_pio2_2, neg_pio2_3, inv_pio2, shift;
+} data = {
+ /* Polynomial coefficients are hard-wired in FTMAD instructions. */
+ .neg_pio2_1 = -0x1.921fb6p+0f,
+ .neg_pio2_2 = 0x1.777a5cp-25f,
+ .neg_pio2_3 = 0x1.ee59dap-50f,
+ .inv_pio2 = 0x1.45f306p-1f,
+ /* Original shift used in AdvSIMD cosf,
+ plus a contribution to set the bit #0 of q
+ as expected by trigonometric instructions. */
+ .shift = 0x1.800002p+23f
+};
+
+#define RangeVal 0x49800000 /* asuint32(0x1p20f). */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t oob)
+{
+ return sv_call_f32 (cosf, x, y, oob);
+}
+
+/* A fast SVE implementation of cosf based on trigonometric
+ instructions (FTMAD, FTSSEL, FTSMUL).
+ Maximum measured error: 2.06 ULPs.
+ SV_NAME_F1 (cos)(0x1.dea2f2p+19) got 0x1.fffe7ap-6
+ want 0x1.fffe76p-6. */
+svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t r = svabs_x (pg, x);
+ svbool_t oob = svcmpge (pg, svreinterpret_u32 (r), RangeVal);
+
+ /* Load some constants in quad-word chunks to minimise memory access. */
+ svfloat32_t negpio2_and_invpio2 = svld1rq (svptrue_b32 (), &d->neg_pio2_1);
+
+ /* n = rint(|x|/(pi/2)). */
+ svfloat32_t q = svmla_lane (sv_f32 (d->shift), r, negpio2_and_invpio2, 3);
+ svfloat32_t n = svsub_x (pg, q, d->shift);
+
+ /* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */
+ r = svmla_lane (r, n, negpio2_and_invpio2, 0);
+ r = svmla_lane (r, n, negpio2_and_invpio2, 1);
+ r = svmla_lane (r, n, negpio2_and_invpio2, 2);
+
+ /* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */
+ svfloat32_t f = svtssel (r, svreinterpret_u32 (q));
+
+ /* cos(r) poly approx. */
+ svfloat32_t r2 = svtsmul (r, svreinterpret_u32 (q));
+ svfloat32_t y = sv_f32 (0.0f);
+ y = svtmad (y, r2, 4);
+ y = svtmad (y, r2, 3);
+ y = svtmad (y, r2, 2);
+ y = svtmad (y, r2, 1);
+ y = svtmad (y, r2, 0);
+
+ if (unlikely (svptest_any (pg, oob)))
+ return special_case (x, svmul_x (svnot_z (pg, oob), f, y), oob);
+ /* Apply factor. */
+ return svmul_x (pg, f, y);
+}
+
+PL_SIG (SV, F, 1, cos, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_F1 (cos), 1.57)
+PL_TEST_INTERVAL (SV_NAME_F1 (cos), 0, 0xffff0000, 10000)
+PL_TEST_INTERVAL (SV_NAME_F1 (cos), 0x1p-4, 0x1p4, 500000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c b/contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c
new file mode 100644
index 000000000000..a6d743fb9b96
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cosh_2u.c
@@ -0,0 +1,100 @@
+/*
+ * Double-precision SVE cosh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64_t poly[3];
+ float64_t inv_ln2, ln2_hi, ln2_lo, shift, thres;
+ uint64_t index_mask, special_bound;
+} data = {
+ .poly = { 0x1.fffffffffffd4p-2, 0x1.5555571d6b68cp-3,
+ 0x1.5555576a59599p-5, },
+
+ .inv_ln2 = 0x1.71547652b82fep8, /* N/ln2. */
+ /* -ln2/N. */
+ .ln2_hi = -0x1.62e42fefa39efp-9,
+ .ln2_lo = -0x1.abc9e3b39803f3p-64,
+ .shift = 0x1.8p+52,
+ .thres = 704.0,
+
+ .index_mask = 0xff,
+ /* 0x1.6p9, above which exp overflows. */
+ .special_bound = 0x4086000000000000,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (cosh, x, y, special);
+}
+
+/* Helper for approximating exp(x). Copied from sv_exp_tail, with no
+ special-case handling or tail. */
+static inline svfloat64_t
+exp_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+{
+ /* Calculate exp(x). */
+ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+ svfloat64_t n = svsub_x (pg, z, d->shift);
+
+ svfloat64_t r = svmla_x (pg, x, n, d->ln2_hi);
+ r = svmla_x (pg, r, n, d->ln2_lo);
+
+ svuint64_t u = svreinterpret_u64 (z);
+ svuint64_t e = svlsl_x (pg, u, 52 - V_EXP_TAIL_TABLE_BITS);
+ svuint64_t i = svand_x (pg, u, d->index_mask);
+
+ svfloat64_t y = svmla_x (pg, sv_f64 (d->poly[1]), r, d->poly[2]);
+ y = svmla_x (pg, sv_f64 (d->poly[0]), r, y);
+ y = svmla_x (pg, sv_f64 (1.0), r, y);
+ y = svmul_x (pg, r, y);
+
+ /* s = 2^(n/N). */
+ u = svld1_gather_index (pg, __v_exp_tail_data, i);
+ svfloat64_t s = svreinterpret_f64 (svadd_x (pg, u, e));
+
+ return svmla_x (pg, s, s, y);
+}
+
+/* Approximation for SVE double-precision cosh(x) using exp_inline.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The greatest observed error is in the scalar fall-back region, so is the
+ same as the scalar routine, 1.93 ULP:
+ _ZGVsMxv_cosh (0x1.628ad45039d2fp+9) got 0x1.fd774e958236dp+1021
+ want 0x1.fd774e958236fp+1021.
+
+ The greatest observed error in the non-special region is 1.54 ULP:
+ _ZGVsMxv_cosh (0x1.ba5651dd4486bp+2) got 0x1.f5e2bb8d5c98fp+8
+ want 0x1.f5e2bb8d5c991p+8. */
+svfloat64_t SV_NAME_D1 (cosh) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svbool_t special = svcmpgt (pg, svreinterpret_u64 (ax), d->special_bound);
+
+ /* Up to the point that exp overflows, we can use it to calculate cosh by
+ exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
+ svfloat64_t t = exp_inline (ax, pg, d);
+ svfloat64_t half_t = svmul_x (pg, t, 0.5);
+ svfloat64_t half_over_t = svdivr_x (pg, t, 0.5);
+
+ /* Fall back to scalar for any special cases. */
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+
+ return svadd_x (pg, half_t, half_over_t);
+}
+
+PL_SIG (SV, D, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (cosh), 1.43)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0, 0x1.6p9, 100000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cosh), 0x1.6p9, inf, 1000)
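
In the non-special range the routine is essentially the identity cosh(x) = exp(|x|)/2 + 0.5/exp(|x|); the reversed divide svdivr supplies the 0.5/t term. A scalar sketch (illustration only, |x| below the overflow bound):

#include <math.h>

static double
cosh_sketch (double x)
{
  double t = exp (fabs (x));         /* exp_inline in the vector routine. */
  return t * 0.5 + 0.5 / t;          /* half_t + half_over_t. */
}
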
diff --git a/contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c b/contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c
new file mode 100644
index 000000000000..81680fef318e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_coshf_2u.c
@@ -0,0 +1,56 @@
+/*
+ * Single-precision SVE cosh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "sv_expf_inline.h"
+
+static const struct data
+{
+ struct sv_expf_data expf_consts;
+ uint32_t special_bound;
+} data = {
+ .expf_consts = SV_EXPF_DATA,
+ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
+ .special_bound = 0x42ad496c,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
+{
+ return sv_call_f32 (coshf, x, y, pg);
+}
+
+/* Single-precision vector cosh, using vector expf.
+ Maximum error is 1.89 ULP:
+ _ZGVsMxv_coshf (-0x1.65898cp+6) got 0x1.f00aep+127
+ want 0x1.f00adcp+127. */
+svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->special_bound);
+
+ /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */
+ svfloat32_t t = expf_inline (ax, pg, &d->expf_consts);
+ svfloat32_t half_t = svmul_x (pg, t, 0.5);
+ svfloat32_t half_over_t = svdivr_x (pg, t, 0.5);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svadd_x (pg, half_t, half_over_t), special);
+
+ return svadd_x (pg, half_t, half_over_t);
+}
+
+PL_SIG (SV, F, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (cosh), 1.39)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1p-63, 100)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c b/contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c
new file mode 100644
index 000000000000..d80f899c41e4
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cospi_3u2.c
@@ -0,0 +1,63 @@
+/*
+ * Double-precision SVE cospi(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+ double poly[10];
+ double range_val;
+} data = {
+ /* Polynomial coefficients generated using Remez algorithm,
+ see sinpi.sollya for details. */
+ .poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1,
+ -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+ 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
+ 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
+ .range_val = 0x1p53,
+};
+
+/* A fast SVE implementation of cospi.
+ Maximum error 3.20 ULP:
+ _ZGVsMxv_cospi(0x1.f18ba32c63159p-6) got 0x1.fdabf595f9763p-1
+ want 0x1.fdabf595f9766p-1. */
+svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Using cospi(x) = sinpi(0.5 - x)
+ range reduction and offset into sinpi range -1/2 .. 1/2
+ r = 0.5 - |x - rint(x)|. */
+ svfloat64_t n = svrinta_x (pg, x);
+ svfloat64_t r = svsub_x (pg, x, n);
+ r = svsub_x (pg, sv_f64 (0.5), svabs_x (pg, r));
+
+  /* The result is negated if n is odd.
+     If |x| >= 2^53, the result is always positive. */
+ svbool_t cmp = svaclt (pg, x, d->range_val);
+ svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n));
+ svuint64_t sign = svlsl_z (cmp, intn, 63);
+
+ /* y = sin(r). */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t r4 = svmul_x (pg, r2, r2);
+ svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly);
+ y = svmul_x (pg, y, r);
+
+ return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+}
+
+PL_SIG (SV, D, 1, cospi, -0.9, 0.9)
+PL_TEST_ULP (SV_NAME_D1 (cospi), 2.71)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0, 0x1p-63, 5000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p-63, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0.5, 0x1p51, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (cospi), 0x1p51, inf, 100000)
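
The reduction above maps cospi onto a sinpi kernel: with n = rint(x) and r = 0.5 - |x - n|, cospi(x) = (-1)^n * sinpi(r). A scalar sketch (illustration only, assuming |x| well below 2^53 so the parity extraction is safe; sin(pi*r) stands in for the polynomial):

#include <math.h>

static double
cospi_sketch (double x)
{
  const double pi = 0x1.921fb54442d18p+1;
  double n = nearbyint (x);          /* svrinta in the vector routine. */
  double r = 0.5 - fabs (x - n);     /* fold into the sinpi range. */
  double y = sin (pi * r);           /* the r^2 polynomial stands in here. */
  return ((long long) n & 1) ? -y : y;
}
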
diff --git a/contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c b/contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c
new file mode 100644
index 000000000000..fb2922d0533a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_cospif_2u6.c
@@ -0,0 +1,59 @@
+/*
+ * Single-precision SVE cospi(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+ float poly[6];
+ float range_val;
+} data = {
+  /* Taylor series coefficients for sin(pi * x). */
+ .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
+ 0x1.50783p-4f, -0x1.e30750p-8f },
+ .range_val = 0x1p31f,
+};
+
+/* A fast SVE implementation of cospif.
+ Maximum error: 2.60 ULP:
+ _ZGVsMxv_cospif(+/-0x1.cae664p-4) got 0x1.e09c9ep-1
+ want 0x1.e09c98p-1. */
+svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Using cospi(x) = sinpi(0.5 - x)
+ range reduction and offset into sinpi range -1/2 .. 1/2
+ r = 0.5 - |x - rint(x)|. */
+ svfloat32_t n = svrinta_x (pg, x);
+ svfloat32_t r = svsub_x (pg, x, n);
+ r = svsub_x (pg, sv_f32 (0.5f), svabs_x (pg, r));
+
+  /* The result is negated if n is odd.
+     If |x| >= 2^31, the result is always positive. */
+ svbool_t cmp = svaclt (pg, x, d->range_val);
+ svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n));
+ svuint32_t sign = svlsl_z (cmp, intn, 31);
+
+ /* y = sin(r). */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly);
+ y = svmul_x (pg, y, r);
+
+ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
+
+PL_SIG (SV, F, 1, cospi, -0.9, 0.9)
+PL_TEST_ULP (SV_NAME_F1 (cospi), 2.08)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p-31, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0.5, 0x1p31f, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (cospi), 0x1p31f, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c
new file mode 100644
index 000000000000..cbf9718e5bb0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_erf_2u5.c
@@ -0,0 +1,111 @@
+/*
+ * Double-precision vector erf(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ double third;
+ double tenth, two_over_five, two_over_fifteen;
+ double two_over_nine, two_over_fortyfive;
+ double max, shift;
+} data = {
+ .third = 0x1.5555555555556p-2, /* used to compute 2/3 and 1/6 too. */
+ .two_over_fifteen = 0x1.1111111111111p-3,
+ .tenth = -0x1.999999999999ap-4,
+ .two_over_five = -0x1.999999999999ap-2,
+ .two_over_nine = -0x1.c71c71c71c71cp-3,
+ .two_over_fortyfive = 0x1.6c16c16c16c17p-5,
+ .max = 5.9921875, /* 6 - 1/128. */
+ .shift = 0x1p45,
+};
+
+#define SignMask (0x8000000000000000)
+
+/* Double-precision implementation of vector erf(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+ erf(x) ~ erf(r) + scale * d * [
+ + 1
+ - r d
+ + 1/3 (2 r^2 - 1) d^2
+ - 1/6 (r (2 r^2 - 3)) d^3
+ + 1/30 (4 r^4 - 12 r^2 + 3) d^4
+ - 1/90 (4 r^4 - 20 r^2 + 15) d^5
+ ]
+
+   Maximum measured error: 2.29 ULP
+ _ZGVsMxv_erf(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8
+ want -0x1.20dd59132ebafp-8. */
+svfloat64_t SV_NAME_D1 (erf) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+  /* |x| >= 6.0 - 1/128. The two conditions are complementary, except that
+     neither holds for NaN, so they can be used in the lookup and BSLs to
+     yield the expected results. */
+ svbool_t a_ge_max = svacge (pg, x, dat->max);
+ svbool_t a_lt_max = svaclt (pg, x, dat->max);
+
+ /* Set r to multiple of 1/128 nearest to |x|. */
+ svfloat64_t a = svabs_x (pg, x);
+ svfloat64_t shift = sv_f64 (dat->shift);
+ svfloat64_t z = svadd_x (pg, a, shift);
+ svuint64_t i
+ = svsub_x (pg, svreinterpret_u64 (z), svreinterpret_u64 (shift));
+
+ /* Lookup without shortcut for small values but with predicate to avoid
+ segfault for large values and NaNs. */
+ svfloat64_t r = svsub_x (pg, z, shift);
+ svfloat64_t erfr = svld1_gather_index (a_lt_max, __sv_erf_data.erf, i);
+ svfloat64_t scale = svld1_gather_index (a_lt_max, __sv_erf_data.scale, i);
+
+ /* erf(x) ~ erf(r) + scale * d * poly (r, d). */
+ svfloat64_t d = svsub_x (pg, a, r);
+ svfloat64_t d2 = svmul_x (pg, d, d);
+ svfloat64_t r2 = svmul_x (pg, r, r);
+
+ /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. */
+ svfloat64_t p1 = r;
+ svfloat64_t third = sv_f64 (dat->third);
+ svfloat64_t twothird = svmul_x (pg, third, 2.0);
+ svfloat64_t sixth = svmul_x (pg, third, 0.5);
+ svfloat64_t p2 = svmls_x (pg, third, r2, twothird);
+ svfloat64_t p3 = svmad_x (pg, r2, third, -0.5);
+ p3 = svmul_x (pg, r, p3);
+ svfloat64_t p4
+ = svmla_x (pg, sv_f64 (dat->two_over_five), r2, dat->two_over_fifteen);
+ p4 = svmls_x (pg, sv_f64 (dat->tenth), r2, p4);
+ svfloat64_t p5
+ = svmla_x (pg, sv_f64 (dat->two_over_nine), r2, dat->two_over_fortyfive);
+ p5 = svmla_x (pg, sixth, r2, p5);
+ p5 = svmul_x (pg, r, p5);
+
+ svfloat64_t p34 = svmla_x (pg, p3, d, p4);
+ svfloat64_t p12 = svmla_x (pg, p1, d, p2);
+ svfloat64_t y = svmla_x (pg, p34, d2, p5);
+ y = svmla_x (pg, p12, d2, y);
+
+ y = svmla_x (pg, erfr, scale, svmls_x (pg, d, d2, y));
+
+ /* Solves the |x| = inf and NaN cases. */
+ y = svsel (a_ge_max, sv_f64 (1.0), y);
+
+ /* Copy sign. */
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t iy = svreinterpret_u64 (y);
+ svuint64_t sign = svand_x (pg, ix, SignMask);
+ return svreinterpret_f64 (svorr_x (pg, sign, iy));
+}
+
+PL_SIG (SV, D, 1, erf, -6.0, 6.0)
+PL_TEST_ULP (SV_NAME_D1 (erf), 1.79)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, 5.9921875, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 5.9921875, inf, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erf), 0, inf, 4000)
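
The shift of 0x1p45 both rounds |x| to the nearest multiple of 1/128 and exposes the table index in the low bits of the biased sum, since the ulp of |x| + 0x1p45 is exactly 1/128 for |x| < 6. A scalar sketch of that index computation (illustration only):

#include <math.h>
#include <stdint.h>
#include <string.h>

static void
erf_index_sketch (double x, uint64_t *i, double *r)  /* for |x| < 6. */
{
  const double shift = 0x1p45;       /* ulp of (|x| + shift) is 1/128. */
  double a = fabs (x);
  double z = a + shift;
  uint64_t iz, ishift;
  memcpy (&iz, &z, sizeof iz);
  memcpy (&ishift, &shift, sizeof ishift);
  *i = iz - ishift;                  /* round(|x| * 128): the table index. */
  *r = z - shift;                    /* nearest multiple of 1/128 to |x|. */
}
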
diff --git a/contrib/arm-optimized-routines/pl/math/sv_erf_data.c b/contrib/arm-optimized-routines/pl/math/sv_erf_data.c
new file mode 100644
index 000000000000..7244aceda5a5
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_erf_data.c
@@ -0,0 +1,1558 @@
+/*
+ * Data for approximation of erf.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Lookup table used in vector erf.
+ For each possible rounded input r (multiples of 1/128), between
+ r = 0.0 and r = 6.0 (769 values):
+   - the first entry __sv_erf_data.erf contains the values of erf(r),
+   - the second entry __sv_erf_data.scale contains the values of
+ 2/sqrt(pi)*exp(-r^2). Note that indices 0 and 1 are never hit by the
+ algorithm, since lookup is performed only for x >= 1/64-1/512. */
+const struct sv_erf_data __sv_erf_data = {
+ .erf = { 0x0.0000000000000p+0,
+ 0x1.20dbf3deb1340p-7,
+ 0x1.20d77083f17a0p-6,
+ 0x1.b137e0cf584dcp-6,
+ 0x1.20c5645dd2538p-5,
+ 0x1.68e5d3bbc9526p-5,
+ 0x1.b0fafef135745p-5,
+ 0x1.f902a77bd3821p-5,
+ 0x1.207d480e90658p-4,
+ 0x1.44703e87e8593p-4,
+ 0x1.68591a1e83b5dp-4,
+ 0x1.8c36beb8a8d23p-4,
+ 0x1.b0081148a873ap-4,
+ 0x1.d3cbf7e70a4b3p-4,
+ 0x1.f78159ec8bb50p-4,
+ 0x1.0d939005f65e5p-3,
+ 0x1.1f5e1a35c3b89p-3,
+ 0x1.311fc15f56d14p-3,
+ 0x1.42d7fc2f64959p-3,
+ 0x1.548642321d7c6p-3,
+ 0x1.662a0bdf7a89fp-3,
+ 0x1.77c2d2a765f9ep-3,
+ 0x1.895010fdbdbfdp-3,
+ 0x1.9ad142662e14dp-3,
+ 0x1.ac45e37fe2526p-3,
+ 0x1.bdad72110a648p-3,
+ 0x1.cf076d1233237p-3,
+ 0x1.e05354b96ff36p-3,
+ 0x1.f190aa85540e2p-3,
+ 0x1.015f78a3dcf3dp-2,
+ 0x1.09eed6982b948p-2,
+ 0x1.127631eb8de32p-2,
+ 0x1.1af54e232d609p-2,
+ 0x1.236bef825d9a2p-2,
+ 0x1.2bd9db0f7827fp-2,
+ 0x1.343ed6989b7d9p-2,
+ 0x1.3c9aa8b84bedap-2,
+ 0x1.44ed18d9f6462p-2,
+ 0x1.4d35ef3e5372ep-2,
+ 0x1.5574f4ffac98ep-2,
+ 0x1.5da9f415ff23fp-2,
+ 0x1.65d4b75b00471p-2,
+ 0x1.6df50a8dff772p-2,
+ 0x1.760aba57a76bfp-2,
+ 0x1.7e15944d9d3e4p-2,
+ 0x1.861566f5fd3c0p-2,
+ 0x1.8e0a01cab516bp-2,
+ 0x1.95f3353cbb146p-2,
+ 0x1.9dd0d2b721f39p-2,
+ 0x1.a5a2aca209394p-2,
+ 0x1.ad68966569a87p-2,
+ 0x1.b522646bbda68p-2,
+ 0x1.bccfec24855b8p-2,
+ 0x1.c4710406a65fcp-2,
+ 0x1.cc058392a6d2dp-2,
+ 0x1.d38d4354c3bd0p-2,
+ 0x1.db081ce6e2a48p-2,
+ 0x1.e275eaf25e458p-2,
+ 0x1.e9d68931ae650p-2,
+ 0x1.f129d471eabb1p-2,
+ 0x1.f86faa9428f9dp-2,
+ 0x1.ffa7ea8eb5fd0p-2,
+ 0x1.03693a371519cp-1,
+ 0x1.06f794ab2cae7p-1,
+ 0x1.0a7ef5c18edd2p-1,
+ 0x1.0dff4f247f6c6p-1,
+ 0x1.1178930ada115p-1,
+ 0x1.14eab43841b55p-1,
+ 0x1.1855a5fd3dd50p-1,
+ 0x1.1bb95c3746199p-1,
+ 0x1.1f15cb50bc4dep-1,
+ 0x1.226ae840d4d70p-1,
+ 0x1.25b8a88b6dd7fp-1,
+ 0x1.28ff0240d52cdp-1,
+ 0x1.2c3debfd7d6c1p-1,
+ 0x1.2f755ce9a21f4p-1,
+ 0x1.32a54cb8db67bp-1,
+ 0x1.35cdb3a9a144dp-1,
+ 0x1.38ee8a84beb71p-1,
+ 0x1.3c07ca9cb4f9ep-1,
+ 0x1.3f196dcd0f135p-1,
+ 0x1.42236e79a5fa6p-1,
+ 0x1.4525c78dd5966p-1,
+ 0x1.4820747ba2dc2p-1,
+ 0x1.4b13713ad3513p-1,
+ 0x1.4dfeba47f63ccp-1,
+ 0x1.50e24ca35fd2cp-1,
+ 0x1.53be25d016a4fp-1,
+ 0x1.569243d2b3a9bp-1,
+ 0x1.595ea53035283p-1,
+ 0x1.5c2348ecc4dc3p-1,
+ 0x1.5ee02e8a71a53p-1,
+ 0x1.61955607dd15dp-1,
+ 0x1.6442bfdedd397p-1,
+ 0x1.66e86d0312e82p-1,
+ 0x1.69865ee075011p-1,
+ 0x1.6c1c9759d0e5fp-1,
+ 0x1.6eab18c74091bp-1,
+ 0x1.7131e5f496a5ap-1,
+ 0x1.73b1021fc0cb8p-1,
+ 0x1.762870f720c6fp-1,
+ 0x1.78983697dc96fp-1,
+ 0x1.7b00578c26037p-1,
+ 0x1.7d60d8c979f7bp-1,
+ 0x1.7fb9bfaed8078p-1,
+ 0x1.820b1202f27fbp-1,
+ 0x1.8454d5f25760dp-1,
+ 0x1.8697120d92a4ap-1,
+ 0x1.88d1cd474a2e0p-1,
+ 0x1.8b050ef253c37p-1,
+ 0x1.8d30debfc572ep-1,
+ 0x1.8f5544bd00c04p-1,
+ 0x1.91724951b8fc6p-1,
+ 0x1.9387f53df5238p-1,
+ 0x1.959651980da31p-1,
+ 0x1.979d67caa6631p-1,
+ 0x1.999d4192a5715p-1,
+ 0x1.9b95e8fd26abap-1,
+ 0x1.9d8768656cc42p-1,
+ 0x1.9f71ca72cffb6p-1,
+ 0x1.a1551a16aaeafp-1,
+ 0x1.a331628a45b92p-1,
+ 0x1.a506af4cc00f4p-1,
+ 0x1.a6d50c20fa293p-1,
+ 0x1.a89c850b7d54dp-1,
+ 0x1.aa5d265064366p-1,
+ 0x1.ac16fc7143263p-1,
+ 0x1.adca142b10f98p-1,
+ 0x1.af767a741088bp-1,
+ 0x1.b11c3c79bb424p-1,
+ 0x1.b2bb679ead19cp-1,
+ 0x1.b4540978921eep-1,
+ 0x1.b5e62fce16095p-1,
+ 0x1.b771e894d602ep-1,
+ 0x1.b8f741ef54f83p-1,
+ 0x1.ba764a2af2b78p-1,
+ 0x1.bbef0fbde6221p-1,
+ 0x1.bd61a1453ab44p-1,
+ 0x1.bece0d82d1a5cp-1,
+ 0x1.c034635b66e23p-1,
+ 0x1.c194b1d49a184p-1,
+ 0x1.c2ef0812fc1bdp-1,
+ 0x1.c443755820d64p-1,
+ 0x1.c5920900b5fd1p-1,
+ 0x1.c6dad2829ec62p-1,
+ 0x1.c81de16b14cefp-1,
+ 0x1.c95b455cce69dp-1,
+ 0x1.ca930e0e2a825p-1,
+ 0x1.cbc54b476248dp-1,
+ 0x1.ccf20ce0c0d27p-1,
+ 0x1.ce1962c0e0d8bp-1,
+ 0x1.cf3b5cdaf0c39p-1,
+ 0x1.d0580b2cfd249p-1,
+ 0x1.d16f7dbe41ca0p-1,
+ 0x1.d281c49d818d0p-1,
+ 0x1.d38eefdf64fddp-1,
+ 0x1.d4970f9ce00d9p-1,
+ 0x1.d59a33f19ed42p-1,
+ 0x1.d6986cfa798e7p-1,
+ 0x1.d791cad3eff01p-1,
+ 0x1.d8865d98abe01p-1,
+ 0x1.d97635600bb89p-1,
+ 0x1.da61623cb41e0p-1,
+ 0x1.db47f43b2980dp-1,
+ 0x1.dc29fb60715afp-1,
+ 0x1.dd0787a8bb39dp-1,
+ 0x1.dde0a90611a0dp-1,
+ 0x1.deb56f5f12d28p-1,
+ 0x1.df85ea8db188ep-1,
+ 0x1.e0522a5dfda73p-1,
+ 0x1.e11a3e8cf4eb8p-1,
+ 0x1.e1de36c75ba58p-1,
+ 0x1.e29e22a89d766p-1,
+ 0x1.e35a11b9b61cep-1,
+ 0x1.e4121370224ccp-1,
+ 0x1.e4c6372cd8927p-1,
+ 0x1.e5768c3b4a3fcp-1,
+ 0x1.e62321d06c5e0p-1,
+ 0x1.e6cc0709c8a0dp-1,
+ 0x1.e7714aec96534p-1,
+ 0x1.e812fc64db369p-1,
+ 0x1.e8b12a44944a8p-1,
+ 0x1.e94be342e6743p-1,
+ 0x1.e9e335fb56f87p-1,
+ 0x1.ea7730ed0bbb9p-1,
+ 0x1.eb07e27a133aap-1,
+ 0x1.eb9558e6b42cep-1,
+ 0x1.ec1fa258c4beap-1,
+ 0x1.eca6ccd709544p-1,
+ 0x1.ed2ae6489ac1ep-1,
+ 0x1.edabfc7453e63p-1,
+ 0x1.ee2a1d004692cp-1,
+ 0x1.eea5557137ae0p-1,
+ 0x1.ef1db32a2277cp-1,
+ 0x1.ef93436bc2daap-1,
+ 0x1.f006135426b26p-1,
+ 0x1.f0762fde45ee6p-1,
+ 0x1.f0e3a5e1a1788p-1,
+ 0x1.f14e8211e8c55p-1,
+ 0x1.f1b6d0fea5f4dp-1,
+ 0x1.f21c9f12f0677p-1,
+ 0x1.f27ff89525acfp-1,
+ 0x1.f2e0e9a6a8b09p-1,
+ 0x1.f33f7e43a706bp-1,
+ 0x1.f39bc242e43e6p-1,
+ 0x1.f3f5c1558b19ep-1,
+ 0x1.f44d870704911p-1,
+ 0x1.f4a31ebcd47dfp-1,
+ 0x1.f4f693b67bd77p-1,
+ 0x1.f547f10d60597p-1,
+ 0x1.f59741b4b97cfp-1,
+ 0x1.f5e4907982a07p-1,
+ 0x1.f62fe80272419p-1,
+ 0x1.f67952cff6282p-1,
+ 0x1.f6c0db3c34641p-1,
+ 0x1.f7068b7b10fd9p-1,
+ 0x1.f74a6d9a38383p-1,
+ 0x1.f78c8b812d498p-1,
+ 0x1.f7cceef15d631p-1,
+ 0x1.f80ba18636f07p-1,
+ 0x1.f848acb544e95p-1,
+ 0x1.f88419ce4e184p-1,
+ 0x1.f8bdf1fb78370p-1,
+ 0x1.f8f63e416ebffp-1,
+ 0x1.f92d077f8d56dp-1,
+ 0x1.f96256700da8ep-1,
+ 0x1.f99633a838a57p-1,
+ 0x1.f9c8a7989af0dp-1,
+ 0x1.f9f9ba8d3c733p-1,
+ 0x1.fa2974addae45p-1,
+ 0x1.fa57ddfe27376p-1,
+ 0x1.fa84fe5e05c8dp-1,
+ 0x1.fab0dd89d1309p-1,
+ 0x1.fadb831a9f9c3p-1,
+ 0x1.fb04f6868a944p-1,
+ 0x1.fb2d3f20f9101p-1,
+ 0x1.fb54641aebbc9p-1,
+ 0x1.fb7a6c834b5a2p-1,
+ 0x1.fb9f5f4739170p-1,
+ 0x1.fbc3433260ca5p-1,
+ 0x1.fbe61eef4cf6ap-1,
+ 0x1.fc07f907bc794p-1,
+ 0x1.fc28d7e4f9cd0p-1,
+ 0x1.fc48c1d033c7ap-1,
+ 0x1.fc67bcf2d7b8fp-1,
+ 0x1.fc85cf56ecd38p-1,
+ 0x1.fca2fee770c79p-1,
+ 0x1.fcbf5170b578bp-1,
+ 0x1.fcdacca0bfb73p-1,
+ 0x1.fcf57607a6e7cp-1,
+ 0x1.fd0f5317f582fp-1,
+ 0x1.fd2869270a56fp-1,
+ 0x1.fd40bd6d7a785p-1,
+ 0x1.fd58550773cb5p-1,
+ 0x1.fd6f34f52013ap-1,
+ 0x1.fd85621b0876dp-1,
+ 0x1.fd9ae142795e3p-1,
+ 0x1.fdafb719e6a69p-1,
+ 0x1.fdc3e835500b3p-1,
+ 0x1.fdd7790ea5bc0p-1,
+ 0x1.fdea6e062d0c9p-1,
+ 0x1.fdfccb62e52d3p-1,
+ 0x1.fe0e9552ebdd6p-1,
+ 0x1.fe1fcfebe2083p-1,
+ 0x1.fe307f2b503d0p-1,
+ 0x1.fe40a6f70af4bp-1,
+ 0x1.fe504b1d9696cp-1,
+ 0x1.fe5f6f568b301p-1,
+ 0x1.fe6e1742f7cf6p-1,
+ 0x1.fe7c466dc57a1p-1,
+ 0x1.fe8a004c19ae6p-1,
+ 0x1.fe97483db8670p-1,
+ 0x1.fea4218d6594ap-1,
+ 0x1.feb08f7146046p-1,
+ 0x1.febc950b3fa75p-1,
+ 0x1.fec835695932ep-1,
+ 0x1.fed37386190fbp-1,
+ 0x1.fede5248e38f4p-1,
+ 0x1.fee8d486585eep-1,
+ 0x1.fef2fd00af31ap-1,
+ 0x1.fefcce6813974p-1,
+ 0x1.ff064b5afffbep-1,
+ 0x1.ff0f766697c76p-1,
+ 0x1.ff18520700971p-1,
+ 0x1.ff20e0a7ba8c2p-1,
+ 0x1.ff2924a3f7a83p-1,
+ 0x1.ff312046f2339p-1,
+ 0x1.ff38d5cc4227fp-1,
+ 0x1.ff404760319b4p-1,
+ 0x1.ff47772010262p-1,
+ 0x1.ff4e671a85425p-1,
+ 0x1.ff55194fe19dfp-1,
+ 0x1.ff5b8fb26f5f6p-1,
+ 0x1.ff61cc26c1578p-1,
+ 0x1.ff67d08401202p-1,
+ 0x1.ff6d9e943c231p-1,
+ 0x1.ff733814af88cp-1,
+ 0x1.ff789eb6130c9p-1,
+ 0x1.ff7dd41ce2b4dp-1,
+ 0x1.ff82d9e1a76d8p-1,
+ 0x1.ff87b1913e853p-1,
+ 0x1.ff8c5cad200a5p-1,
+ 0x1.ff90dcaba4096p-1,
+ 0x1.ff9532f846ab0p-1,
+ 0x1.ff9960f3eb327p-1,
+ 0x1.ff9d67f51ddbap-1,
+ 0x1.ffa14948549a7p-1,
+ 0x1.ffa506302ebaep-1,
+ 0x1.ffa89fe5b3625p-1,
+ 0x1.ffac17988ef4bp-1,
+ 0x1.ffaf6e6f4f5c0p-1,
+ 0x1.ffb2a5879f35ep-1,
+ 0x1.ffb5bdf67fe6fp-1,
+ 0x1.ffb8b8c88295fp-1,
+ 0x1.ffbb970200110p-1,
+ 0x1.ffbe599f4f9d9p-1,
+ 0x1.ffc10194fcb64p-1,
+ 0x1.ffc38fcffbb7cp-1,
+ 0x1.ffc60535dd7f5p-1,
+ 0x1.ffc862a501fd7p-1,
+ 0x1.ffcaa8f4c9beap-1,
+ 0x1.ffccd8f5c66d1p-1,
+ 0x1.ffcef371ea4d7p-1,
+ 0x1.ffd0f92cb6ba7p-1,
+ 0x1.ffd2eae369a07p-1,
+ 0x1.ffd4c94d29fdbp-1,
+ 0x1.ffd6951b33686p-1,
+ 0x1.ffd84ef9009eep-1,
+ 0x1.ffd9f78c7524ap-1,
+ 0x1.ffdb8f7605ee7p-1,
+ 0x1.ffdd1750e1220p-1,
+ 0x1.ffde8fb314ebfp-1,
+ 0x1.ffdff92db56e5p-1,
+ 0x1.ffe1544d01ccbp-1,
+ 0x1.ffe2a1988857cp-1,
+ 0x1.ffe3e19349dc7p-1,
+ 0x1.ffe514bbdc197p-1,
+ 0x1.ffe63b8c8b5f7p-1,
+ 0x1.ffe7567b7b5e1p-1,
+ 0x1.ffe865fac722bp-1,
+ 0x1.ffe96a78a04a9p-1,
+ 0x1.ffea645f6d6dap-1,
+ 0x1.ffeb5415e7c44p-1,
+ 0x1.ffec39ff380b9p-1,
+ 0x1.ffed167b12ac2p-1,
+ 0x1.ffede9e5d3262p-1,
+ 0x1.ffeeb49896c6dp-1,
+ 0x1.ffef76e956a9fp-1,
+ 0x1.fff0312b010b5p-1,
+ 0x1.fff0e3ad91ec2p-1,
+ 0x1.fff18ebe2b0e1p-1,
+ 0x1.fff232a72b48ep-1,
+ 0x1.fff2cfb0453d9p-1,
+ 0x1.fff3661e9569dp-1,
+ 0x1.fff3f634b79f9p-1,
+ 0x1.fff48032dbe40p-1,
+ 0x1.fff50456dab8cp-1,
+ 0x1.fff582dc48d30p-1,
+ 0x1.fff5fbfc8a439p-1,
+ 0x1.fff66feee5129p-1,
+ 0x1.fff6dee89352ep-1,
+ 0x1.fff7491cd4af6p-1,
+ 0x1.fff7aebcff755p-1,
+ 0x1.fff80ff8911fdp-1,
+ 0x1.fff86cfd3e657p-1,
+ 0x1.fff8c5f702ccfp-1,
+ 0x1.fff91b102fca8p-1,
+ 0x1.fff96c717b695p-1,
+ 0x1.fff9ba420e834p-1,
+ 0x1.fffa04a7928b1p-1,
+ 0x1.fffa4bc63ee9ap-1,
+ 0x1.fffa8fc0e5f33p-1,
+ 0x1.fffad0b901755p-1,
+ 0x1.fffb0ecebee1bp-1,
+ 0x1.fffb4a210b172p-1,
+ 0x1.fffb82cd9dcbfp-1,
+ 0x1.fffbb8f1049c6p-1,
+ 0x1.fffbeca6adbe9p-1,
+ 0x1.fffc1e08f25f5p-1,
+ 0x1.fffc4d3120aa1p-1,
+ 0x1.fffc7a37857d2p-1,
+ 0x1.fffca53375ce3p-1,
+ 0x1.fffcce3b57bffp-1,
+ 0x1.fffcf564ab6b7p-1,
+ 0x1.fffd1ac4135f9p-1,
+ 0x1.fffd3e6d5cd87p-1,
+ 0x1.fffd607387b07p-1,
+ 0x1.fffd80e8ce0dap-1,
+ 0x1.fffd9fdeabccep-1,
+ 0x1.fffdbd65e5ad0p-1,
+ 0x1.fffdd98e903b2p-1,
+ 0x1.fffdf46816833p-1,
+ 0x1.fffe0e0140857p-1,
+ 0x1.fffe26683972ap-1,
+ 0x1.fffe3daa95b18p-1,
+ 0x1.fffe53d558ae9p-1,
+ 0x1.fffe68f4fa777p-1,
+ 0x1.fffe7d156d244p-1,
+ 0x1.fffe904222101p-1,
+ 0x1.fffea2860ee1ep-1,
+ 0x1.fffeb3ebb267bp-1,
+ 0x1.fffec47d19457p-1,
+ 0x1.fffed443e2787p-1,
+ 0x1.fffee34943b15p-1,
+ 0x1.fffef1960d85dp-1,
+ 0x1.fffeff32af7afp-1,
+ 0x1.ffff0c273bea2p-1,
+ 0x1.ffff187b6bc0ep-1,
+ 0x1.ffff2436a21dcp-1,
+ 0x1.ffff2f5fefcaap-1,
+ 0x1.ffff39fe16963p-1,
+ 0x1.ffff44178c8d2p-1,
+ 0x1.ffff4db27f146p-1,
+ 0x1.ffff56d4d5e5ep-1,
+ 0x1.ffff5f8435efcp-1,
+ 0x1.ffff67c604180p-1,
+ 0x1.ffff6f9f67e55p-1,
+ 0x1.ffff77154e0d6p-1,
+ 0x1.ffff7e2c6aea2p-1,
+ 0x1.ffff84e93cd75p-1,
+ 0x1.ffff8b500e77cp-1,
+ 0x1.ffff9164f8e46p-1,
+ 0x1.ffff972be5c59p-1,
+ 0x1.ffff9ca891572p-1,
+ 0x1.ffffa1de8c582p-1,
+ 0x1.ffffa6d13de73p-1,
+ 0x1.ffffab83e54b8p-1,
+ 0x1.ffffaff99bac4p-1,
+ 0x1.ffffb43555b5fp-1,
+ 0x1.ffffb839e52f3p-1,
+ 0x1.ffffbc09fa7cdp-1,
+ 0x1.ffffbfa82616bp-1,
+ 0x1.ffffc316d9ed0p-1,
+ 0x1.ffffc6586abf6p-1,
+ 0x1.ffffc96f1165ep-1,
+ 0x1.ffffcc5cec0c1p-1,
+ 0x1.ffffcf23ff5fcp-1,
+ 0x1.ffffd1c637b2bp-1,
+ 0x1.ffffd4456a10dp-1,
+ 0x1.ffffd6a3554a1p-1,
+ 0x1.ffffd8e1a2f22p-1,
+ 0x1.ffffdb01e8546p-1,
+ 0x1.ffffdd05a75eap-1,
+ 0x1.ffffdeee4f810p-1,
+ 0x1.ffffe0bd3e852p-1,
+ 0x1.ffffe273c15b7p-1,
+ 0x1.ffffe41314e06p-1,
+ 0x1.ffffe59c6698bp-1,
+ 0x1.ffffe710d565ep-1,
+ 0x1.ffffe8717232dp-1,
+ 0x1.ffffe9bf4098cp-1,
+ 0x1.ffffeafb377d5p-1,
+ 0x1.ffffec2641a9ep-1,
+ 0x1.ffffed413e5b7p-1,
+ 0x1.ffffee4d01cd6p-1,
+ 0x1.ffffef4a55bd4p-1,
+ 0x1.fffff039f9e8fp-1,
+ 0x1.fffff11ca4876p-1,
+ 0x1.fffff1f302bc1p-1,
+ 0x1.fffff2bdb904dp-1,
+ 0x1.fffff37d63a36p-1,
+ 0x1.fffff43297019p-1,
+ 0x1.fffff4dde0118p-1,
+ 0x1.fffff57fc4a95p-1,
+ 0x1.fffff618c3da6p-1,
+ 0x1.fffff6a956450p-1,
+ 0x1.fffff731ee681p-1,
+ 0x1.fffff7b2f8ed6p-1,
+ 0x1.fffff82cdcf1bp-1,
+ 0x1.fffff89ffc4aap-1,
+ 0x1.fffff90cb3c81p-1,
+ 0x1.fffff9735b73bp-1,
+ 0x1.fffff9d446cccp-1,
+ 0x1.fffffa2fc5015p-1,
+ 0x1.fffffa8621251p-1,
+ 0x1.fffffad7a2652p-1,
+ 0x1.fffffb248c39dp-1,
+ 0x1.fffffb6d1e95dp-1,
+ 0x1.fffffbb196132p-1,
+ 0x1.fffffbf22c1e2p-1,
+ 0x1.fffffc2f171e3p-1,
+ 0x1.fffffc688a9cfp-1,
+ 0x1.fffffc9eb76acp-1,
+ 0x1.fffffcd1cbc28p-1,
+ 0x1.fffffd01f36afp-1,
+ 0x1.fffffd2f57d68p-1,
+ 0x1.fffffd5a2041fp-1,
+ 0x1.fffffd8271d12p-1,
+ 0x1.fffffda86faa9p-1,
+ 0x1.fffffdcc3b117p-1,
+ 0x1.fffffdedf37edp-1,
+ 0x1.fffffe0db6b91p-1,
+ 0x1.fffffe2ba0ea5p-1,
+ 0x1.fffffe47ccb60p-1,
+ 0x1.fffffe62534d4p-1,
+ 0x1.fffffe7b4c81ep-1,
+ 0x1.fffffe92ced93p-1,
+ 0x1.fffffea8ef9cfp-1,
+ 0x1.fffffebdc2ec6p-1,
+ 0x1.fffffed15bcbap-1,
+ 0x1.fffffee3cc32cp-1,
+ 0x1.fffffef5251c2p-1,
+ 0x1.ffffff0576917p-1,
+ 0x1.ffffff14cfb92p-1,
+ 0x1.ffffff233ee1dp-1,
+ 0x1.ffffff30d18e8p-1,
+ 0x1.ffffff3d9480fp-1,
+ 0x1.ffffff4993c46p-1,
+ 0x1.ffffff54dab72p-1,
+ 0x1.ffffff5f74141p-1,
+ 0x1.ffffff6969fb8p-1,
+ 0x1.ffffff72c5fb6p-1,
+ 0x1.ffffff7b91176p-1,
+ 0x1.ffffff83d3d07p-1,
+ 0x1.ffffff8b962bep-1,
+ 0x1.ffffff92dfba2p-1,
+ 0x1.ffffff99b79d2p-1,
+ 0x1.ffffffa0248e8p-1,
+ 0x1.ffffffa62ce54p-1,
+ 0x1.ffffffabd69b4p-1,
+ 0x1.ffffffb127525p-1,
+ 0x1.ffffffb624592p-1,
+ 0x1.ffffffbad2affp-1,
+ 0x1.ffffffbf370cdp-1,
+ 0x1.ffffffc355dfdp-1,
+ 0x1.ffffffc733572p-1,
+ 0x1.ffffffcad3626p-1,
+ 0x1.ffffffce39b67p-1,
+ 0x1.ffffffd169d0cp-1,
+ 0x1.ffffffd466fa5p-1,
+ 0x1.ffffffd7344aap-1,
+ 0x1.ffffffd9d4aabp-1,
+ 0x1.ffffffdc4ad7ap-1,
+ 0x1.ffffffde9964ep-1,
+ 0x1.ffffffe0c2bf0p-1,
+ 0x1.ffffffe2c92dbp-1,
+ 0x1.ffffffe4aed5ep-1,
+ 0x1.ffffffe675bbdp-1,
+ 0x1.ffffffe81fc4ep-1,
+ 0x1.ffffffe9aeb97p-1,
+ 0x1.ffffffeb24467p-1,
+ 0x1.ffffffec81ff2p-1,
+ 0x1.ffffffedc95e7p-1,
+ 0x1.ffffffeefbc85p-1,
+ 0x1.fffffff01a8b6p-1,
+ 0x1.fffffff126e1ep-1,
+ 0x1.fffffff221f30p-1,
+ 0x1.fffffff30cd3fp-1,
+ 0x1.fffffff3e8892p-1,
+ 0x1.fffffff4b606fp-1,
+ 0x1.fffffff57632dp-1,
+ 0x1.fffffff629e44p-1,
+ 0x1.fffffff6d1e56p-1,
+ 0x1.fffffff76ef3fp-1,
+ 0x1.fffffff801c1fp-1,
+ 0x1.fffffff88af67p-1,
+ 0x1.fffffff90b2e3p-1,
+ 0x1.fffffff982fc1p-1,
+ 0x1.fffffff9f2e9fp-1,
+ 0x1.fffffffa5b790p-1,
+ 0x1.fffffffabd229p-1,
+ 0x1.fffffffb18582p-1,
+ 0x1.fffffffb6d844p-1,
+ 0x1.fffffffbbd0aap-1,
+ 0x1.fffffffc0748fp-1,
+ 0x1.fffffffc4c96cp-1,
+ 0x1.fffffffc8d462p-1,
+ 0x1.fffffffcc9a41p-1,
+ 0x1.fffffffd01f89p-1,
+ 0x1.fffffffd36871p-1,
+ 0x1.fffffffd678edp-1,
+ 0x1.fffffffd954aep-1,
+ 0x1.fffffffdbff2ap-1,
+ 0x1.fffffffde7ba0p-1,
+ 0x1.fffffffe0cd16p-1,
+ 0x1.fffffffe2f664p-1,
+ 0x1.fffffffe4fa30p-1,
+ 0x1.fffffffe6daf7p-1,
+ 0x1.fffffffe89b0cp-1,
+ 0x1.fffffffea3c9ap-1,
+ 0x1.fffffffebc1a9p-1,
+ 0x1.fffffffed2c21p-1,
+ 0x1.fffffffee7dc8p-1,
+ 0x1.fffffffefb847p-1,
+ 0x1.ffffffff0dd2bp-1,
+ 0x1.ffffffff1ede9p-1,
+ 0x1.ffffffff2ebdap-1,
+ 0x1.ffffffff3d843p-1,
+ 0x1.ffffffff4b453p-1,
+ 0x1.ffffffff58126p-1,
+ 0x1.ffffffff63fc3p-1,
+ 0x1.ffffffff6f121p-1,
+ 0x1.ffffffff79626p-1,
+ 0x1.ffffffff82fabp-1,
+ 0x1.ffffffff8be77p-1,
+ 0x1.ffffffff94346p-1,
+ 0x1.ffffffff9bec8p-1,
+ 0x1.ffffffffa319fp-1,
+ 0x1.ffffffffa9c63p-1,
+ 0x1.ffffffffaffa4p-1,
+ 0x1.ffffffffb5be5p-1,
+ 0x1.ffffffffbb1a2p-1,
+ 0x1.ffffffffc014ep-1,
+ 0x1.ffffffffc4b56p-1,
+ 0x1.ffffffffc901cp-1,
+ 0x1.ffffffffccfffp-1,
+ 0x1.ffffffffd0b56p-1,
+ 0x1.ffffffffd4271p-1,
+ 0x1.ffffffffd759dp-1,
+ 0x1.ffffffffda520p-1,
+ 0x1.ffffffffdd13cp-1,
+ 0x1.ffffffffdfa2dp-1,
+ 0x1.ffffffffe202dp-1,
+ 0x1.ffffffffe4371p-1,
+ 0x1.ffffffffe642ap-1,
+ 0x1.ffffffffe8286p-1,
+ 0x1.ffffffffe9eb0p-1,
+ 0x1.ffffffffeb8d0p-1,
+ 0x1.ffffffffed10ap-1,
+ 0x1.ffffffffee782p-1,
+ 0x1.ffffffffefc57p-1,
+ 0x1.fffffffff0fa7p-1,
+ 0x1.fffffffff218fp-1,
+ 0x1.fffffffff3227p-1,
+ 0x1.fffffffff4188p-1,
+ 0x1.fffffffff4fc9p-1,
+ 0x1.fffffffff5cfdp-1,
+ 0x1.fffffffff6939p-1,
+ 0x1.fffffffff748ep-1,
+ 0x1.fffffffff7f0dp-1,
+ 0x1.fffffffff88c5p-1,
+ 0x1.fffffffff91c6p-1,
+ 0x1.fffffffff9a1bp-1,
+ 0x1.fffffffffa1d2p-1,
+ 0x1.fffffffffa8f6p-1,
+ 0x1.fffffffffaf92p-1,
+ 0x1.fffffffffb5b0p-1,
+ 0x1.fffffffffbb58p-1,
+ 0x1.fffffffffc095p-1,
+ 0x1.fffffffffc56dp-1,
+ 0x1.fffffffffc9e8p-1,
+ 0x1.fffffffffce0dp-1,
+ 0x1.fffffffffd1e1p-1,
+ 0x1.fffffffffd56cp-1,
+ 0x1.fffffffffd8b3p-1,
+ 0x1.fffffffffdbbap-1,
+ 0x1.fffffffffde86p-1,
+ 0x1.fffffffffe11dp-1,
+ 0x1.fffffffffe380p-1,
+ 0x1.fffffffffe5b6p-1,
+ 0x1.fffffffffe7c0p-1,
+ 0x1.fffffffffe9a2p-1,
+ 0x1.fffffffffeb60p-1,
+ 0x1.fffffffffecfbp-1,
+ 0x1.fffffffffee77p-1,
+ 0x1.fffffffffefd6p-1,
+ 0x1.ffffffffff11ap-1,
+ 0x1.ffffffffff245p-1,
+ 0x1.ffffffffff359p-1,
+ 0x1.ffffffffff457p-1,
+ 0x1.ffffffffff542p-1,
+ 0x1.ffffffffff61bp-1,
+ 0x1.ffffffffff6e3p-1,
+ 0x1.ffffffffff79bp-1,
+ 0x1.ffffffffff845p-1,
+ 0x1.ffffffffff8e2p-1,
+ 0x1.ffffffffff973p-1,
+ 0x1.ffffffffff9f8p-1,
+ 0x1.ffffffffffa73p-1,
+ 0x1.ffffffffffae4p-1,
+ 0x1.ffffffffffb4cp-1,
+ 0x1.ffffffffffbadp-1,
+ 0x1.ffffffffffc05p-1,
+ 0x1.ffffffffffc57p-1,
+ 0x1.ffffffffffca2p-1,
+ 0x1.ffffffffffce7p-1,
+ 0x1.ffffffffffd27p-1,
+ 0x1.ffffffffffd62p-1,
+ 0x1.ffffffffffd98p-1,
+ 0x1.ffffffffffdcap-1,
+ 0x1.ffffffffffdf8p-1,
+ 0x1.ffffffffffe22p-1,
+ 0x1.ffffffffffe49p-1,
+ 0x1.ffffffffffe6cp-1,
+ 0x1.ffffffffffe8dp-1,
+ 0x1.ffffffffffeabp-1,
+ 0x1.ffffffffffec7p-1,
+ 0x1.ffffffffffee1p-1,
+ 0x1.ffffffffffef8p-1,
+ 0x1.fffffffffff0ep-1,
+ 0x1.fffffffffff22p-1,
+ 0x1.fffffffffff34p-1,
+ 0x1.fffffffffff45p-1,
+ 0x1.fffffffffff54p-1,
+ 0x1.fffffffffff62p-1,
+ 0x1.fffffffffff6fp-1,
+ 0x1.fffffffffff7bp-1,
+ 0x1.fffffffffff86p-1,
+ 0x1.fffffffffff90p-1,
+ 0x1.fffffffffff9ap-1,
+ 0x1.fffffffffffa2p-1,
+ 0x1.fffffffffffaap-1,
+ 0x1.fffffffffffb1p-1,
+ 0x1.fffffffffffb8p-1,
+ 0x1.fffffffffffbep-1,
+ 0x1.fffffffffffc3p-1,
+ 0x1.fffffffffffc8p-1,
+ 0x1.fffffffffffcdp-1,
+ 0x1.fffffffffffd1p-1,
+ 0x1.fffffffffffd5p-1,
+ 0x1.fffffffffffd9p-1,
+ 0x1.fffffffffffdcp-1,
+ 0x1.fffffffffffdfp-1,
+ 0x1.fffffffffffe2p-1,
+ 0x1.fffffffffffe4p-1,
+ 0x1.fffffffffffe7p-1,
+ 0x1.fffffffffffe9p-1,
+ 0x1.fffffffffffebp-1,
+ 0x1.fffffffffffedp-1,
+ 0x1.fffffffffffeep-1,
+ 0x1.ffffffffffff0p-1,
+ 0x1.ffffffffffff1p-1,
+ 0x1.ffffffffffff3p-1,
+ 0x1.ffffffffffff4p-1,
+ 0x1.ffffffffffff5p-1,
+ 0x1.ffffffffffff6p-1,
+ 0x1.ffffffffffff7p-1,
+ 0x1.ffffffffffff7p-1,
+ 0x1.ffffffffffff8p-1,
+ 0x1.ffffffffffff9p-1,
+ 0x1.ffffffffffff9p-1,
+ 0x1.ffffffffffffap-1,
+ 0x1.ffffffffffffbp-1,
+ 0x1.ffffffffffffbp-1,
+ 0x1.ffffffffffffbp-1,
+ 0x1.ffffffffffffcp-1,
+ 0x1.ffffffffffffcp-1,
+ 0x1.ffffffffffffdp-1,
+ 0x1.ffffffffffffdp-1,
+ 0x1.ffffffffffffdp-1,
+ 0x1.ffffffffffffdp-1,
+ 0x1.ffffffffffffep-1,
+ 0x1.ffffffffffffep-1,
+ 0x1.ffffffffffffep-1,
+ 0x1.ffffffffffffep-1,
+ 0x1.ffffffffffffep-1,
+ 0x1.ffffffffffffep-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.fffffffffffffp-1,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0,
+ },
+ .scale = { 0x1.20dd750429b6dp+0,
+ 0x1.20d8f1975c85dp+0,
+ 0x1.20cb67bd452c7p+0,
+ 0x1.20b4d8bac36c1p+0,
+ 0x1.209546ad13ccfp+0,
+ 0x1.206cb4897b148p+0,
+ 0x1.203b261cd0052p+0,
+ 0x1.2000a00ae3804p+0,
+ 0x1.1fbd27cdc72d3p+0,
+ 0x1.1f70c3b4f2cc7p+0,
+ 0x1.1f1b7ae44867fp+0,
+ 0x1.1ebd5552f795bp+0,
+ 0x1.1e565bca400d4p+0,
+ 0x1.1de697e413d28p+0,
+ 0x1.1d6e14099944ap+0,
+ 0x1.1cecdb718d61cp+0,
+ 0x1.1c62fa1e869b6p+0,
+ 0x1.1bd07cdd189acp+0,
+ 0x1.1b357141d95d5p+0,
+ 0x1.1a91e5a748165p+0,
+ 0x1.19e5e92b964abp+0,
+ 0x1.19318bae53a04p+0,
+ 0x1.1874ddcdfce24p+0,
+ 0x1.17aff0e56ec10p+0,
+ 0x1.16e2d7093cd8cp+0,
+ 0x1.160da304ed92fp+0,
+ 0x1.153068581b781p+0,
+ 0x1.144b3b337c90cp+0,
+ 0x1.135e3075d076bp+0,
+ 0x1.12695da8b5bdep+0,
+ 0x1.116cd8fd67618p+0,
+ 0x1.1068b94962e5ep+0,
+ 0x1.0f5d1602f7e41p+0,
+ 0x1.0e4a073dc1b91p+0,
+ 0x1.0d2fa5a70c168p+0,
+ 0x1.0c0e0a8223359p+0,
+ 0x1.0ae54fa490722p+0,
+ 0x1.09b58f724416bp+0,
+ 0x1.087ee4d9ad247p+0,
+ 0x1.07416b4fbfe7cp+0,
+ 0x1.05fd3ecbec297p+0,
+ 0x1.04b27bc403d30p+0,
+ 0x1.03613f2812dafp+0,
+ 0x1.0209a65e29545p+0,
+ 0x1.00abcf3e187a9p+0,
+ 0x1.fe8fb01a47307p-1,
+ 0x1.fbbbbef34b4b2p-1,
+ 0x1.f8dc092d58ff8p-1,
+ 0x1.f5f0cdaf15313p-1,
+ 0x1.f2fa4c16c0019p-1,
+ 0x1.eff8c4b1375dbp-1,
+ 0x1.ecec7870ebca7p-1,
+ 0x1.e9d5a8e4c934ep-1,
+ 0x1.e6b4982f158b9p-1,
+ 0x1.e38988fc46e72p-1,
+ 0x1.e054be79d3042p-1,
+ 0x1.dd167c4cf9d2ap-1,
+ 0x1.d9cf06898cdafp-1,
+ 0x1.d67ea1a8b5368p-1,
+ 0x1.d325927fb9d89p-1,
+ 0x1.cfc41e36c7df9p-1,
+ 0x1.cc5a8a3fbea40p-1,
+ 0x1.c8e91c4d01368p-1,
+ 0x1.c5701a484ef9dp-1,
+ 0x1.c1efca49a5011p-1,
+ 0x1.be68728e29d5dp-1,
+ 0x1.bada596f25436p-1,
+ 0x1.b745c55905bf8p-1,
+ 0x1.b3aafcc27502ep-1,
+ 0x1.b00a46237d5bep-1,
+ 0x1.ac63e7ecc1411p-1,
+ 0x1.a8b8287ec6a09p-1,
+ 0x1.a5074e2157620p-1,
+ 0x1.a1519efaf889ep-1,
+ 0x1.9d97610879642p-1,
+ 0x1.99d8da149c13fp-1,
+ 0x1.96164fafd8de3p-1,
+ 0x1.925007283d7aap-1,
+ 0x1.8e86458169af8p-1,
+ 0x1.8ab94f6caa71dp-1,
+ 0x1.86e9694134b9ep-1,
+ 0x1.8316d6f48133dp-1,
+ 0x1.7f41dc12c9e89p-1,
+ 0x1.7b6abbb7aaf19p-1,
+ 0x1.7791b886e7403p-1,
+ 0x1.73b714a552763p-1,
+ 0x1.6fdb11b1e0c34p-1,
+ 0x1.6bfdf0beddaf5p-1,
+ 0x1.681ff24b4ab04p-1,
+ 0x1.6441563c665d4p-1,
+ 0x1.60625bd75d07bp-1,
+ 0x1.5c8341bb23767p-1,
+ 0x1.58a445da7c74cp-1,
+ 0x1.54c5a57629db0p-1,
+ 0x1.50e79d1749ac9p-1,
+ 0x1.4d0a6889dfd9fp-1,
+ 0x1.492e42d78d2c5p-1,
+ 0x1.4553664273d24p-1,
+ 0x1.417a0c4049fd0p-1,
+ 0x1.3da26d759aef5p-1,
+ 0x1.39ccc1b136d5ap-1,
+ 0x1.35f93fe7d1b3dp-1,
+ 0x1.32281e2fd1a92p-1,
+ 0x1.2e5991bd4cbfcp-1,
+ 0x1.2a8dcede3673bp-1,
+ 0x1.26c508f6bd0ffp-1,
+ 0x1.22ff727dd6f7bp-1,
+ 0x1.1f3d3cf9ffe5ap-1,
+ 0x1.1b7e98fe26217p-1,
+ 0x1.17c3b626c7a11p-1,
+ 0x1.140cc3173f007p-1,
+ 0x1.1059ed7740313p-1,
+ 0x1.0cab61f084b93p-1,
+ 0x1.09014c2ca74dap-1,
+ 0x1.055bd6d32e8d7p-1,
+ 0x1.01bb2b87c6968p-1,
+ 0x1.fc3ee5d1524b0p-2,
+ 0x1.f511a91a67d2ap-2,
+ 0x1.edeeee0959518p-2,
+ 0x1.e6d6ffaa65a25p-2,
+ 0x1.dfca26f5bbf88p-2,
+ 0x1.d8c8aace11e63p-2,
+ 0x1.d1d2cfff91594p-2,
+ 0x1.cae8d93f1d7b6p-2,
+ 0x1.c40b0729ed547p-2,
+ 0x1.bd3998457afdap-2,
+ 0x1.b674c8ffc6283p-2,
+ 0x1.afbcd3afe8ab6p-2,
+ 0x1.a911f096fbc26p-2,
+ 0x1.a27455e14c93cp-2,
+ 0x1.9be437a7de946p-2,
+ 0x1.9561c7f23a47bp-2,
+ 0x1.8eed36b886d93p-2,
+ 0x1.8886b1e5ecfd1p-2,
+ 0x1.822e655b417e6p-2,
+ 0x1.7be47af1f5d89p-2,
+ 0x1.75a91a7f4d2edp-2,
+ 0x1.6f7c69d7d3ef8p-2,
+ 0x1.695e8cd31867ep-2,
+ 0x1.634fa54fa285fp-2,
+ 0x1.5d4fd33729015p-2,
+ 0x1.575f3483021c3p-2,
+ 0x1.517de540ce2a3p-2,
+ 0x1.4babff975a04cp-2,
+ 0x1.45e99bcbb7915p-2,
+ 0x1.4036d0468a7a2p-2,
+ 0x1.3a93b1998736cp-2,
+ 0x1.35005285227f1p-2,
+ 0x1.2f7cc3fe6f423p-2,
+ 0x1.2a09153529381p-2,
+ 0x1.24a55399ea239p-2,
+ 0x1.1f518ae487dc8p-2,
+ 0x1.1a0dc51a9934dp-2,
+ 0x1.14da0a961fd14p-2,
+ 0x1.0fb6620c550afp-2,
+ 0x1.0aa2d09497f2bp-2,
+ 0x1.059f59af7a906p-2,
+ 0x1.00abff4dec7a3p-2,
+ 0x1.f79183b101c5bp-3,
+ 0x1.edeb406d9c824p-3,
+ 0x1.e4652fadcb6b2p-3,
+ 0x1.daff4969c0b04p-3,
+ 0x1.d1b982c501370p-3,
+ 0x1.c893ce1dcbef7p-3,
+ 0x1.bf8e1b1ca2279p-3,
+ 0x1.b6a856c3ed54fp-3,
+ 0x1.ade26b7fbed95p-3,
+ 0x1.a53c4135a6526p-3,
+ 0x1.9cb5bd549b111p-3,
+ 0x1.944ec2e4f5630p-3,
+ 0x1.8c07329874652p-3,
+ 0x1.83deeada4d25ap-3,
+ 0x1.7bd5c7df3fe9cp-3,
+ 0x1.73eba3b5b07b7p-3,
+ 0x1.6c205655be71fp-3,
+ 0x1.6473b5b15a7a1p-3,
+ 0x1.5ce595c455b0ap-3,
+ 0x1.5575c8a468361p-3,
+ 0x1.4e241e912c305p-3,
+ 0x1.46f066040a832p-3,
+ 0x1.3fda6bc016994p-3,
+ 0x1.38e1fae1d6a9dp-3,
+ 0x1.3206dceef5f87p-3,
+ 0x1.2b48d9e5dea1cp-3,
+ 0x1.24a7b84d38971p-3,
+ 0x1.1e233d434b813p-3,
+ 0x1.17bb2c8d41535p-3,
+ 0x1.116f48a6476ccp-3,
+ 0x1.0b3f52ce8c383p-3,
+ 0x1.052b0b1a174eap-3,
+ 0x1.fe6460fef4680p-4,
+ 0x1.f2a901ccafb37p-4,
+ 0x1.e723726b824a9p-4,
+ 0x1.dbd32ac4c99b0p-4,
+ 0x1.d0b7a0f921e7cp-4,
+ 0x1.c5d0497c09e74p-4,
+ 0x1.bb1c972f23e50p-4,
+ 0x1.b09bfb7d11a83p-4,
+ 0x1.a64de673e8837p-4,
+ 0x1.9c31c6df3b1b8p-4,
+ 0x1.92470a61b6965p-4,
+ 0x1.888d1d8e510a3p-4,
+ 0x1.7f036c0107294p-4,
+ 0x1.75a96077274bap-4,
+ 0x1.6c7e64e7281cbp-4,
+ 0x1.6381e2980956bp-4,
+ 0x1.5ab342383d177p-4,
+ 0x1.5211ebf41880bp-4,
+ 0x1.499d478bca735p-4,
+ 0x1.4154bc68d75c3p-4,
+ 0x1.3937b1b319259p-4,
+ 0x1.31458e6542847p-4,
+ 0x1.297db960e4f63p-4,
+ 0x1.21df9981f8e53p-4,
+ 0x1.1a6a95b1e786fp-4,
+ 0x1.131e14fa1625dp-4,
+ 0x1.0bf97e95f2a64p-4,
+ 0x1.04fc3a0481321p-4,
+ 0x1.fc4b5e32d6259p-5,
+ 0x1.eeea8c1b1db93p-5,
+ 0x1.e1d4cf1e2450ap-5,
+ 0x1.d508f9a1ea64ep-5,
+ 0x1.c885df3451a07p-5,
+ 0x1.bc4a54a84e834p-5,
+ 0x1.b055303221015p-5,
+ 0x1.a4a549829587ep-5,
+ 0x1.993979e14fffdp-5,
+ 0x1.8e109c4622913p-5,
+ 0x1.83298d717210ep-5,
+ 0x1.78832c03aa2b1p-5,
+ 0x1.6e1c5893c380bp-5,
+ 0x1.63f3f5c4de13bp-5,
+ 0x1.5a08e85af27e0p-5,
+ 0x1.505a174e9c929p-5,
+ 0x1.46e66be002240p-5,
+ 0x1.3dacd1a8d8ccdp-5,
+ 0x1.34ac36ad8dafep-5,
+ 0x1.2be38b6d92415p-5,
+ 0x1.2351c2f2d1449p-5,
+ 0x1.1af5d2e04f3f6p-5,
+ 0x1.12ceb37ff9bc3p-5,
+ 0x1.0adb5fcfa8c75p-5,
+ 0x1.031ad58d56279p-5,
+ 0x1.f7182a851bca2p-6,
+ 0x1.e85c449e377f2p-6,
+ 0x1.da0005e5f28dfp-6,
+ 0x1.cc0180af00a8bp-6,
+ 0x1.be5ecd2fcb5f9p-6,
+ 0x1.b1160991ff737p-6,
+ 0x1.a4255a00b9f03p-6,
+ 0x1.978ae8b55ce1bp-6,
+ 0x1.8b44e6031383ep-6,
+ 0x1.7f5188610ddc8p-6,
+ 0x1.73af0c737bb45p-6,
+ 0x1.685bb5134ef13p-6,
+ 0x1.5d55cb54cd53ap-6,
+ 0x1.529b9e8cf9a1ep-6,
+ 0x1.482b8455dc491p-6,
+ 0x1.3e03d891b37dep-6,
+ 0x1.3422fd6d12e2bp-6,
+ 0x1.2a875b5ffab56p-6,
+ 0x1.212f612dee7fbp-6,
+ 0x1.181983e5133ddp-6,
+ 0x1.0f443edc5ce49p-6,
+ 0x1.06ae13b0d3255p-6,
+ 0x1.fcab1483ea7fcp-7,
+ 0x1.ec72615a894c4p-7,
+ 0x1.dcaf3691fc448p-7,
+ 0x1.cd5ec93c12431p-7,
+ 0x1.be7e5ac24963bp-7,
+ 0x1.b00b38d6b3575p-7,
+ 0x1.a202bd6372dcep-7,
+ 0x1.94624e78e0fafp-7,
+ 0x1.87275e3a6869dp-7,
+ 0x1.7a4f6aca256cbp-7,
+ 0x1.6dd7fe3358230p-7,
+ 0x1.61beae53b72b7p-7,
+ 0x1.56011cc3b036dp-7,
+ 0x1.4a9cf6bda3f4cp-7,
+ 0x1.3f8ff5042a88ep-7,
+ 0x1.34d7dbc76d7e5p-7,
+ 0x1.2a727a89a3f14p-7,
+ 0x1.205dac02bd6b9p-7,
+ 0x1.1697560347b25p-7,
+ 0x1.0d1d69569b82dp-7,
+ 0x1.03ede1a45bfeep-7,
+ 0x1.f60d8aa2a88f2p-8,
+ 0x1.e4cc4abf7d065p-8,
+ 0x1.d4143a9dfe965p-8,
+ 0x1.c3e1a5f5c077cp-8,
+ 0x1.b430ecf4a83a8p-8,
+ 0x1.a4fe83fb9db25p-8,
+ 0x1.9646f35a76623p-8,
+ 0x1.8806d70b2fc36p-8,
+ 0x1.7a3ade6c8b3e4p-8,
+ 0x1.6cdfcbfc1e263p-8,
+ 0x1.5ff2750fe7820p-8,
+ 0x1.536fc18f7ce5cp-8,
+ 0x1.4754abacdf1dcp-8,
+ 0x1.3b9e3f9d06e3fp-8,
+ 0x1.30499b503957fp-8,
+ 0x1.2553ee2a336bfp-8,
+ 0x1.1aba78ba3af89p-8,
+ 0x1.107a8c7323a6ep-8,
+ 0x1.06918b6355624p-8,
+ 0x1.f9f9cfd9c3035p-9,
+ 0x1.e77448fb66bb9p-9,
+ 0x1.d58da68fd1170p-9,
+ 0x1.c4412bf4b8f0bp-9,
+ 0x1.b38a3af2e55b4p-9,
+ 0x1.a3645330550ffp-9,
+ 0x1.93cb11a30d765p-9,
+ 0x1.84ba3004a50d0p-9,
+ 0x1.762d84469c18fp-9,
+ 0x1.6821000795a03p-9,
+ 0x1.5a90b00981d93p-9,
+ 0x1.4d78bba8ca5fdp-9,
+ 0x1.40d564548fad7p-9,
+ 0x1.34a305080681fp-9,
+ 0x1.28de11c5031ebp-9,
+ 0x1.1d83170fbf6fbp-9,
+ 0x1.128eb96be8798p-9,
+ 0x1.07fdb4dafea5fp-9,
+ 0x1.fb99b8b8279e1p-10,
+ 0x1.e7f232d9e2630p-10,
+ 0x1.d4fed7195d7e8p-10,
+ 0x1.c2b9cf7f893bfp-10,
+ 0x1.b11d702b3deb1p-10,
+ 0x1.a024365f771bdp-10,
+ 0x1.8fc8c794b03b5p-10,
+ 0x1.8005f08d6f1efp-10,
+ 0x1.70d6a46e07ddap-10,
+ 0x1.6235fbd7a4345p-10,
+ 0x1.541f340697987p-10,
+ 0x1.468dadf4080abp-10,
+ 0x1.397ced7af2b15p-10,
+ 0x1.2ce898809244ep-10,
+ 0x1.20cc76202c5fap-10,
+ 0x1.15246dda49d47p-10,
+ 0x1.09ec86c75d497p-10,
+ 0x1.fe41cd9bb4eeep-11,
+ 0x1.e97ba3b77f306p-11,
+ 0x1.d57f524723822p-11,
+ 0x1.c245d4b998479p-11,
+ 0x1.afc85e0f82e12p-11,
+ 0x1.9e005769dbc1dp-11,
+ 0x1.8ce75e9f6f8a0p-11,
+ 0x1.7c7744d9378f7p-11,
+ 0x1.6caa0d3582fe9p-11,
+ 0x1.5d79eb71e893bp-11,
+ 0x1.4ee1429bf7cc0p-11,
+ 0x1.40daa3c89f5b6p-11,
+ 0x1.3360ccd23db3ap-11,
+ 0x1.266ea71d4f71ap-11,
+ 0x1.19ff4663ae9dfp-11,
+ 0x1.0e0de78654d1ep-11,
+ 0x1.0295ef6591848p-11,
+ 0x1.ef25d37f49fe1p-12,
+ 0x1.da01102b5f851p-12,
+ 0x1.c5b5412dcafadp-12,
+ 0x1.b23a5a23e4210p-12,
+ 0x1.9f8893d8fd1c1p-12,
+ 0x1.8d986a4187285p-12,
+ 0x1.7c629a822bc9ep-12,
+ 0x1.6be02102b3520p-12,
+ 0x1.5c0a378c90bcap-12,
+ 0x1.4cda5374ea275p-12,
+ 0x1.3e4a23d1f4702p-12,
+ 0x1.30538fbb77ecdp-12,
+ 0x1.22f0b496539bdp-12,
+ 0x1.161be46ad3b50p-12,
+ 0x1.09cfa445b00ffp-12,
+ 0x1.fc0d55470cf51p-13,
+ 0x1.e577bbcd49935p-13,
+ 0x1.cfd4a5adec5bfp-13,
+ 0x1.bb1a9657ce465p-13,
+ 0x1.a740684026555p-13,
+ 0x1.943d4a1d1ed39p-13,
+ 0x1.8208bc334a6a5p-13,
+ 0x1.709a8db59f25cp-13,
+ 0x1.5feada379d8b7p-13,
+ 0x1.4ff207314a102p-13,
+ 0x1.40a8c1949f75ep-13,
+ 0x1.3207fb7420eb9p-13,
+ 0x1.2408e9ba3327fp-13,
+ 0x1.16a501f0e42cap-13,
+ 0x1.09d5f819c9e29p-13,
+ 0x1.fb2b792b40a22p-14,
+ 0x1.e3bcf436a1a95p-14,
+ 0x1.cd55277c18d05p-14,
+ 0x1.b7e94604479dcp-14,
+ 0x1.a36eec00926ddp-14,
+ 0x1.8fdc1b2dcf7b9p-14,
+ 0x1.7d2737527c3f9p-14,
+ 0x1.6b4702d7d5849p-14,
+ 0x1.5a329b7d30748p-14,
+ 0x1.49e17724f4d41p-14,
+ 0x1.3a4b60ba9aa4dp-14,
+ 0x1.2b6875310f785p-14,
+ 0x1.1d312098e9dbap-14,
+ 0x1.0f9e1b4dd36dfp-14,
+ 0x1.02a8673a94691p-14,
+ 0x1.ec929a665b449p-15,
+ 0x1.d4f4b4c8e09edp-15,
+ 0x1.be6abbb10a5aap-15,
+ 0x1.a8e8cc1fadef6p-15,
+ 0x1.94637d5bacfdbp-15,
+ 0x1.80cfdc72220cfp-15,
+ 0x1.6e2367dc27f95p-15,
+ 0x1.5c540b4936fd2p-15,
+ 0x1.4b581b8d170fcp-15,
+ 0x1.3b2652b06c2b2p-15,
+ 0x1.2bb5cc22e5db6p-15,
+ 0x1.1cfe010e2052dp-15,
+ 0x1.0ef6c4c84a0fep-15,
+ 0x1.01984165a5f36p-15,
+ 0x1.e9b5e8d00ce76p-16,
+ 0x1.d16f5716c6c1ap-16,
+ 0x1.ba4f035d60e02p-16,
+ 0x1.a447b7b03f045p-16,
+ 0x1.8f4ccca7fc90dp-16,
+ 0x1.7b5223dac7336p-16,
+ 0x1.684c227fcacefp-16,
+ 0x1.562fac4329b48p-16,
+ 0x1.44f21e49054f2p-16,
+ 0x1.34894a5e24657p-16,
+ 0x1.24eb7254ccf83p-16,
+ 0x1.160f438c70913p-16,
+ 0x1.07ebd2a2d2844p-16,
+ 0x1.f4f12e9ab070ap-17,
+ 0x1.db5ad0b27805cp-17,
+ 0x1.c304efa2c6f4ep-17,
+ 0x1.abe09e9144b5ep-17,
+ 0x1.95df988e76644p-17,
+ 0x1.80f439b4ee04bp-17,
+ 0x1.6d11788a69c64p-17,
+ 0x1.5a2adfa0b4bc4p-17,
+ 0x1.4834877429b8fp-17,
+ 0x1.37231085c7d9ap-17,
+ 0x1.26eb9daed6f7ep-17,
+ 0x1.1783ceac28910p-17,
+ 0x1.08e1badf0fcedp-17,
+ 0x1.f5f7d88472604p-18,
+ 0x1.db92b5212fb8dp-18,
+ 0x1.c282cd3957edap-18,
+ 0x1.aab7abace48dcp-18,
+ 0x1.94219bfcb4928p-18,
+ 0x1.7eb1a2075864dp-18,
+ 0x1.6a597219a93d9p-18,
+ 0x1.570b69502f313p-18,
+ 0x1.44ba864670882p-18,
+ 0x1.335a62115bce2p-18,
+ 0x1.22df298214423p-18,
+ 0x1.133d96ae7e0ddp-18,
+ 0x1.046aeabcfcdecp-18,
+ 0x1.ecb9cfe1d8642p-19,
+ 0x1.d21397ead99cbp-19,
+ 0x1.b8d094c86d374p-19,
+ 0x1.a0df0f0c626dcp-19,
+ 0x1.8a2e269750a39p-19,
+ 0x1.74adc8f4064d3p-19,
+ 0x1.604ea819f007cp-19,
+ 0x1.4d0231928c6f9p-19,
+ 0x1.3aba85fe22e1fp-19,
+ 0x1.296a70f414053p-19,
+ 0x1.1905613b3abf2p-19,
+ 0x1.097f6156f32c5p-19,
+ 0x1.f59a20caf6695p-20,
+ 0x1.d9c73698fb1dcp-20,
+ 0x1.bf716c6168baep-20,
+ 0x1.a6852c6b58392p-20,
+ 0x1.8eefd70594a88p-20,
+ 0x1.789fb715aae95p-20,
+ 0x1.6383f726a8e04p-20,
+ 0x1.4f8c96f26a26ap-20,
+ 0x1.3caa61607f920p-20,
+ 0x1.2acee2f5ecdb8p-20,
+ 0x1.19ec60b1242edp-20,
+ 0x1.09f5cf4dd2877p-20,
+ 0x1.f5bd95d8730d8p-21,
+ 0x1.d9371e2ff7c35p-21,
+ 0x1.be41de54d155ap-21,
+ 0x1.a4c89e08ef4f3p-21,
+ 0x1.8cb738399b12cp-21,
+ 0x1.75fa8dbc84becp-21,
+ 0x1.608078a70dcbcp-21,
+ 0x1.4c37c0394d094p-21,
+ 0x1.39100d5687bfep-21,
+ 0x1.26f9df8519bd6p-21,
+ 0x1.15e6827001f18p-21,
+ 0x1.05c803e4831c1p-21,
+ 0x1.ed22548cffd35p-22,
+ 0x1.d06ad6ecdf971p-22,
+ 0x1.b551c847fbc96p-22,
+ 0x1.9bc09f112b494p-22,
+ 0x1.83a1ff0aa239dp-22,
+ 0x1.6ce1aa3fd7bddp-22,
+ 0x1.576c72b514859p-22,
+ 0x1.43302cc4a0da8p-22,
+ 0x1.301ba221dc9bbp-22,
+ 0x1.1e1e857adc568p-22,
+ 0x1.0d2966b1746f7p-22,
+ 0x1.fa5b4f49cc6b2p-23,
+ 0x1.dc3ae30b55c16p-23,
+ 0x1.bfd7555a3bd68p-23,
+ 0x1.a517d9e61628ap-23,
+ 0x1.8be4f8f6c951fp-23,
+ 0x1.74287ded49339p-23,
+ 0x1.5dcd669f2cd34p-23,
+ 0x1.48bfd38302870p-23,
+ 0x1.34ecf8a3c124ap-23,
+ 0x1.22430f521cbcfp-23,
+ 0x1.10b1488aeb235p-23,
+ 0x1.0027c00a263a6p-23,
+ 0x1.e12ee004efc37p-24,
+ 0x1.c3e44ae32b16bp-24,
+ 0x1.a854ea14102a8p-24,
+ 0x1.8e6761569f45dp-24,
+ 0x1.7603bac345f65p-24,
+ 0x1.5f1353cdad001p-24,
+ 0x1.4980cb3c80949p-24,
+ 0x1.3537f00b6ad4dp-24,
+ 0x1.2225b12bffc68p-24,
+ 0x1.10380e1adb7e9p-24,
+ 0x1.febc107d5efaap-25,
+ 0x1.df0f2a0ee6946p-25,
+ 0x1.c14b2188bcee4p-25,
+ 0x1.a553644f7f07dp-25,
+ 0x1.8b0cfce0579dfp-25,
+ 0x1.725e7c5dd20f7p-25,
+ 0x1.5b2fe547a1340p-25,
+ 0x1.456a974e92e93p-25,
+ 0x1.30f93c3699078p-25,
+ 0x1.1dc7b5b978cf8p-25,
+ 0x1.0bc30c5d52f15p-25,
+ 0x1.f5b2be65a0c7fp-26,
+ 0x1.d5f3a8dea7357p-26,
+ 0x1.b82915b03515bp-26,
+ 0x1.9c3517e789488p-26,
+ 0x1.81fb7df06136ep-26,
+ 0x1.6961b8d641d06p-26,
+ 0x1.524ec4d916caep-26,
+ 0x1.3cab1343d18d1p-26,
+ 0x1.2860757487a01p-26,
+ 0x1.155a09065d4f7p-26,
+ 0x1.0384250e4c9fcp-26,
+ 0x1.e59890b926c78p-27,
+ 0x1.c642116a8a9e3p-27,
+ 0x1.a8e405e651ab6p-27,
+ 0x1.8d5f98114f872p-27,
+ 0x1.7397c5a66e307p-27,
+ 0x1.5b71456c5a4c4p-27,
+ 0x1.44d26de513197p-27,
+ 0x1.2fa31d6371537p-27,
+ 0x1.1bcca373b7b43p-27,
+ 0x1.0939ab853339fp-27,
+ 0x1.efac5187b2863p-28,
+ 0x1.cf1e86235d0e6p-28,
+ 0x1.b0a68a2128babp-28,
+ 0x1.9423165bc4444p-28,
+ 0x1.7974e743dea3cp-28,
+ 0x1.607e9eacd1050p-28,
+ 0x1.4924a74dec728p-28,
+ 0x1.334d19e0c2160p-28,
+ 0x1.1edfa3c5f5ccap-28,
+ 0x1.0bc56f1b54701p-28,
+ 0x1.f3d2185e047d9p-29,
+ 0x1.d26cb87945e87p-29,
+ 0x1.b334fac4b9f99p-29,
+ 0x1.96076f7918d1cp-29,
+ 0x1.7ac2d72fc2c63p-29,
+ 0x1.614801550319ep-29,
+ 0x1.4979ac8b28926p-29,
+ 0x1.333c68e2d0548p-29,
+ 0x1.1e767bce37dd7p-29,
+ 0x1.0b0fc5b6d05a0p-29,
+ 0x1.f1e3523b41d7dp-30,
+ 0x1.d00de6608effep-30,
+ 0x1.b0778b7b3301ap-30,
+ 0x1.92fb04ec0f6cfp-30,
+ 0x1.77756ec9f78fap-30,
+ 0x1.5dc61922d5a06p-30,
+ 0x1.45ce65699ff6dp-30,
+ 0x1.2f71a5f159970p-30,
+ 0x1.1a94ff571654fp-30,
+ 0x1.071f4bbea09ecp-30,
+ 0x1.e9f1ff8ddd774p-31,
+ 0x1.c818223a202c7p-31,
+ 0x1.a887bd2b4404dp-31,
+ 0x1.8b1a336c5eb6bp-31,
+ 0x1.6fab63324088ap-31,
+ 0x1.56197e30205bap-31,
+ 0x1.3e44e45301b92p-31,
+ 0x1.281000bfe4c3fp-31,
+ 0x1.135f28f2d50b4p-31,
+ 0x1.00187dded5975p-31,
+ 0x1.dc479de0ef001p-32,
+ 0x1.bad4fdad3caa1p-32,
+ 0x1.9baed3ed27ab8p-32,
+ 0x1.7ead9ce4285bbp-32,
+ 0x1.63ac6b4edc88ep-32,
+ 0x1.4a88be2a6390cp-32,
+ 0x1.332259185f1a0p-32,
+ 0x1.1d5b1f3793044p-32,
+ 0x1.0916f04b6e18bp-32,
+ 0x1.ec77101de6926p-33,
+ 0x1.c960bf23153e0p-33,
+ 0x1.a8bd20fc65ef7p-33,
+ 0x1.8a61745ec7d1dp-33,
+ 0x1.6e25d0e756261p-33,
+ 0x1.53e4f7d1666cbp-33,
+ 0x1.3b7c27a7ddb0ep-33,
+ 0x1.24caf2c32af14p-33,
+ 0x1.0fb3186804d0fp-33,
+ 0x1.f830c0bb41fd7p-34,
+ 0x1.d3c0f1a91c846p-34,
+ 0x1.b1e5acf351d87p-34,
+ 0x1.92712d259ce66p-34,
+ 0x1.7538c60a04476p-34,
+ 0x1.5a14b04b47879p-34,
+ 0x1.40dfd87456f4cp-34,
+ 0x1.2977b1172b9d5p-34,
+ 0x1.13bc07e891491p-34,
+ 0x1.ff1dbb4300811p-35,
+ 0x1.d9a880f306bd8p-35,
+ 0x1.b6e45220b55e0p-35,
+ 0x1.96a0b33f2c4dap-35,
+ 0x1.78b07e9e924acp-35,
+ 0x1.5ce9ab1670dd2p-35,
+ 0x1.4325167006bb0p-35,
+ 0x1.2b3e53538ff3fp-35,
+ 0x1.15137a7f44864p-35,
+ 0x1.0084ff125639dp-35,
+ 0x1.daeb0b7311ec7p-36,
+ 0x1.b7937d1c40c52p-36,
+ 0x1.96d082f59ab06p-36,
+ 0x1.7872d9fa10aadp-36,
+ 0x1.5c4e8e37bc7d0p-36,
+ 0x1.423ac0df49a40p-36,
+ 0x1.2a117230ad284p-36,
+ 0x1.13af4f04f9998p-36,
+ 0x1.fde703724e560p-37,
+ 0x1.d77f0c82e7641p-37,
+ 0x1.b3ee02611d7ddp-37,
+ 0x1.92ff33023d5bdp-37,
+ 0x1.7481a9e69f53fp-37,
+ 0x1.5847eda620959p-37,
+ 0x1.3e27c1fcc74bdp-37,
+ 0x1.25f9ee0b923dcp-37,
+ 0x1.0f9a0686531ffp-37,
+ 0x1.f5cc7718082afp-38,
+ 0x1.cf7e53d6a2ca5p-38,
+ 0x1.ac0f5f3229372p-38,
+ 0x1.8b498644847eap-38,
+ 0x1.6cfa9bcca59dcp-38,
+ 0x1.50f411d4fd2cdp-38,
+ 0x1.370ab8327af5ep-38,
+ 0x1.1f167f88c6b6ep-38,
+ 0x1.08f24085d4597p-38,
+ 0x1.e8f70e181d619p-39,
+ 0x1.c324c20e337dcp-39,
+ 0x1.a03261574b54ep-39,
+ 0x1.7fe903cdf5855p-39,
+ 0x1.6215c58da3450p-39,
+ 0x1.46897d4b69fc6p-39,
+ 0x1.2d1877d731b7bp-39,
+ 0x1.159a386b11517p-39,
+ 0x1.ffd27ae9393cep-40,
+ 0x1.d7c593130dd0bp-40,
+ 0x1.b2cd607c79bcfp-40,
+ 0x1.90ae4d3405651p-40,
+ 0x1.71312dd1759e2p-40,
+ 0x1.5422ef5d8949dp-40,
+ 0x1.39544b0ecc957p-40,
+ 0x1.20997f73e73ddp-40,
+ 0x1.09ca0eaacd277p-40,
+ 0x1.e9810295890ecp-41,
+ 0x1.c2b45b5aa4a1dp-41,
+ 0x1.9eee068fa7596p-41,
+ 0x1.7df2b399c10a8p-41,
+ 0x1.5f8b87a31bd85p-41,
+ 0x1.4385c96e9a2d9p-41,
+ 0x1.29b2933ef4cbcp-41,
+ 0x1.11e68a6378f8ap-41,
+ 0x1.f7f338086a86bp-42,
+ 0x1.cf8d7d9ce040ap-42,
+ 0x1.aa577251ae484p-42,
+ 0x1.8811d739efb5ep-42,
+ 0x1.68823e52970bep-42,
+ 0x1.4b72ae68e8b4cp-42,
+ 0x1.30b14dbe876bcp-42,
+ 0x1.181012ef86610p-42,
+ 0x1.01647ba798744p-42,
+ 0x1.d90e917701675p-43,
+ 0x1.b2a87e86d0c8ap-43,
+ 0x1.8f53dcb377293p-43,
+ 0x1.6ed2f2515e933p-43,
+ 0x1.50ecc9ed47f19p-43,
+ 0x1.356cd5ce7799ep-43,
+ 0x1.1c229a587ab78p-43,
+ 0x1.04e15ecc7f3f6p-43,
+ 0x1.deffc7e6a6017p-44,
+ 0x1.b7b040832f310p-44,
+ 0x1.938e021f36d76p-44,
+ 0x1.7258610b3b233p-44,
+ 0x1.53d3bfc82a909p-44,
+ 0x1.37c92babdc2fdp-44,
+ 0x1.1e06010120f6ap-44,
+ 0x1.065b9616170d4p-44,
+ 0x1.e13dd96b3753ap-45,
+ 0x1.b950d32467392p-45,
+ 0x1.94a72263259a5p-45,
+ 0x1.72fd93e036cdcp-45,
+ 0x1.54164576929abp-45,
+ 0x1.37b83c521fe96p-45,
+ 0x1.1daf033182e96p-45,
+ 0x1.05ca50205d26ap-45,
+ 0x1.dfbb6235639fap-46,
+ 0x1.b7807e294781fp-46,
+ 0x1.9298add70a734p-46,
+ 0x1.70beaf9c7ffb6p-46,
+ 0x1.51b2cd6709222p-46,
+ 0x1.353a6cf7f7fffp-46,
+ 0x1.1b1fa8cbe84a7p-46,
+ 0x1.0330f0fd69921p-46,
+ 0x1.da81670f96f9bp-47,
+ 0x1.b24a16b4d09aap-47,
+ 0x1.8d6eeb6efdbd6p-47,
+ 0x1.6ba91ac734785p-47,
+ 0x1.4cb7966770ab5p-47,
+ 0x1.305e9721d0981p-47,
+ 0x1.1667311fff70ap-47,
+ 0x1.fd3de10d62855p-48,
+ 0x1.d1aefbcd48d0cp-48,
+ 0x1.a9cc93c25aca9p-48,
+ 0x1.85487ee3ea735p-48,
+ 0x1.63daf8b4b1e0cp-48,
+ 0x1.45421e69a6ca1p-48,
+ 0x1.294175802d99ap-48,
+ 0x1.0fa17bf41068fp-48,
+ 0x1.f05e82aae2bb9p-49,
+ 0x1.c578101b29058p-49,
+ 0x1.9e39dc5dd2f7cp-49,
+ 0x1.7a553a728bbf2p-49,
+ 0x1.5982008db1304p-49,
+ 0x1.3b7e00422e51bp-49,
+ 0x1.200c898d9ee3ep-49,
+ 0x1.06f5f7eb65a56p-49,
+ 0x1.e00e9148a1d25p-50,
+ 0x1.b623734024e92p-50,
+ 0x1.8fd4e01891bf8p-50,
+ 0x1.6cd44c7470d89p-50,
+ 0x1.4cd9c04158cd7p-50,
+ 0x1.2fa34bf5c8344p-50,
+ 0x1.14f4890ff2461p-50,
+ 0x1.f92c49dfa4df5p-51,
+ 0x1.ccaaea71ab0dfp-51,
+ 0x1.a40829f001197p-51,
+ 0x1.7eef13b59e96cp-51,
+ 0x1.5d11e1a252bf5p-51,
+ 0x1.3e296303b2297p-51,
+ 0x1.21f47009f43cep-51,
+ 0x1.083768c5e4541p-51,
+ 0x1.e1777d831265ep-52,
+ 0x1.b69f10b0191b5p-52,
+ 0x1.8f8a3a05b5b52p-52,
+ 0x1.6be573c40c8e7p-52,
+ 0x1.4b645ba991fdbp-52,
+ 0x1.2dc119095729fp-52,
+ },
+};
diff --git a/contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c b/contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c
new file mode 100644
index 000000000000..a91bef96f2e7
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_erfc_1u8.c
@@ -0,0 +1,164 @@
+/*
+ * Double-precision vector erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint64_t off_idx, off_arr;
+ double max, shift;
+ double p20, p40, p41, p42;
+ double p51, p52;
+ double q5, r5;
+ double q6, r6;
+ double q7, r7;
+ double q8, r8;
+ double q9, r9;
+ uint64_t table_scale;
+} data = {
+ /* Set an offset so the range of the index used for lookup is 3487, and it
+ can be clamped using a saturated add on an offset index.
+ Index offset is 0xffffffffffffffff - asuint64(shift) - 3487. */
+ .off_idx = 0xbd3ffffffffff260,
+ .off_arr = 0xfffffffffffff260, /* 0xffffffffffffffff - 3487. */
+ .max = 0x1.b3ep+4, /* 3487/128. */
+ .shift = 0x1p45,
+ .table_scale = 0x37f0000000000000, /* asuint64(0x1p-128). */
+ .p20 = 0x1.5555555555555p-2, /* 1/3, used to compute 2/3 and 1/6. */
+ .p40 = -0x1.999999999999ap-4, /* 1/10. */
+ .p41 = -0x1.999999999999ap-2, /* 2/5. */
+ .p42 = 0x1.1111111111111p-3, /* 2/15. */
+ .p51 = -0x1.c71c71c71c71cp-3, /* 2/9. */
+ .p52 = 0x1.6c16c16c16c17p-5, /* 2/45. */
+ /* Qi = (i+1) / i, for i = 5, ..., 9. */
+ .q5 = 0x1.3333333333333p0,
+ .q6 = 0x1.2aaaaaaaaaaabp0,
+ .q7 = 0x1.2492492492492p0,
+ .q8 = 0x1.2p0,
+ .q9 = 0x1.1c71c71c71c72p0,
+ /* Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */
+ .r5 = -0x1.e79e79e79e79ep-3,
+ .r6 = -0x1.b6db6db6db6dbp-3,
+ .r7 = -0x1.8e38e38e38e39p-3,
+ .r8 = -0x1.6c16c16c16c17p-3,
+ .r9 = -0x1.4f2094f2094f2p-3,
+};
+
+/* Optimized double-precision vector erfc(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
+
+ poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
+ + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
+ - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5
+ + p6(r) d^6 + ... + p10(r) d^10
+
+ Polynomials p6(r) to p10(r) are computed using recurrence relation
+
+ 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0,
+ with p0 = 1, and p1(r) = -r.
+
+ Values of erfc(r) and scale are read from lookup tables. Stored values
+ are scaled to avoid hitting the subnormal range.
+
+ Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
+
+ Maximum measured error: 1.71 ULP
+ _ZGVsMxv_erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608
+ want 0x1.e15fcbea3e7adp-608. */
+svfloat64_t SV_NAME_D1 (erfc) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+ svfloat64_t a = svabs_x (pg, x);
+
+ /* Clamp input at |x| <= 3487/128. */
+ a = svmin_x (pg, a, dat->max);
+
+ /* Reduce x to the nearest multiple of 1/128. */
+ svfloat64_t shift = sv_f64 (dat->shift);
+ svfloat64_t z = svadd_x (pg, a, shift);
+
+ /* Saturate index for the NaN case. */
+ svuint64_t i = svqadd (svreinterpret_u64 (z), dat->off_idx);
+
+ /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */
+ i = svadd_x (pg, i, i);
+ const float64_t *p = &__erfc_data.tab[0].erfc - 2 * dat->off_arr;
+ svfloat64_t erfcr = svld1_gather_index (pg, p, i);
+ svfloat64_t scale = svld1_gather_index (pg, p + 1, i);
+
+ /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */
+ svfloat64_t r = svsub_x (pg, z, shift);
+ svfloat64_t d = svsub_x (pg, a, r);
+ svfloat64_t d2 = svmul_x (pg, d, d);
+ svfloat64_t r2 = svmul_x (pg, r, r);
+
+ /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p10(r) * d^10. */
+ svfloat64_t p1 = r;
+ svfloat64_t third = sv_f64 (dat->p20);
+ svfloat64_t twothird = svmul_x (pg, third, 2.0);
+ svfloat64_t sixth = svmul_x (pg, third, 0.5);
+ svfloat64_t p2 = svmls_x (pg, third, r2, twothird);
+ svfloat64_t p3 = svmad_x (pg, r2, third, -0.5);
+ p3 = svmul_x (pg, r, p3);
+ svfloat64_t p4 = svmla_x (pg, sv_f64 (dat->p41), r2, dat->p42);
+ p4 = svmls_x (pg, sv_f64 (dat->p40), r2, p4);
+ svfloat64_t p5 = svmla_x (pg, sv_f64 (dat->p51), r2, dat->p52);
+ p5 = svmla_x (pg, sixth, r2, p5);
+ p5 = svmul_x (pg, r, p5);
+ /* Compute p_i using recurrence relation:
+ p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */
+ svfloat64_t qr5 = svld1rq (svptrue_b64 (), &dat->q5);
+ svfloat64_t qr6 = svld1rq (svptrue_b64 (), &dat->q6);
+ svfloat64_t qr7 = svld1rq (svptrue_b64 (), &dat->q7);
+ svfloat64_t qr8 = svld1rq (svptrue_b64 (), &dat->q8);
+ svfloat64_t qr9 = svld1rq (svptrue_b64 (), &dat->q9);
+ svfloat64_t p6 = svmla_x (pg, p4, p5, svmul_lane (r, qr5, 0));
+ p6 = svmul_lane (p6, qr5, 1);
+ svfloat64_t p7 = svmla_x (pg, p5, p6, svmul_lane (r, qr6, 0));
+ p7 = svmul_lane (p7, qr6, 1);
+ svfloat64_t p8 = svmla_x (pg, p6, p7, svmul_lane (r, qr7, 0));
+ p8 = svmul_lane (p8, qr7, 1);
+ svfloat64_t p9 = svmla_x (pg, p7, p8, svmul_lane (r, qr8, 0));
+ p9 = svmul_lane (p9, qr8, 1);
+ svfloat64_t p10 = svmla_x (pg, p8, p9, svmul_lane (r, qr9, 0));
+ p10 = svmul_lane (p10, qr9, 1);
+ /* Compute polynomial in d using pairwise Horner scheme. */
+ svfloat64_t p90 = svmla_x (pg, p9, d, p10);
+ svfloat64_t p78 = svmla_x (pg, p7, d, p8);
+ svfloat64_t p56 = svmla_x (pg, p5, d, p6);
+ svfloat64_t p34 = svmla_x (pg, p3, d, p4);
+ svfloat64_t p12 = svmla_x (pg, p1, d, p2);
+ svfloat64_t y = svmla_x (pg, p78, d2, p90);
+ y = svmla_x (pg, p56, d2, y);
+ y = svmla_x (pg, p34, d2, y);
+ y = svmla_x (pg, p12, d2, y);
+
+ y = svmls_x (pg, erfcr, scale, svmls_x (pg, d, d2, y));
+
+ /* Offset equals 2.0 if sign, else 0.0. */
+ svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), 0x8000000000000000);
+ svfloat64_t off = svreinterpret_f64 (svlsr_x (pg, sign, 1));
+ /* Handle sign and scale back in a single fma. */
+ svfloat64_t fac = svreinterpret_f64 (svorr_x (pg, sign, dat->table_scale));
+
+ return svmla_x (pg, off, fac, y);
+}
+
+PL_SIG (SV, D, 1, erfc, -6.0, 28.0)
+PL_TEST_ULP (SV_NAME_D1 (erfc), 1.21)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (erfc), 0.0, 0x1p-26, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 0x1p-26, 28.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (erfc), -0x1p-26, -6.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (erfc), 28.0, inf, 40000)
+PL_TEST_INTERVAL (SV_NAME_D1 (erfc), -6.0, -inf, 40000)
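The Q_i and R_i constants in the data struct above are just a refactoring of the recurrence quoted in the header comment. A minimal scalar sketch (hypothetical, not part of the imported sources) that generates p2(r) through p10(r) straight from 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0 and compares the low-order terms against the closed forms listed there:

#include <stdio.h>

int
main (void)
{
  double r = 1.5; /* Any rounding point r = k/128 works here.  */
  double p[11];
  p[0] = 1.0;
  p[1] = -r;
  /* The recurrence above, solved for p_{i+2}:
     p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1},
     with Q_{i+1} = (i+2)/(i+1) and R_{i+1} = -2(i+1)/((i+2)(i+3)).  */
  for (int i = 0; i + 2 <= 10; i++)
    {
      double q = (double) (i + 2) / (i + 1);
      double rr = -2.0 * (i + 1) / ((double) (i + 2) * (i + 3));
      p[i + 2] = (p[i] + r * q * p[i + 1]) * rr;
    }
  /* The low-order terms reproduce the closed forms in the header comment,
     e.g. p2(r) = 2/3 r^2 - 1/3 and p4(r) = 2/15 r^4 - 2/5 r^2 + 1/10.  */
  printf ("p2 = %g (expect %g)\n", p[2], 2.0 / 3 * r * r - 1.0 / 3);
  printf ("p4 = %g (expect %g)\n", p[4],
          2.0 / 15 * r * r * r * r - 2.0 / 5 * r * r + 0.1);
  return 0;
}

Note that the vector routine folds the overall minus sign into its final svmls (d - d^2 * y) step, so the p_i it materialises are the negatives of the p_i produced by this recurrence.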
diff --git a/contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c b/contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c
new file mode 100644
index 000000000000..cda8f0b3752e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_erfcf_1u7.c
@@ -0,0 +1,111 @@
+/*
+ * Single-precision vector erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint32_t off_idx, off_arr;
+ float max, shift;
+ float third, two_thirds, two_over_fifteen, two_over_five, tenth;
+} data = {
+ /* Set an offset so the range of the index used for lookup is 644, and it can
+ be clamped using a saturated add. */
+ .off_idx = 0xb7fffd7b, /* 0xffffffff - asuint(shift) - 644. */
+ .off_arr = 0xfffffd7b, /* 0xffffffff - 644. */
+ .max = 10.0625f, /* 644/64. */
+ .shift = 0x1p17f,
+ .third = 0x1.555556p-2f,
+ .two_thirds = 0x1.555556p-1f,
+ .two_over_fifteen = 0x1.111112p-3f,
+ .two_over_five = -0x1.99999ap-2f,
+ .tenth = -0x1.99999ap-4f,
+};
+
+#define SignMask 0x80000000
+#define TableScale 0x28000000 /* 0x1p-47. */
+
+/* Optimized single-precision vector erfcf(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/64.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
+
+ poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
+ + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
+
+ Values of erfc(r) and scale are read from lookup tables. Stored values
+ are scaled to avoid hitting the subnormal range.
+
+ Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
+
+ Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
+ _ZGVsMxv_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
+ want 0x1.f51216p-120. */
+svfloat32_t SV_NAME_F1 (erfc) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+ svfloat32_t a = svabs_x (pg, x);
+
+ /* Clamp input at |x| <= 10.0 + 4/64. */
+ a = svmin_x (pg, a, dat->max);
+
+ /* Reduce x to the nearest multiple of 1/64. */
+ svfloat32_t shift = sv_f32 (dat->shift);
+ svfloat32_t z = svadd_x (pg, a, shift);
+
+ /* Saturate index for the NaN case. */
+ svuint32_t i = svqadd (svreinterpret_u32 (z), dat->off_idx);
+
+ /* Lookup erfc(r) and 2/sqrt(pi)*exp(-r^2) in tables. */
+ i = svmul_x (pg, i, 2);
+ const float32_t *p = &__erfcf_data.tab[0].erfc - 2 * dat->off_arr;
+ svfloat32_t erfcr = svld1_gather_index (pg, p, i);
+ svfloat32_t scale = svld1_gather_index (pg, p + 1, i);
+
+ /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */
+ svfloat32_t r = svsub_x (pg, z, shift);
+ svfloat32_t d = svsub_x (pg, a, r);
+ svfloat32_t d2 = svmul_x (pg, d, d);
+ svfloat32_t r2 = svmul_x (pg, r, r);
+
+ svfloat32_t coeffs = svld1rq (svptrue_b32 (), &dat->third);
+ svfloat32_t third = svdup_lane (coeffs, 0);
+
+ svfloat32_t p1 = r;
+ svfloat32_t p2 = svmls_lane (third, r2, coeffs, 1);
+ svfloat32_t p3 = svmul_x (pg, r, svmla_lane (sv_f32 (-0.5), r2, coeffs, 0));
+ svfloat32_t p4 = svmla_lane (sv_f32 (dat->two_over_five), r2, coeffs, 2);
+ p4 = svmls_x (pg, sv_f32 (dat->tenth), r2, p4);
+
+ svfloat32_t y = svmla_x (pg, p3, d, p4);
+ y = svmla_x (pg, p2, d, y);
+ y = svmla_x (pg, p1, d, y);
+
+ /* Solves the |x| = inf/nan case. */
+ y = svmls_x (pg, erfcr, scale, svmls_x (pg, d, d2, y));
+
+ /* Offset equals 2.0f if sign, else 0.0f. */
+ svuint32_t sign = svand_x (pg, svreinterpret_u32 (x), SignMask);
+ svfloat32_t off = svreinterpret_f32 (svlsr_x (pg, sign, 1));
+ /* Handle sign and scale back in a single fma. */
+ svfloat32_t fac = svreinterpret_f32 (svorr_x (pg, sign, TableScale));
+
+ return svmla_x (pg, off, fac, y);
+}
+
+PL_SIG (SV, F, 1, erfc, -4.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (erfc), 1.14)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erfc), 0.0, 0x1p-26, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (erfc), 10.0625, inf, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (erfc), -4.0, -inf, 40000)
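The sign handling at the end of the routine above relies on two bit-level identities that are easier to see in scalar form. A hypothetical standalone check (not part of the import; the asfloat helper is illustrative):

#include <assert.h>
#include <stdint.h>
#include <string.h>

static float
asfloat (uint32_t u)
{
  float f;
  memcpy (&f, &u, sizeof f);
  return f;
}

int
main (void)
{
  uint32_t sign = 0x80000000u; /* Sign bit of a negative input.  */
  /* Shifting the sign bit right by one gives asuint(2.0f), so `off' is 2.0f
     for negative x and 0.0f otherwise, implementing erfc(x) = 2 - erfc(-x).  */
  assert (asfloat (sign >> 1) == 2.0f);
  /* OR-ing the sign into TableScale (asuint(0x1p-47f)) yields +/-0x1p-47f,
     which both scales back the stored table values and applies the sign in
     the same fma.  */
  assert (asfloat (0x28000000u) == 0x1p-47f);
  assert (asfloat (sign | 0x28000000u) == -0x1p-47f);
  return 0;
}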
diff --git a/contrib/arm-optimized-routines/pl/math/sv_erff_2u.c b/contrib/arm-optimized-routines/pl/math/sv_erff_2u.c
new file mode 100644
index 000000000000..adeee798ee2e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_erff_2u.c
@@ -0,0 +1,90 @@
+/*
+ * Single-precision vector erf(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float min, max, scale, shift, third;
+} data = {
+ .min = 0x1.cp-7f, /* 1/64 - 1/512. */
+ .max = 3.9375, /* 4 - 8/128. */
+ .scale = 0x1.20dd76p+0f, /* 2/sqrt(pi). */
+ .shift = 0x1p16f,
+ .third = 0x1.555556p-2f, /* 1/3. */
+};
+
+#define SignMask (0x80000000)
+
+/* Single-precision implementation of vector erf(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erf(x) ~ erf(r) + scale * d * [1 - r * d - 1/3 * d^2]
+
+ Values of erf(r) and scale are read from lookup tables.
+ For |x| < 0x1.cp-7, the algorithm sets r = 0, erf(r) = 0, and scale = 2 /
+ sqrt(pi), so it simply boils down to a Taylor series expansion near 0. For
+ |x| > 3.9375, erf(|x|) rounds to 1.0f.
+
+ Maximum error on each interval:
+ - [0, 0x1.cp-7]: 1.93 ULP
+ _ZGVsMxv_erff(0x1.c373e6p-9) got 0x1.fd686cp-9 want 0x1.fd6868p-9
+ - [0x1.cp-7, 4.0]: 1.26 ULP
+ _ZGVsMxv_erff(0x1.1d002ep+0) got 0x1.c4eb9ap-1 want 0x1.c4eb98p-1. */
+svfloat32_t SV_NAME_F1 (erf) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+ /* |x| > 1/64 - 1/512. */
+ svbool_t a_gt_min = svacgt (pg, x, dat->min);
+
+ /* |x| >= 4.0 - 8/128. */
+ svbool_t a_ge_max = svacge (pg, x, dat->max);
+ svfloat32_t a = svabs_x (pg, x);
+
+ svfloat32_t shift = sv_f32 (dat->shift);
+ svfloat32_t z = svadd_x (pg, a, shift);
+ svuint32_t i
+ = svsub_x (pg, svreinterpret_u32 (z), svreinterpret_u32 (shift));
+
+ /* Saturate lookup index. */
+ i = svsel (a_ge_max, sv_u32 (512), i);
+
+ /* r and erf(r) set to 0 for |x| below min. */
+ svfloat32_t r = svsub_z (a_gt_min, z, shift);
+ svfloat32_t erfr = svld1_gather_index (a_gt_min, __sv_erff_data.erf, i);
+
+ /* scale set to 2/sqrt(pi) for |x| below min. */
+ svfloat32_t scale = svld1_gather_index (a_gt_min, __sv_erff_data.scale, i);
+ scale = svsel (a_gt_min, scale, sv_f32 (dat->scale));
+
+ /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2). */
+ svfloat32_t d = svsub_x (pg, a, r);
+ svfloat32_t d2 = svmul_x (pg, d, d);
+ svfloat32_t y = svmla_x (pg, r, d, dat->third);
+ y = svmla_x (pg, erfr, scale, svmls_x (pg, d, d2, y));
+
+ /* Solves the |x| = inf case. */
+ y = svsel (a_ge_max, sv_f32 (1.0f), y);
+
+ /* Copy sign. */
+ svuint32_t ix = svreinterpret_u32 (x);
+ svuint32_t iy = svreinterpret_u32 (y);
+ svuint32_t sign = svand_x (pg, ix, SignMask);
+ return svreinterpret_f32 (svorr_x (pg, sign, iy));
+}
+
+PL_SIG (SV, F, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (SV_NAME_F1 (erf), 1.43)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, 0x1.cp-7, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0x1.cp-7, 3.9375, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 3.9375, inf, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (erf), 0, inf, 4000)
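The index computation above leans on the usual shift trick: adding 0x1p16f to |x| <= 4 forces rounding to the nearest 1/128 in the low mantissa bits, and the bit-pattern difference from the shift is exactly the table index. A small hypothetical scalar demonstration (not part of the import; the asuint helper and sample input are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t
asuint (float f)
{
  uint32_t u;
  memcpy (&u, &f, sizeof u);
  return u;
}

int
main (void)
{
  const float shift = 0x1p16f; /* ulp (shift + a) = 1/128 for 0 <= a < 4.  */
  float a = 1.2345f;

  float z = a + shift;                      /* Low bits now hold round (128 * a).  */
  uint32_t i = asuint (z) - asuint (shift); /* Table index, 0 .. 512.  */
  float r = z - shift;                      /* a rounded to the nearest 1/128.  */

  printf ("i = %u, r = %f (expect 158 and 1.234375)\n", i, r);
  return 0;
}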
diff --git a/contrib/arm-optimized-routines/pl/math/sv_erff_data.c b/contrib/arm-optimized-routines/pl/math/sv_erff_data.c
new file mode 100644
index 000000000000..154d3c188874
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_erff_data.c
@@ -0,0 +1,1046 @@
+/*
+ * Data for approximation of vector erff.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* Lookup table used in SVE erff.
+ For each possible rounded input r (multiples of 1/128), between
+ r = 0.0 and r = 4.0 (513 values):
+ - __sv_erff_data.erf contains the values of erf(r),
+ - __sv_erff_data.scale contains the values of 2/sqrt(pi)*exp(-r^2).
+ Note that indices 0 and 1 are never hit by the algorithm, since lookup is
+ performed only for x >= 1/64-1/512. */
+const struct sv_erff_data __sv_erff_data = {
+ .erf = { 0x0.000000p+0,
+ 0x1.20dbf4p-7,
+ 0x1.20d770p-6,
+ 0x1.b137e0p-6,
+ 0x1.20c564p-5,
+ 0x1.68e5d4p-5,
+ 0x1.b0fafep-5,
+ 0x1.f902a8p-5,
+ 0x1.207d48p-4,
+ 0x1.44703ep-4,
+ 0x1.68591ap-4,
+ 0x1.8c36bep-4,
+ 0x1.b00812p-4,
+ 0x1.d3cbf8p-4,
+ 0x1.f7815ap-4,
+ 0x1.0d9390p-3,
+ 0x1.1f5e1ap-3,
+ 0x1.311fc2p-3,
+ 0x1.42d7fcp-3,
+ 0x1.548642p-3,
+ 0x1.662a0cp-3,
+ 0x1.77c2d2p-3,
+ 0x1.895010p-3,
+ 0x1.9ad142p-3,
+ 0x1.ac45e4p-3,
+ 0x1.bdad72p-3,
+ 0x1.cf076ep-3,
+ 0x1.e05354p-3,
+ 0x1.f190aap-3,
+ 0x1.015f78p-2,
+ 0x1.09eed6p-2,
+ 0x1.127632p-2,
+ 0x1.1af54ep-2,
+ 0x1.236bf0p-2,
+ 0x1.2bd9dcp-2,
+ 0x1.343ed6p-2,
+ 0x1.3c9aa8p-2,
+ 0x1.44ed18p-2,
+ 0x1.4d35f0p-2,
+ 0x1.5574f4p-2,
+ 0x1.5da9f4p-2,
+ 0x1.65d4b8p-2,
+ 0x1.6df50ap-2,
+ 0x1.760abap-2,
+ 0x1.7e1594p-2,
+ 0x1.861566p-2,
+ 0x1.8e0a02p-2,
+ 0x1.95f336p-2,
+ 0x1.9dd0d2p-2,
+ 0x1.a5a2acp-2,
+ 0x1.ad6896p-2,
+ 0x1.b52264p-2,
+ 0x1.bccfecp-2,
+ 0x1.c47104p-2,
+ 0x1.cc0584p-2,
+ 0x1.d38d44p-2,
+ 0x1.db081cp-2,
+ 0x1.e275eap-2,
+ 0x1.e9d68ap-2,
+ 0x1.f129d4p-2,
+ 0x1.f86faap-2,
+ 0x1.ffa7eap-2,
+ 0x1.03693ap-1,
+ 0x1.06f794p-1,
+ 0x1.0a7ef6p-1,
+ 0x1.0dff50p-1,
+ 0x1.117894p-1,
+ 0x1.14eab4p-1,
+ 0x1.1855a6p-1,
+ 0x1.1bb95cp-1,
+ 0x1.1f15ccp-1,
+ 0x1.226ae8p-1,
+ 0x1.25b8a8p-1,
+ 0x1.28ff02p-1,
+ 0x1.2c3decp-1,
+ 0x1.2f755cp-1,
+ 0x1.32a54cp-1,
+ 0x1.35cdb4p-1,
+ 0x1.38ee8ap-1,
+ 0x1.3c07cap-1,
+ 0x1.3f196ep-1,
+ 0x1.42236ep-1,
+ 0x1.4525c8p-1,
+ 0x1.482074p-1,
+ 0x1.4b1372p-1,
+ 0x1.4dfebap-1,
+ 0x1.50e24cp-1,
+ 0x1.53be26p-1,
+ 0x1.569244p-1,
+ 0x1.595ea6p-1,
+ 0x1.5c2348p-1,
+ 0x1.5ee02ep-1,
+ 0x1.619556p-1,
+ 0x1.6442c0p-1,
+ 0x1.66e86ep-1,
+ 0x1.69865ep-1,
+ 0x1.6c1c98p-1,
+ 0x1.6eab18p-1,
+ 0x1.7131e6p-1,
+ 0x1.73b102p-1,
+ 0x1.762870p-1,
+ 0x1.789836p-1,
+ 0x1.7b0058p-1,
+ 0x1.7d60d8p-1,
+ 0x1.7fb9c0p-1,
+ 0x1.820b12p-1,
+ 0x1.8454d6p-1,
+ 0x1.869712p-1,
+ 0x1.88d1cep-1,
+ 0x1.8b050ep-1,
+ 0x1.8d30dep-1,
+ 0x1.8f5544p-1,
+ 0x1.91724ap-1,
+ 0x1.9387f6p-1,
+ 0x1.959652p-1,
+ 0x1.979d68p-1,
+ 0x1.999d42p-1,
+ 0x1.9b95e8p-1,
+ 0x1.9d8768p-1,
+ 0x1.9f71cap-1,
+ 0x1.a1551ap-1,
+ 0x1.a33162p-1,
+ 0x1.a506b0p-1,
+ 0x1.a6d50cp-1,
+ 0x1.a89c86p-1,
+ 0x1.aa5d26p-1,
+ 0x1.ac16fcp-1,
+ 0x1.adca14p-1,
+ 0x1.af767ap-1,
+ 0x1.b11c3cp-1,
+ 0x1.b2bb68p-1,
+ 0x1.b4540ap-1,
+ 0x1.b5e630p-1,
+ 0x1.b771e8p-1,
+ 0x1.b8f742p-1,
+ 0x1.ba764ap-1,
+ 0x1.bbef10p-1,
+ 0x1.bd61a2p-1,
+ 0x1.bece0ep-1,
+ 0x1.c03464p-1,
+ 0x1.c194b2p-1,
+ 0x1.c2ef08p-1,
+ 0x1.c44376p-1,
+ 0x1.c5920ap-1,
+ 0x1.c6dad2p-1,
+ 0x1.c81de2p-1,
+ 0x1.c95b46p-1,
+ 0x1.ca930ep-1,
+ 0x1.cbc54cp-1,
+ 0x1.ccf20cp-1,
+ 0x1.ce1962p-1,
+ 0x1.cf3b5cp-1,
+ 0x1.d0580cp-1,
+ 0x1.d16f7ep-1,
+ 0x1.d281c4p-1,
+ 0x1.d38ef0p-1,
+ 0x1.d49710p-1,
+ 0x1.d59a34p-1,
+ 0x1.d6986cp-1,
+ 0x1.d791cap-1,
+ 0x1.d8865ep-1,
+ 0x1.d97636p-1,
+ 0x1.da6162p-1,
+ 0x1.db47f4p-1,
+ 0x1.dc29fcp-1,
+ 0x1.dd0788p-1,
+ 0x1.dde0aap-1,
+ 0x1.deb570p-1,
+ 0x1.df85eap-1,
+ 0x1.e0522ap-1,
+ 0x1.e11a3ep-1,
+ 0x1.e1de36p-1,
+ 0x1.e29e22p-1,
+ 0x1.e35a12p-1,
+ 0x1.e41214p-1,
+ 0x1.e4c638p-1,
+ 0x1.e5768cp-1,
+ 0x1.e62322p-1,
+ 0x1.e6cc08p-1,
+ 0x1.e7714ap-1,
+ 0x1.e812fcp-1,
+ 0x1.e8b12ap-1,
+ 0x1.e94be4p-1,
+ 0x1.e9e336p-1,
+ 0x1.ea7730p-1,
+ 0x1.eb07e2p-1,
+ 0x1.eb9558p-1,
+ 0x1.ec1fa2p-1,
+ 0x1.eca6ccp-1,
+ 0x1.ed2ae6p-1,
+ 0x1.edabfcp-1,
+ 0x1.ee2a1ep-1,
+ 0x1.eea556p-1,
+ 0x1.ef1db4p-1,
+ 0x1.ef9344p-1,
+ 0x1.f00614p-1,
+ 0x1.f07630p-1,
+ 0x1.f0e3a6p-1,
+ 0x1.f14e82p-1,
+ 0x1.f1b6d0p-1,
+ 0x1.f21ca0p-1,
+ 0x1.f27ff8p-1,
+ 0x1.f2e0eap-1,
+ 0x1.f33f7ep-1,
+ 0x1.f39bc2p-1,
+ 0x1.f3f5c2p-1,
+ 0x1.f44d88p-1,
+ 0x1.f4a31ep-1,
+ 0x1.f4f694p-1,
+ 0x1.f547f2p-1,
+ 0x1.f59742p-1,
+ 0x1.f5e490p-1,
+ 0x1.f62fe8p-1,
+ 0x1.f67952p-1,
+ 0x1.f6c0dcp-1,
+ 0x1.f7068cp-1,
+ 0x1.f74a6ep-1,
+ 0x1.f78c8cp-1,
+ 0x1.f7cceep-1,
+ 0x1.f80ba2p-1,
+ 0x1.f848acp-1,
+ 0x1.f8841ap-1,
+ 0x1.f8bdf2p-1,
+ 0x1.f8f63ep-1,
+ 0x1.f92d08p-1,
+ 0x1.f96256p-1,
+ 0x1.f99634p-1,
+ 0x1.f9c8a8p-1,
+ 0x1.f9f9bap-1,
+ 0x1.fa2974p-1,
+ 0x1.fa57dep-1,
+ 0x1.fa84fep-1,
+ 0x1.fab0dep-1,
+ 0x1.fadb84p-1,
+ 0x1.fb04f6p-1,
+ 0x1.fb2d40p-1,
+ 0x1.fb5464p-1,
+ 0x1.fb7a6cp-1,
+ 0x1.fb9f60p-1,
+ 0x1.fbc344p-1,
+ 0x1.fbe61ep-1,
+ 0x1.fc07fap-1,
+ 0x1.fc28d8p-1,
+ 0x1.fc48c2p-1,
+ 0x1.fc67bcp-1,
+ 0x1.fc85d0p-1,
+ 0x1.fca2fep-1,
+ 0x1.fcbf52p-1,
+ 0x1.fcdaccp-1,
+ 0x1.fcf576p-1,
+ 0x1.fd0f54p-1,
+ 0x1.fd286ap-1,
+ 0x1.fd40bep-1,
+ 0x1.fd5856p-1,
+ 0x1.fd6f34p-1,
+ 0x1.fd8562p-1,
+ 0x1.fd9ae2p-1,
+ 0x1.fdafb8p-1,
+ 0x1.fdc3e8p-1,
+ 0x1.fdd77ap-1,
+ 0x1.fdea6ep-1,
+ 0x1.fdfcccp-1,
+ 0x1.fe0e96p-1,
+ 0x1.fe1fd0p-1,
+ 0x1.fe3080p-1,
+ 0x1.fe40a6p-1,
+ 0x1.fe504cp-1,
+ 0x1.fe5f70p-1,
+ 0x1.fe6e18p-1,
+ 0x1.fe7c46p-1,
+ 0x1.fe8a00p-1,
+ 0x1.fe9748p-1,
+ 0x1.fea422p-1,
+ 0x1.feb090p-1,
+ 0x1.febc96p-1,
+ 0x1.fec836p-1,
+ 0x1.fed374p-1,
+ 0x1.fede52p-1,
+ 0x1.fee8d4p-1,
+ 0x1.fef2fep-1,
+ 0x1.fefccep-1,
+ 0x1.ff064cp-1,
+ 0x1.ff0f76p-1,
+ 0x1.ff1852p-1,
+ 0x1.ff20e0p-1,
+ 0x1.ff2924p-1,
+ 0x1.ff3120p-1,
+ 0x1.ff38d6p-1,
+ 0x1.ff4048p-1,
+ 0x1.ff4778p-1,
+ 0x1.ff4e68p-1,
+ 0x1.ff551ap-1,
+ 0x1.ff5b90p-1,
+ 0x1.ff61ccp-1,
+ 0x1.ff67d0p-1,
+ 0x1.ff6d9ep-1,
+ 0x1.ff7338p-1,
+ 0x1.ff789ep-1,
+ 0x1.ff7dd4p-1,
+ 0x1.ff82dap-1,
+ 0x1.ff87b2p-1,
+ 0x1.ff8c5cp-1,
+ 0x1.ff90dcp-1,
+ 0x1.ff9532p-1,
+ 0x1.ff9960p-1,
+ 0x1.ff9d68p-1,
+ 0x1.ffa14ap-1,
+ 0x1.ffa506p-1,
+ 0x1.ffa8a0p-1,
+ 0x1.ffac18p-1,
+ 0x1.ffaf6ep-1,
+ 0x1.ffb2a6p-1,
+ 0x1.ffb5bep-1,
+ 0x1.ffb8b8p-1,
+ 0x1.ffbb98p-1,
+ 0x1.ffbe5ap-1,
+ 0x1.ffc102p-1,
+ 0x1.ffc390p-1,
+ 0x1.ffc606p-1,
+ 0x1.ffc862p-1,
+ 0x1.ffcaa8p-1,
+ 0x1.ffccd8p-1,
+ 0x1.ffcef4p-1,
+ 0x1.ffd0fap-1,
+ 0x1.ffd2eap-1,
+ 0x1.ffd4cap-1,
+ 0x1.ffd696p-1,
+ 0x1.ffd84ep-1,
+ 0x1.ffd9f8p-1,
+ 0x1.ffdb90p-1,
+ 0x1.ffdd18p-1,
+ 0x1.ffde90p-1,
+ 0x1.ffdffap-1,
+ 0x1.ffe154p-1,
+ 0x1.ffe2a2p-1,
+ 0x1.ffe3e2p-1,
+ 0x1.ffe514p-1,
+ 0x1.ffe63cp-1,
+ 0x1.ffe756p-1,
+ 0x1.ffe866p-1,
+ 0x1.ffe96ap-1,
+ 0x1.ffea64p-1,
+ 0x1.ffeb54p-1,
+ 0x1.ffec3ap-1,
+ 0x1.ffed16p-1,
+ 0x1.ffedeap-1,
+ 0x1.ffeeb4p-1,
+ 0x1.ffef76p-1,
+ 0x1.fff032p-1,
+ 0x1.fff0e4p-1,
+ 0x1.fff18ep-1,
+ 0x1.fff232p-1,
+ 0x1.fff2d0p-1,
+ 0x1.fff366p-1,
+ 0x1.fff3f6p-1,
+ 0x1.fff480p-1,
+ 0x1.fff504p-1,
+ 0x1.fff582p-1,
+ 0x1.fff5fcp-1,
+ 0x1.fff670p-1,
+ 0x1.fff6dep-1,
+ 0x1.fff74ap-1,
+ 0x1.fff7aep-1,
+ 0x1.fff810p-1,
+ 0x1.fff86cp-1,
+ 0x1.fff8c6p-1,
+ 0x1.fff91cp-1,
+ 0x1.fff96cp-1,
+ 0x1.fff9bap-1,
+ 0x1.fffa04p-1,
+ 0x1.fffa4cp-1,
+ 0x1.fffa90p-1,
+ 0x1.fffad0p-1,
+ 0x1.fffb0ep-1,
+ 0x1.fffb4ap-1,
+ 0x1.fffb82p-1,
+ 0x1.fffbb8p-1,
+ 0x1.fffbecp-1,
+ 0x1.fffc1ep-1,
+ 0x1.fffc4ep-1,
+ 0x1.fffc7ap-1,
+ 0x1.fffca6p-1,
+ 0x1.fffccep-1,
+ 0x1.fffcf6p-1,
+ 0x1.fffd1ap-1,
+ 0x1.fffd3ep-1,
+ 0x1.fffd60p-1,
+ 0x1.fffd80p-1,
+ 0x1.fffda0p-1,
+ 0x1.fffdbep-1,
+ 0x1.fffddap-1,
+ 0x1.fffdf4p-1,
+ 0x1.fffe0ep-1,
+ 0x1.fffe26p-1,
+ 0x1.fffe3ep-1,
+ 0x1.fffe54p-1,
+ 0x1.fffe68p-1,
+ 0x1.fffe7ep-1,
+ 0x1.fffe90p-1,
+ 0x1.fffea2p-1,
+ 0x1.fffeb4p-1,
+ 0x1.fffec4p-1,
+ 0x1.fffed4p-1,
+ 0x1.fffee4p-1,
+ 0x1.fffef2p-1,
+ 0x1.ffff00p-1,
+ 0x1.ffff0cp-1,
+ 0x1.ffff18p-1,
+ 0x1.ffff24p-1,
+ 0x1.ffff30p-1,
+ 0x1.ffff3ap-1,
+ 0x1.ffff44p-1,
+ 0x1.ffff4ep-1,
+ 0x1.ffff56p-1,
+ 0x1.ffff60p-1,
+ 0x1.ffff68p-1,
+ 0x1.ffff70p-1,
+ 0x1.ffff78p-1,
+ 0x1.ffff7ep-1,
+ 0x1.ffff84p-1,
+ 0x1.ffff8cp-1,
+ 0x1.ffff92p-1,
+ 0x1.ffff98p-1,
+ 0x1.ffff9cp-1,
+ 0x1.ffffa2p-1,
+ 0x1.ffffa6p-1,
+ 0x1.ffffacp-1,
+ 0x1.ffffb0p-1,
+ 0x1.ffffb4p-1,
+ 0x1.ffffb8p-1,
+ 0x1.ffffbcp-1,
+ 0x1.ffffc0p-1,
+ 0x1.ffffc4p-1,
+ 0x1.ffffc6p-1,
+ 0x1.ffffcap-1,
+ 0x1.ffffccp-1,
+ 0x1.ffffd0p-1,
+ 0x1.ffffd2p-1,
+ 0x1.ffffd4p-1,
+ 0x1.ffffd6p-1,
+ 0x1.ffffd8p-1,
+ 0x1.ffffdcp-1,
+ 0x1.ffffdep-1,
+ 0x1.ffffdep-1,
+ 0x1.ffffe0p-1,
+ 0x1.ffffe2p-1,
+ 0x1.ffffe4p-1,
+ 0x1.ffffe6p-1,
+ 0x1.ffffe8p-1,
+ 0x1.ffffe8p-1,
+ 0x1.ffffeap-1,
+ 0x1.ffffeap-1,
+ 0x1.ffffecp-1,
+ 0x1.ffffeep-1,
+ 0x1.ffffeep-1,
+ 0x1.fffff0p-1,
+ 0x1.fffff0p-1,
+ 0x1.fffff2p-1,
+ 0x1.fffff2p-1,
+ 0x1.fffff2p-1,
+ 0x1.fffff4p-1,
+ 0x1.fffff4p-1,
+ 0x1.fffff4p-1,
+ 0x1.fffff6p-1,
+ 0x1.fffff6p-1,
+ 0x1.fffff6p-1,
+ 0x1.fffff8p-1,
+ 0x1.fffff8p-1,
+ 0x1.fffff8p-1,
+ 0x1.fffff8p-1,
+ 0x1.fffffap-1,
+ 0x1.fffffap-1,
+ 0x1.fffffap-1,
+ 0x1.fffffap-1,
+ 0x1.fffffap-1,
+ 0x1.fffffap-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffcp-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.fffffep-1,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ 0x1.000000p+0,
+ },
+ .scale = { 0x1.20dd76p+0,
+ 0x1.20d8f2p+0,
+ 0x1.20cb68p+0,
+ 0x1.20b4d8p+0,
+ 0x1.209546p+0,
+ 0x1.206cb4p+0,
+ 0x1.203b26p+0,
+ 0x1.2000a0p+0,
+ 0x1.1fbd28p+0,
+ 0x1.1f70c4p+0,
+ 0x1.1f1b7ap+0,
+ 0x1.1ebd56p+0,
+ 0x1.1e565cp+0,
+ 0x1.1de698p+0,
+ 0x1.1d6e14p+0,
+ 0x1.1cecdcp+0,
+ 0x1.1c62fap+0,
+ 0x1.1bd07cp+0,
+ 0x1.1b3572p+0,
+ 0x1.1a91e6p+0,
+ 0x1.19e5eap+0,
+ 0x1.19318cp+0,
+ 0x1.1874dep+0,
+ 0x1.17aff0p+0,
+ 0x1.16e2d8p+0,
+ 0x1.160da4p+0,
+ 0x1.153068p+0,
+ 0x1.144b3cp+0,
+ 0x1.135e30p+0,
+ 0x1.12695ep+0,
+ 0x1.116cd8p+0,
+ 0x1.1068bap+0,
+ 0x1.0f5d16p+0,
+ 0x1.0e4a08p+0,
+ 0x1.0d2fa6p+0,
+ 0x1.0c0e0ap+0,
+ 0x1.0ae550p+0,
+ 0x1.09b590p+0,
+ 0x1.087ee4p+0,
+ 0x1.07416cp+0,
+ 0x1.05fd3ep+0,
+ 0x1.04b27cp+0,
+ 0x1.036140p+0,
+ 0x1.0209a6p+0,
+ 0x1.00abd0p+0,
+ 0x1.fe8fb0p-1,
+ 0x1.fbbbbep-1,
+ 0x1.f8dc0ap-1,
+ 0x1.f5f0cep-1,
+ 0x1.f2fa4cp-1,
+ 0x1.eff8c4p-1,
+ 0x1.ecec78p-1,
+ 0x1.e9d5a8p-1,
+ 0x1.e6b498p-1,
+ 0x1.e38988p-1,
+ 0x1.e054bep-1,
+ 0x1.dd167cp-1,
+ 0x1.d9cf06p-1,
+ 0x1.d67ea2p-1,
+ 0x1.d32592p-1,
+ 0x1.cfc41ep-1,
+ 0x1.cc5a8ap-1,
+ 0x1.c8e91cp-1,
+ 0x1.c5701ap-1,
+ 0x1.c1efcap-1,
+ 0x1.be6872p-1,
+ 0x1.bada5ap-1,
+ 0x1.b745c6p-1,
+ 0x1.b3aafcp-1,
+ 0x1.b00a46p-1,
+ 0x1.ac63e8p-1,
+ 0x1.a8b828p-1,
+ 0x1.a5074ep-1,
+ 0x1.a1519ep-1,
+ 0x1.9d9762p-1,
+ 0x1.99d8dap-1,
+ 0x1.961650p-1,
+ 0x1.925008p-1,
+ 0x1.8e8646p-1,
+ 0x1.8ab950p-1,
+ 0x1.86e96ap-1,
+ 0x1.8316d6p-1,
+ 0x1.7f41dcp-1,
+ 0x1.7b6abcp-1,
+ 0x1.7791b8p-1,
+ 0x1.73b714p-1,
+ 0x1.6fdb12p-1,
+ 0x1.6bfdf0p-1,
+ 0x1.681ff2p-1,
+ 0x1.644156p-1,
+ 0x1.60625cp-1,
+ 0x1.5c8342p-1,
+ 0x1.58a446p-1,
+ 0x1.54c5a6p-1,
+ 0x1.50e79ep-1,
+ 0x1.4d0a68p-1,
+ 0x1.492e42p-1,
+ 0x1.455366p-1,
+ 0x1.417a0cp-1,
+ 0x1.3da26ep-1,
+ 0x1.39ccc2p-1,
+ 0x1.35f940p-1,
+ 0x1.32281ep-1,
+ 0x1.2e5992p-1,
+ 0x1.2a8dcep-1,
+ 0x1.26c508p-1,
+ 0x1.22ff72p-1,
+ 0x1.1f3d3cp-1,
+ 0x1.1b7e98p-1,
+ 0x1.17c3b6p-1,
+ 0x1.140cc4p-1,
+ 0x1.1059eep-1,
+ 0x1.0cab62p-1,
+ 0x1.09014cp-1,
+ 0x1.055bd6p-1,
+ 0x1.01bb2cp-1,
+ 0x1.fc3ee6p-2,
+ 0x1.f511aap-2,
+ 0x1.edeeeep-2,
+ 0x1.e6d700p-2,
+ 0x1.dfca26p-2,
+ 0x1.d8c8aap-2,
+ 0x1.d1d2d0p-2,
+ 0x1.cae8dap-2,
+ 0x1.c40b08p-2,
+ 0x1.bd3998p-2,
+ 0x1.b674c8p-2,
+ 0x1.afbcd4p-2,
+ 0x1.a911f0p-2,
+ 0x1.a27456p-2,
+ 0x1.9be438p-2,
+ 0x1.9561c8p-2,
+ 0x1.8eed36p-2,
+ 0x1.8886b2p-2,
+ 0x1.822e66p-2,
+ 0x1.7be47ap-2,
+ 0x1.75a91ap-2,
+ 0x1.6f7c6ap-2,
+ 0x1.695e8cp-2,
+ 0x1.634fa6p-2,
+ 0x1.5d4fd4p-2,
+ 0x1.575f34p-2,
+ 0x1.517de6p-2,
+ 0x1.4bac00p-2,
+ 0x1.45e99cp-2,
+ 0x1.4036d0p-2,
+ 0x1.3a93b2p-2,
+ 0x1.350052p-2,
+ 0x1.2f7cc4p-2,
+ 0x1.2a0916p-2,
+ 0x1.24a554p-2,
+ 0x1.1f518ap-2,
+ 0x1.1a0dc6p-2,
+ 0x1.14da0ap-2,
+ 0x1.0fb662p-2,
+ 0x1.0aa2d0p-2,
+ 0x1.059f5ap-2,
+ 0x1.00ac00p-2,
+ 0x1.f79184p-3,
+ 0x1.edeb40p-3,
+ 0x1.e46530p-3,
+ 0x1.daff4ap-3,
+ 0x1.d1b982p-3,
+ 0x1.c893cep-3,
+ 0x1.bf8e1cp-3,
+ 0x1.b6a856p-3,
+ 0x1.ade26cp-3,
+ 0x1.a53c42p-3,
+ 0x1.9cb5bep-3,
+ 0x1.944ec2p-3,
+ 0x1.8c0732p-3,
+ 0x1.83deeap-3,
+ 0x1.7bd5c8p-3,
+ 0x1.73eba4p-3,
+ 0x1.6c2056p-3,
+ 0x1.6473b6p-3,
+ 0x1.5ce596p-3,
+ 0x1.5575c8p-3,
+ 0x1.4e241ep-3,
+ 0x1.46f066p-3,
+ 0x1.3fda6cp-3,
+ 0x1.38e1fap-3,
+ 0x1.3206dcp-3,
+ 0x1.2b48dap-3,
+ 0x1.24a7b8p-3,
+ 0x1.1e233ep-3,
+ 0x1.17bb2cp-3,
+ 0x1.116f48p-3,
+ 0x1.0b3f52p-3,
+ 0x1.052b0cp-3,
+ 0x1.fe6460p-4,
+ 0x1.f2a902p-4,
+ 0x1.e72372p-4,
+ 0x1.dbd32ap-4,
+ 0x1.d0b7a0p-4,
+ 0x1.c5d04ap-4,
+ 0x1.bb1c98p-4,
+ 0x1.b09bfcp-4,
+ 0x1.a64de6p-4,
+ 0x1.9c31c6p-4,
+ 0x1.92470ap-4,
+ 0x1.888d1ep-4,
+ 0x1.7f036cp-4,
+ 0x1.75a960p-4,
+ 0x1.6c7e64p-4,
+ 0x1.6381e2p-4,
+ 0x1.5ab342p-4,
+ 0x1.5211ecp-4,
+ 0x1.499d48p-4,
+ 0x1.4154bcp-4,
+ 0x1.3937b2p-4,
+ 0x1.31458ep-4,
+ 0x1.297dbap-4,
+ 0x1.21df9ap-4,
+ 0x1.1a6a96p-4,
+ 0x1.131e14p-4,
+ 0x1.0bf97ep-4,
+ 0x1.04fc3ap-4,
+ 0x1.fc4b5ep-5,
+ 0x1.eeea8cp-5,
+ 0x1.e1d4d0p-5,
+ 0x1.d508fap-5,
+ 0x1.c885e0p-5,
+ 0x1.bc4a54p-5,
+ 0x1.b05530p-5,
+ 0x1.a4a54ap-5,
+ 0x1.99397ap-5,
+ 0x1.8e109cp-5,
+ 0x1.83298ep-5,
+ 0x1.78832cp-5,
+ 0x1.6e1c58p-5,
+ 0x1.63f3f6p-5,
+ 0x1.5a08e8p-5,
+ 0x1.505a18p-5,
+ 0x1.46e66cp-5,
+ 0x1.3dacd2p-5,
+ 0x1.34ac36p-5,
+ 0x1.2be38cp-5,
+ 0x1.2351c2p-5,
+ 0x1.1af5d2p-5,
+ 0x1.12ceb4p-5,
+ 0x1.0adb60p-5,
+ 0x1.031ad6p-5,
+ 0x1.f7182ap-6,
+ 0x1.e85c44p-6,
+ 0x1.da0006p-6,
+ 0x1.cc0180p-6,
+ 0x1.be5ecep-6,
+ 0x1.b1160ap-6,
+ 0x1.a4255ap-6,
+ 0x1.978ae8p-6,
+ 0x1.8b44e6p-6,
+ 0x1.7f5188p-6,
+ 0x1.73af0cp-6,
+ 0x1.685bb6p-6,
+ 0x1.5d55ccp-6,
+ 0x1.529b9ep-6,
+ 0x1.482b84p-6,
+ 0x1.3e03d8p-6,
+ 0x1.3422fep-6,
+ 0x1.2a875cp-6,
+ 0x1.212f62p-6,
+ 0x1.181984p-6,
+ 0x1.0f443ep-6,
+ 0x1.06ae14p-6,
+ 0x1.fcab14p-7,
+ 0x1.ec7262p-7,
+ 0x1.dcaf36p-7,
+ 0x1.cd5ecap-7,
+ 0x1.be7e5ap-7,
+ 0x1.b00b38p-7,
+ 0x1.a202bep-7,
+ 0x1.94624ep-7,
+ 0x1.87275ep-7,
+ 0x1.7a4f6ap-7,
+ 0x1.6dd7fep-7,
+ 0x1.61beaep-7,
+ 0x1.56011cp-7,
+ 0x1.4a9cf6p-7,
+ 0x1.3f8ff6p-7,
+ 0x1.34d7dcp-7,
+ 0x1.2a727ap-7,
+ 0x1.205dacp-7,
+ 0x1.169756p-7,
+ 0x1.0d1d6ap-7,
+ 0x1.03ede2p-7,
+ 0x1.f60d8ap-8,
+ 0x1.e4cc4ap-8,
+ 0x1.d4143ap-8,
+ 0x1.c3e1a6p-8,
+ 0x1.b430ecp-8,
+ 0x1.a4fe84p-8,
+ 0x1.9646f4p-8,
+ 0x1.8806d8p-8,
+ 0x1.7a3adep-8,
+ 0x1.6cdfccp-8,
+ 0x1.5ff276p-8,
+ 0x1.536fc2p-8,
+ 0x1.4754acp-8,
+ 0x1.3b9e40p-8,
+ 0x1.30499cp-8,
+ 0x1.2553eep-8,
+ 0x1.1aba78p-8,
+ 0x1.107a8cp-8,
+ 0x1.06918cp-8,
+ 0x1.f9f9d0p-9,
+ 0x1.e77448p-9,
+ 0x1.d58da6p-9,
+ 0x1.c4412cp-9,
+ 0x1.b38a3ap-9,
+ 0x1.a36454p-9,
+ 0x1.93cb12p-9,
+ 0x1.84ba30p-9,
+ 0x1.762d84p-9,
+ 0x1.682100p-9,
+ 0x1.5a90b0p-9,
+ 0x1.4d78bcp-9,
+ 0x1.40d564p-9,
+ 0x1.34a306p-9,
+ 0x1.28de12p-9,
+ 0x1.1d8318p-9,
+ 0x1.128ebap-9,
+ 0x1.07fdb4p-9,
+ 0x1.fb99b8p-10,
+ 0x1.e7f232p-10,
+ 0x1.d4fed8p-10,
+ 0x1.c2b9d0p-10,
+ 0x1.b11d70p-10,
+ 0x1.a02436p-10,
+ 0x1.8fc8c8p-10,
+ 0x1.8005f0p-10,
+ 0x1.70d6a4p-10,
+ 0x1.6235fcp-10,
+ 0x1.541f34p-10,
+ 0x1.468daep-10,
+ 0x1.397ceep-10,
+ 0x1.2ce898p-10,
+ 0x1.20cc76p-10,
+ 0x1.15246ep-10,
+ 0x1.09ec86p-10,
+ 0x1.fe41cep-11,
+ 0x1.e97ba4p-11,
+ 0x1.d57f52p-11,
+ 0x1.c245d4p-11,
+ 0x1.afc85ep-11,
+ 0x1.9e0058p-11,
+ 0x1.8ce75ep-11,
+ 0x1.7c7744p-11,
+ 0x1.6caa0ep-11,
+ 0x1.5d79ecp-11,
+ 0x1.4ee142p-11,
+ 0x1.40daa4p-11,
+ 0x1.3360ccp-11,
+ 0x1.266ea8p-11,
+ 0x1.19ff46p-11,
+ 0x1.0e0de8p-11,
+ 0x1.0295f0p-11,
+ 0x1.ef25d4p-12,
+ 0x1.da0110p-12,
+ 0x1.c5b542p-12,
+ 0x1.b23a5ap-12,
+ 0x1.9f8894p-12,
+ 0x1.8d986ap-12,
+ 0x1.7c629ap-12,
+ 0x1.6be022p-12,
+ 0x1.5c0a38p-12,
+ 0x1.4cda54p-12,
+ 0x1.3e4a24p-12,
+ 0x1.305390p-12,
+ 0x1.22f0b4p-12,
+ 0x1.161be4p-12,
+ 0x1.09cfa4p-12,
+ 0x1.fc0d56p-13,
+ 0x1.e577bcp-13,
+ 0x1.cfd4a6p-13,
+ 0x1.bb1a96p-13,
+ 0x1.a74068p-13,
+ 0x1.943d4ap-13,
+ 0x1.8208bcp-13,
+ 0x1.709a8ep-13,
+ 0x1.5feadap-13,
+ 0x1.4ff208p-13,
+ 0x1.40a8c2p-13,
+ 0x1.3207fcp-13,
+ 0x1.2408eap-13,
+ 0x1.16a502p-13,
+ 0x1.09d5f8p-13,
+ 0x1.fb2b7ap-14,
+ 0x1.e3bcf4p-14,
+ 0x1.cd5528p-14,
+ 0x1.b7e946p-14,
+ 0x1.a36eecp-14,
+ 0x1.8fdc1cp-14,
+ 0x1.7d2738p-14,
+ 0x1.6b4702p-14,
+ 0x1.5a329cp-14,
+ 0x1.49e178p-14,
+ 0x1.3a4b60p-14,
+ 0x1.2b6876p-14,
+ 0x1.1d3120p-14,
+ 0x1.0f9e1cp-14,
+ 0x1.02a868p-14,
+ 0x1.ec929ap-15,
+ 0x1.d4f4b4p-15,
+ 0x1.be6abcp-15,
+ 0x1.a8e8ccp-15,
+ 0x1.94637ep-15,
+ 0x1.80cfdcp-15,
+ 0x1.6e2368p-15,
+ 0x1.5c540cp-15,
+ 0x1.4b581cp-15,
+ 0x1.3b2652p-15,
+ 0x1.2bb5ccp-15,
+ 0x1.1cfe02p-15,
+ 0x1.0ef6c4p-15,
+ 0x1.019842p-15,
+ 0x1.e9b5e8p-16,
+ 0x1.d16f58p-16,
+ 0x1.ba4f04p-16,
+ 0x1.a447b8p-16,
+ 0x1.8f4cccp-16,
+ 0x1.7b5224p-16,
+ 0x1.684c22p-16,
+ 0x1.562facp-16,
+ 0x1.44f21ep-16,
+ 0x1.34894ap-16,
+ 0x1.24eb72p-16,
+ 0x1.160f44p-16,
+ 0x1.07ebd2p-16,
+ 0x1.f4f12ep-17,
+ 0x1.db5ad0p-17,
+ 0x1.c304f0p-17,
+ 0x1.abe09ep-17,
+ 0x1.95df98p-17,
+ 0x1.80f43ap-17,
+ 0x1.6d1178p-17,
+ 0x1.5a2ae0p-17,
+ 0x1.483488p-17,
+ 0x1.372310p-17,
+ 0x1.26eb9ep-17,
+ 0x1.1783cep-17,
+ 0x1.08e1bap-17,
+ 0x1.f5f7d8p-18,
+ 0x1.db92b6p-18,
+ 0x1.c282cep-18,
+ 0x1.aab7acp-18,
+ 0x1.94219cp-18,
+ 0x1.7eb1a2p-18,
+ 0x1.6a5972p-18,
+ 0x1.570b6ap-18,
+ 0x1.44ba86p-18,
+ 0x1.335a62p-18,
+ 0x1.22df2ap-18,
+ 0x1.133d96p-18,
+ 0x1.046aeap-18,
+ 0x1.ecb9d0p-19,
+ 0x1.d21398p-19,
+ 0x1.b8d094p-19,
+ 0x1.a0df10p-19,
+ 0x1.8a2e26p-19,
+ 0x1.74adc8p-19,
+ 0x1.604ea8p-19,
+ 0x1.4d0232p-19,
+ 0x1.3aba86p-19,
+ 0x1.296a70p-19,
+ 0x1.190562p-19,
+ 0x1.097f62p-19,
+ 0x1.f59a20p-20,
+ 0x1.d9c736p-20,
+ 0x1.bf716cp-20,
+ 0x1.a6852cp-20,
+ 0x1.8eefd8p-20,
+ 0x1.789fb8p-20,
+ 0x1.6383f8p-20,
+ 0x1.4f8c96p-20,
+ 0x1.3caa62p-20,
+ 0x1.2acee2p-20,
+ 0x1.19ec60p-20,
+ 0x1.09f5d0p-20,
+ 0x1.f5bd96p-21,
+ 0x1.d9371ep-21,
+ 0x1.be41dep-21,
+ 0x1.a4c89ep-21,
+ 0x1.8cb738p-21,
+ 0x1.75fa8ep-21,
+ 0x1.608078p-21,
+ 0x1.4c37c0p-21,
+ 0x1.39100ep-21,
+ 0x1.26f9e0p-21,
+ 0x1.15e682p-21,
+ 0x1.05c804p-21,
+ 0x1.ed2254p-22,
+ 0x1.d06ad6p-22,
+ 0x1.b551c8p-22,
+ 0x1.9bc0a0p-22,
+ 0x1.83a200p-22,
+ 0x1.6ce1aap-22,
+ 0x1.576c72p-22,
+ 0x1.43302cp-22,
+ 0x1.301ba2p-22,
+ 0x1.1e1e86p-22,
+ 0x1.0d2966p-22,
+ 0x1.fa5b50p-23,
+ 0x1.dc3ae4p-23,
+ 0x1.bfd756p-23,
+ 0x1.a517dap-23,
+ 0x1.8be4f8p-23,
+ 0x1.74287ep-23,
+ 0x1.5dcd66p-23,
+ 0x1.48bfd4p-23,
+ 0x1.34ecf8p-23,
+ 0x1.224310p-23,
+ 0x1.10b148p-23,
+ },
+};
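Putting this table and the series expansion together, a scalar reference for the erff routine above might look like the sketch below. It is hypothetical: the extern declaration simply mirrors the initializer order of this file (the real declaration lives in math_config.h), the helper name is made up, and the small-|x| shortcut of the vector code is omitted, so indices 0 and 1 are hit and accuracy is slightly worse near zero.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Layout assumed from the initializer above: erf[] then scale[], 513 entries
   each.  */
extern const struct sv_erff_data
{
  float erf[513];
  float scale[513];
} __sv_erff_data;

static float
erff_scalar_ref (float x)
{
  if (isnan (x))
    return x + x;
  float a = fabsf (x);
  if (a >= 3.9375f)
    return copysignf (1.0f, x); /* erf (|x|) rounds to 1.0f.  */

  /* Round a to the nearest 1/128 and derive the table index.  */
  const float shift = 0x1p16f;
  float z = a + shift;
  uint32_t iz, ishift;
  memcpy (&iz, &z, sizeof iz);
  memcpy (&ishift, &shift, sizeof ishift);
  uint32_t i = iz - ishift; /* i = round (128 * a), at most 504 here.  */
  float r = z - shift;

  /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2).  */
  float d = a - r;
  float d2 = d * d;
  float y = __sv_erff_data.erf[i]
            + __sv_erff_data.scale[i] * (d - d2 * (r + d * (1.0f / 3)));
  return copysignf (y, x);
}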
diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c
new file mode 100644
index 000000000000..519693afcab0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_exp10_1u5.c
@@ -0,0 +1,122 @@
+/*
+ * Double-precision SVE 10^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+#define SpecialBound 307.0 /* floor (log10 (2^1023)). */
+
+static const struct data
+{
+ double poly[5];
+ double shift, log10_2, log2_10_hi, log2_10_lo, scale_thres, special_bound;
+} data = {
+ /* Coefficients generated using Remez algorithm.
+ rel error: 0x1.9fcb9b3p-60
+ abs error: 0x1.a20d9598p-60 in [ -log10(2)/128, log10(2)/128 ]
+ max ulp err 0.52 +0.5. */
+ .poly = { 0x1.26bb1bbb55516p1, 0x1.53524c73cd32ap1, 0x1.0470591daeafbp1,
+ 0x1.2bd77b1361ef6p0, 0x1.142b5d54e9621p-1 },
+ /* 1.5*2^46+1023. This value is further explained below. */
+ .shift = 0x1.800000000ffc0p+46,
+  .log10_2 = 0x1.a934f0979a371p1,     /* 1/log10(2). */
+  .log2_10_hi = 0x1.34413509f79ffp-2, /* log10(2), high part. */
+ .log2_10_lo = -0x1.9dc1da994fd21p-59,
+ .scale_thres = 1280.0,
+ .special_bound = SpecialBound,
+};
+
+#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0). */
+#define SpecialBias1 0x7000000000000000 /* 0x1p769. */
+#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
+
+/* Update of both special and non-special cases, if any special case is
+ detected. */
+static inline svfloat64_t
+special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
+ const struct data *d)
+{
+ /* s=2^n may overflow, break it up into s=s1*s2,
+ such that exp = s + s*y can be computed as s1*(s2+s2*y)
+ and s1*s1 overflows only if n>0. */
+
+  /* If n<=0 then set b to SpecialOffset, 0 otherwise. */
+ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */
+ svuint64_t b = svdup_u64_z (p_sign, SpecialOffset);
+
+ /* Set s1 to generate overflow depending on sign of exponent n. */
+ svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+ /* Offset s to avoid overflow in final result if n is below threshold. */
+ svfloat64_t s2 = svreinterpret_f64 (
+ svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
+
+ /* |n| > 1280 => 2^(n) overflows. */
+ svbool_t p_cmp = svacgt (pg, n, d->scale_thres);
+
+ svfloat64_t r1 = svmul_x (pg, s1, s1);
+ svfloat64_t r2 = svmla_x (pg, s2, s2, y);
+ svfloat64_t r0 = svmul_x (pg, r2, s1);
+
+ return svsel (p_cmp, r1, r0);
+}
+
+/* Fast vector implementation of exp10 using FEXPA instruction.
+ Maximum measured error is 1.02 ulp.
+ SV_NAME_D1 (exp10)(-0x1.2862fec805e58p+2) got 0x1.885a89551d782p-16
+ want 0x1.885a89551d781p-16. */
+svfloat64_t SV_NAME_D1 (exp10) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svbool_t no_big_scale = svacle (pg, x, d->special_bound);
+ svbool_t special = svnot_z (pg, no_big_scale);
+
+ /* n = round(x/(log10(2)/N)). */
+ svfloat64_t shift = sv_f64 (d->shift);
+ svfloat64_t z = svmla_x (pg, shift, x, d->log10_2);
+ svfloat64_t n = svsub_x (pg, z, shift);
+
+ /* r = x - n*log10(2)/N. */
+ svfloat64_t log2_10 = svld1rq (svptrue_b64 (), &d->log2_10_hi);
+ svfloat64_t r = x;
+ r = svmls_lane (r, n, log2_10, 0);
+ r = svmls_lane (r, n, log2_10, 1);
+
+ /* scale = 2^(n/N), computed using FEXPA. FEXPA does not propagate NaNs, so
+ for consistent NaN handling we have to manually propagate them. This
+ comes at significant performance cost. */
+ svuint64_t u = svreinterpret_u64 (z);
+ svfloat64_t scale = svexpa (u);
+
+ /* Approximate exp10(r) using polynomial. */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t y = svmla_x (pg, svmul_x (pg, r, d->poly[0]), r2,
+ sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly + 1));
+
+ /* Assemble result as exp10(x) = 2^n * exp10(r). If |x| > SpecialBound
+ multiplication may overflow, so use special case routine. */
+ if (unlikely (svptest_any (pg, special)))
+ {
+      /* FEXPA zeroes the sign bit; however, the sign is meaningful to the
+	 special case function, so it needs to be copied.
+ e = sign bit of u << 46. */
+ svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000);
+ /* Copy sign to scale. */
+ scale = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (scale)));
+ return special_case (pg, scale, y, n, d);
+ }
+
+ /* No special case. */
+ return svmla_x (pg, scale, scale, y);
+}
+
+PL_SIG (SV, D, 1, exp10, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_D1 (exp10), 0.52)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 0, 307, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp10), 307, inf, 1000)
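
For illustration, the overflow-avoiding split in special_case above (s = s1*s2, so that s*(1+y) can be evaluated as s1*(s2 + s2*y)) can be reproduced in a minimal scalar sketch. Only the bias constants come from the routine; the scale and polynomial term below are made-up, representable stand-ins used purely to check the identity:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define SpecialOffset 0x6000000000000000ULL /* 0x1p513. */
#define SpecialBias1 0x7000000000000000ULL  /* 0x1p769. */
#define SpecialBias2 0x3010000000000000ULL  /* 0x1p-254. */

static uint64_t asuint64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }
static double asdouble (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

int main (void)
{
  /* Stand-in values: a large but still representable scale s = 2^950 and a
     small polynomial term y, chosen only to exercise the identity. */
  double n = 1000.0;
  double s = asdouble ((uint64_t) (1023 + 950) << 52);
  double y = 0x1.8p-10;

  uint64_t b = n <= 0 ? SpecialOffset : 0;
  double s1 = asdouble (SpecialBias1 - b);
  double s2 = asdouble (asuint64 (s) - SpecialBias2 + b);

  /* s1 * s2 == s because SpecialBias1 - SpecialBias2 == asuint(1.0), so
     s1 * (s2 + s2 * y) == s * (1 + y) with better-behaved intermediates. */
  printf ("s=%a s1*s2=%a result=%a\n", s, s1 * s2, s1 * (s2 + s2 * y));
  return 0;
}
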
diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c
new file mode 100644
index 000000000000..9ecde8f1aa52
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_exp10f_1u5.c
@@ -0,0 +1,87 @@
+/*
+ * Single-precision SVE 10^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "include/mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+/* For x < -SpecialBound, the result is subnormal and not handled correctly by
+ FEXPA. */
+#define SpecialBound 37.9
+
+static const struct data
+{
+ float poly[5];
+ float shift, log10_2, log2_10_hi, log2_10_lo, special_bound;
+} data = {
+ /* Coefficients generated using Remez algorithm with minimisation of relative
+ error.
+ rel error: 0x1.89dafa3p-24
+ abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
+ maxerr: 0.52 +0.5 ulp. */
+ .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f,
+ 0x1.12b41ap-1f },
+ /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */
+ .shift = 0x1.903f8p17f,
+ .log10_2 = 0x1.a934fp+1,
+ .log2_10_hi = 0x1.344136p-2,
+ .log2_10_lo = -0x1.ec10cp-27,
+ .special_bound = SpecialBound,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (exp10f, x, y, special);
+}
+
+/* Single-precision SVE exp10f routine. Implements the same algorithm
+ as AdvSIMD exp10f.
+ Worst case error is 1.02 ULPs.
+ _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
+ want 0x1.ba5f9cp-1. */
+svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+  /* exp10(x) = 2^(n/N) * 10^r = 2^(n/N) * (1 + poly (r)),
+     with x = r + n * log10(2) / N
+     and r in [-log10(2)/2N, log10(2)/2N]. */
+
+ /* Load some constants in quad-word chunks to minimise memory access (last
+ lane is wasted). */
+ svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2);
+
+ /* n = round(x/(log10(2)/N)). */
+ svfloat32_t shift = sv_f32 (d->shift);
+ svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0);
+ svfloat32_t n = svsub_x (pg, z, shift);
+
+ /* r = x - n*log10(2)/N. */
+ svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1);
+ r = svmls_lane (r, n, log10_2_and_inv, 2);
+
+ svbool_t special = svacgt (pg, x, d->special_bound);
+ svfloat32_t scale = svexpa (svreinterpret_u32 (z));
+
+ /* Polynomial evaluation: poly(r) ~ exp10(r)-1. */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t poly
+ = svmla_x (pg, svmul_x (pg, r, d->poly[0]),
+ sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1), r2);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmla_x (pg, scale, scale, poly), special);
+
+ return svmla_x (pg, scale, scale, poly);
+}
+
+PL_SIG (SV, F, 1, exp10, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_F1 (exp10), 0.52)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), 0, SpecialBound, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp10), SpecialBound, inf, 50000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c b/contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c
new file mode 100644
index 000000000000..dcbca8adddd1
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_exp2_2u.c
@@ -0,0 +1,107 @@
+/*
+ * Double-precision SVE 2^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define N (1 << V_EXP_TABLE_BITS)
+
+#define BigBound 1022
+#define UOFlowBound 1280
+
+static const struct data
+{
+ double poly[4];
+ double shift, big_bound, uoflow_bound;
+} data = {
+ /* Coefficients are computed using Remez algorithm with
+ minimisation of the absolute error. */
+ .poly = { 0x1.62e42fefa3686p-1, 0x1.ebfbdff82c241p-3, 0x1.c6b09b16de99ap-5,
+ 0x1.3b2abf5571ad8p-7 },
+ .shift = 0x1.8p52 / N,
+ .uoflow_bound = UOFlowBound,
+ .big_bound = BigBound,
+};
+
+#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0). */
+#define SpecialBias1 0x7000000000000000 /* 0x1p769. */
+#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
+
+/* Update of both special and non-special cases, if any special case is
+ detected. */
+static inline svfloat64_t
+special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
+ const struct data *d)
+{
+ /* s=2^n may overflow, break it up into s=s1*s2,
+ such that exp = s + s*y can be computed as s1*(s2+s2*y)
+ and s1*s1 overflows only if n>0. */
+
+  /* If n<=0 then set b to SpecialOffset, 0 otherwise. */
+ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */
+ svuint64_t b = svdup_u64_z (p_sign, SpecialOffset);
+
+ /* Set s1 to generate overflow depending on sign of exponent n. */
+ svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
+ /* Offset s to avoid overflow in final result if n is below threshold. */
+ svfloat64_t s2 = svreinterpret_f64 (
+ svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
+
+ /* |n| > 1280 => 2^(n) overflows. */
+ svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
+
+ svfloat64_t r1 = svmul_x (pg, s1, s1);
+ svfloat64_t r2 = svmla_x (pg, s2, s2, y);
+ svfloat64_t r0 = svmul_x (pg, r2, s1);
+
+ return svsel (p_cmp, r1, r0);
+}
+
+/* Fast vector implementation of exp2.
+ Maximum measured error is 1.65 ulp.
+ _ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
+ want 0x1.f8db0d4df721dp-1. */
+svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svbool_t no_big_scale = svacle (pg, x, d->big_bound);
+ svbool_t special = svnot_z (pg, no_big_scale);
+
+ /* Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N]. */
+ svfloat64_t shift = sv_f64 (d->shift);
+ svfloat64_t kd = svadd_x (pg, x, shift);
+ svuint64_t ki = svreinterpret_u64 (kd);
+ /* kd = k/N. */
+ kd = svsub_x (pg, kd, shift);
+ svfloat64_t r = svsub_x (pg, x, kd);
+
+ /* scale ~= 2^(k/N). */
+ svuint64_t idx = svand_x (pg, ki, N - 1);
+ svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ svuint64_t top = svlsl_x (pg, ki, 52 - V_EXP_TABLE_BITS);
+ svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
+
+ /* Approximate exp2(r) using polynomial. */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, r, r2, d->poly);
+ svfloat64_t y = svmul_x (pg, r, p);
+
+ /* Assemble exp2(x) = exp2(r) * scale. */
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (pg, scale, y, kd, d);
+ return svmla_x (pg, scale, scale, y);
+}
+
+PL_SIG (SV, D, 1, exp2, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_D1 (exp2), 1.15)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), 0, BigBound, 1000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), BigBound, UOFlowBound, 100000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp2), UOFlowBound, inf, 1000)
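
For illustration, the "scale ~= 2^(k/N)" step above combines a table entry for the fractional part of k/N with an exponent increment for the integer part. A simplified scalar sketch of that bit arithmetic, with a toy 8-entry table built from libm exp2 instead of the shared __v_exp_data table, might look like this (it is not the library's table layout):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

#define TOY_TABLE_BITS 3
#define TOY_N (1 << TOY_TABLE_BITS)

static uint64_t asuint64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }
static double asdouble (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

int main (void)
{
  /* Toy table of bit patterns for 2^(i/N), i = 0..N-1. */
  uint64_t tab[TOY_N];
  for (int i = 0; i < TOY_N; i++)
    tab[i] = asuint64 (exp2 ((double) i / TOY_N));

  int64_t k = 37; /* want 2^(k/N) = 2^(4 + 5/8). */
  uint64_t sbits = tab[k % TOY_N];
  uint64_t top = (uint64_t) (k / TOY_N) << 52; /* integer part into the exponent. */
  double scale = asdouble (sbits + top);

  printf ("scale=%a exp2(k/N)=%a\n", scale, exp2 ((double) k / TOY_N));
  return 0;
}
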
diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c b/contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c
new file mode 100644
index 000000000000..9698ff6f0682
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_exp2f_1u6.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision SVE 2^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float poly[5];
+ float shift, thres;
+} data = {
+ /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
+ compatibility with polynomial helpers. */
+ .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f,
+ 0x1.59977ap-10f },
+ /* 1.5*2^17 + 127. */
+ .shift = 0x1.903f8p17f,
+ /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+ correctly by FEXPA. */
+ .thres = 0x1.5d5e2ap+6f,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (exp2f, x, y, special);
+}
+
+/* Single-precision SVE exp2f routine. Implements the same algorithm
+ as AdvSIMD exp2f.
+ Worst case error is 1.04 ULPs.
+ SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0
+ want 0x1.ba7ebp+0. */
+svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = n + r, with r in [-1/2, 1/2]. */
+ svfloat32_t shift = sv_f32 (d->shift);
+ svfloat32_t z = svadd_x (pg, x, shift);
+ svfloat32_t n = svsub_x (pg, z, shift);
+ svfloat32_t r = svsub_x (pg, x, n);
+
+ svbool_t special = svacgt (pg, x, d->thres);
+ svfloat32_t scale = svexpa (svreinterpret_u32 (z));
+
+ /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
+     Evaluate polynomial using a hybrid scheme - offset ESTRIN by 1 for
+ coefficients 1 to 4, and apply most significant coefficient directly. */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1);
+ svfloat32_t p0 = svmul_x (pg, r, d->poly[0]);
+ svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmla_x (pg, scale, scale, poly), special);
+
+ return svmla_x (pg, scale, scale, poly);
+}
+
+PL_SIG (SV, F, 1, exp2, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_F1 (exp2), 0.55)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, 1, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), 1, Thres, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, -0x1p-23, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p-23, -1, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, -0x1p23, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p23, -inf, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, ScaleThres, 40000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -1, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, ScaleThres, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -inf, 50000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c
new file mode 100644
index 000000000000..c187def9e625
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_exp_1u5.c
@@ -0,0 +1,137 @@
+/*
+ * Double-precision vector e^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ double poly[4];
+ double ln2_hi, ln2_lo, inv_ln2, shift, thres;
+} data = {
+ .poly = { /* ulp error: 0.53. */
+ 0x1.fffffffffdbcdp-2, 0x1.555555555444cp-3, 0x1.555573c6a9f7dp-5,
+ 0x1.1111266d28935p-7 },
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ /* 1/ln2. */
+ .inv_ln2 = 0x1.71547652b82fep+0,
+ /* 1.5*2^46+1023. This value is further explained below. */
+ .shift = 0x1.800000000ffc0p+46,
+ .thres = 704.0,
+};
+
+#define C(i) sv_f64 (d->poly[i])
+#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0). */
+#define SpecialBias1 0x7000000000000000 /* 0x1p769. */
+#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
+
+/* Update of both special and non-special cases, if any special case is
+ detected. */
+static inline svfloat64_t
+special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n)
+{
+ /* s=2^n may overflow, break it up into s=s1*s2,
+ such that exp = s + s*y can be computed as s1*(s2+s2*y)
+ and s1*s1 overflows only if n>0. */
+
+  /* If n<=0 then set b to SpecialOffset, 0 otherwise. */
+ svbool_t p_sign = svcmple (pg, n, 0.0); /* n <= 0. */
+ svuint64_t b
+ = svdup_u64_z (p_sign, SpecialOffset); /* Inactive lanes set to 0. */
+
+ /* Set s1 to generate overflow depending on sign of exponent n. */
+ svfloat64_t s1 = svreinterpret_f64 (
+ svsubr_x (pg, b, SpecialBias1)); /* 0x70...0 - b. */
+ /* Offset s to avoid overflow in final result if n is below threshold. */
+ svfloat64_t s2 = svreinterpret_f64 (
+ svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2),
+ b)); /* as_u64 (s) - 0x3010...0 + b. */
+
+ /* |n| > 1280 => 2^(n) overflows. */
+ svbool_t p_cmp = svacgt (pg, n, 1280.0);
+
+ svfloat64_t r1 = svmul_x (pg, s1, s1);
+ svfloat64_t r2 = svmla_x (pg, s2, s2, y);
+ svfloat64_t r0 = svmul_x (pg, r2, s1);
+
+ return svsel (p_cmp, r1, r0);
+}
+
+/* SVE exp algorithm. Maximum measured error is 1.01ulps:
+ SV_NAME_D1 (exp)(0x1.4619d7b04da41p+6) got 0x1.885d9acc41da7p+117
+ want 0x1.885d9acc41da6p+117. */
+svfloat64_t SV_NAME_D1 (exp) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svbool_t special = svacgt (pg, x, d->thres);
+
+  /* Use a modified version of the shift used for flooring, such that x/ln2 is
+ rounded to a multiple of 2^-6=1/64, shift = 1.5 * 2^52 * 2^-6 = 1.5 *
+ 2^46.
+
+ n is not an integer but can be written as n = m + i/64, with i and m
+ integer, 0 <= i < 64 and m <= n.
+
+     Bits 5:0 of z will be zero every time x/ln2 reaches a new integer value
+     (n=m, i=0), and are incremented every time z (or n) is incremented by 1/64.
+ FEXPA expects i in bits 5:0 of the input so it can be used as index into
+ FEXPA hardwired table T[i] = 2^(i/64) for i = 0:63, that will in turn
+ populate the mantissa of the output. Therefore, we use u=asuint(z) as
+ input to FEXPA.
+
+ We add 1023 to the modified shift value in order to set bits 16:6 of u to
+ 1, such that once these bits are moved to the exponent of the output of
+ FEXPA, we get the exponent of 2^n right, i.e. we get 2^m. */
+ svfloat64_t z = svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2);
+ svuint64_t u = svreinterpret_u64 (z);
+ svfloat64_t n = svsub_x (pg, z, d->shift);
+
+ /* r = x - n * ln2, r is in [-ln2/(2N), ln2/(2N)]. */
+ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+ svfloat64_t r = svmls_lane (x, n, ln2, 0);
+ r = svmls_lane (r, n, ln2, 1);
+
+ /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5. */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t p01 = svmla_x (pg, C (0), C (1), r);
+ svfloat64_t p23 = svmla_x (pg, C (2), C (3), r);
+ svfloat64_t p04 = svmla_x (pg, p01, p23, r2);
+ svfloat64_t y = svmla_x (pg, r, p04, r2);
+
+ /* s = 2^n, computed using FEXPA. FEXPA does not propagate NaNs, so for
+ consistent NaN handling we have to manually propagate them. This comes at
+ significant performance cost. */
+ svfloat64_t s = svexpa (u);
+
+ /* Assemble result as exp(x) = 2^n * exp(r). If |x| > Thresh the
+ multiplication may overflow, so use special case routine. */
+
+ if (unlikely (svptest_any (pg, special)))
+ {
+      /* FEXPA zeroes the sign bit; however, the sign is meaningful to the
+	 special case function, so it needs to be copied.
+ e = sign bit of u << 46. */
+ svuint64_t e = svand_x (pg, svlsl_x (pg, u, 46), 0x8000000000000000);
+ /* Copy sign to s. */
+ s = svreinterpret_f64 (svadd_x (pg, e, svreinterpret_u64 (s)));
+ return special_case (pg, s, y, n);
+ }
+
+ /* No special case. */
+ return svmla_x (pg, s, s, y);
+}
+
+PL_SIG (SV, D, 1, exp, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_D1 (exp), 1.46)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0, 0x1p-23, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p-23, 1, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 1, 0x1p23, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (exp), 0x1p23, inf, 50000)
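
For illustration, the shift trick described in the comment above can be checked with a small scalar sketch that performs the same rounding and pulls the table index i and the integer part m back out of the bit pattern. It assumes the default round-to-nearest mode and an input well inside the normal range; the chosen x is arbitrary:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint64_t asuint64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }

int main (void)
{
  double x = 3.7;
  double inv_ln2 = 0x1.71547652b82fep+0;
  double shift = 0x1.800000000ffc0p+46; /* 1.5*2^46 + 1023. */

  double z = shift + x * inv_ln2; /* x/ln2 rounded to a multiple of 1/64. */
  double n = z - shift;           /* n = m + i/64. */
  uint64_t u = asuint64 (z);

  uint64_t i = u & 0x3f;                            /* index into the 2^(i/64) table. */
  int64_t m = (int64_t) ((u >> 6) & 0x7ff) - 1023;  /* integer part of n. */

  printf ("n=%g i=%llu m=%lld m+i/64=%g\n", n, (unsigned long long) i,
          (long long) m, (double) m + (double) i / 64);
  return 0;
}
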
diff --git a/contrib/arm-optimized-routines/pl/math/sv_expf_2u.c b/contrib/arm-optimized-routines/pl/math/sv_expf_2u.c
new file mode 100644
index 000000000000..93d705ce420a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_expf_2u.c
@@ -0,0 +1,86 @@
+/*
+ * Single-precision vector e^x function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float poly[5];
+ float inv_ln2, ln2_hi, ln2_lo, shift, thres;
+} data = {
+ /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
+ compatibility with polynomial helpers. */
+ .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f,
+ 0x1.0e4020p-7f },
+ .inv_ln2 = 0x1.715476p+0f,
+ .ln2_hi = 0x1.62e4p-1f,
+ .ln2_lo = 0x1.7f7d1cp-20f,
+ /* 1.5*2^17 + 127. */
+ .shift = 0x1.903f8p17f,
+ /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
+ correctly by FEXPA. */
+ .thres = 0x1.5d5e2ap+6f,
+};
+
+#define C(i) sv_f32 (d->poly[i])
+#define ExponentBias 0x3f800000
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (expf, x, y, special);
+}
+
+/* Optimised single-precision SVE exp function.
+ Worst-case error is 1.04 ulp:
+ SV_NAME_F1 (exp)(0x1.a8eda4p+1) got 0x1.ba74bcp+4
+ want 0x1.ba74bap+4. */
+svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+
+ /* Load some constants in quad-word chunks to minimise memory access (last
+ lane is wasted). */
+ svfloat32_t invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->inv_ln2);
+
+ /* n = round(x/(ln2/N)). */
+ svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, invln2_and_ln2, 0);
+ svfloat32_t n = svsub_x (pg, z, d->shift);
+
+ /* r = x - n*ln2/N. */
+ svfloat32_t r = svmls_lane (x, n, invln2_and_ln2, 1);
+ r = svmls_lane (r, n, invln2_and_ln2, 2);
+
+ /* scale = 2^(n/N). */
+ svbool_t is_special_case = svacgt (pg, x, d->thres);
+ svfloat32_t scale = svexpa (svreinterpret_u32 (z));
+
+ /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
+ svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
+ svfloat32_t p34 = svmla_x (pg, C (3), C (4), r);
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+ svfloat32_t p0 = svmul_x (pg, r, C (0));
+ svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+ if (unlikely (svptest_any (pg, is_special_case)))
+ return special_case (x, svmla_x (pg, scale, scale, poly), is_special_case);
+
+ return svmla_x (pg, scale, scale, poly);
+}
+
+PL_SIG (SV, F, 1, exp, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_F1 (exp), 0.55)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, 0x1p-23, 40000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0x1p-23, 1, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 1, 0x1p23, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0x1p23, inf, 50000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_expf_inline.h b/contrib/arm-optimized-routines/pl/math/sv_expf_inline.h
new file mode 100644
index 000000000000..0ef4e0fda946
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_expf_inline.h
@@ -0,0 +1,66 @@
+/*
+ * SVE helper for single-precision routines which calculate exp(x) and do
+ * not need special-case handling
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_SV_EXPF_INLINE_H
+#define PL_MATH_SV_EXPF_INLINE_H
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+struct sv_expf_data
+{
+ float poly[5];
+ float inv_ln2, ln2_hi, ln2_lo, shift;
+};
+
+/* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
+ compatibility with polynomial helpers. Shift is 1.5*2^17 + 127. */
+#define SV_EXPF_DATA \
+ { \
+ .poly = { 0x1.ffffecp-1f, 0x1.fffdb6p-2f, 0x1.555e66p-3f, 0x1.573e2ep-5f, \
+ 0x1.0e4020p-7f }, \
+ \
+ .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
+ .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \
+ }
+
+#define C(i) sv_f32 (d->poly[i])
+
+static inline svfloat32_t
+expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
+{
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+
+ /* Load some constants in quad-word chunks to minimise memory access. */
+ svfloat32_t c4_invln2_and_ln2 = svld1rq (svptrue_b32 (), &d->poly[4]);
+
+ /* n = round(x/(ln2/N)). */
+ svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, c4_invln2_and_ln2, 1);
+ svfloat32_t n = svsub_x (pg, z, d->shift);
+
+ /* r = x - n*ln2/N. */
+ svfloat32_t r = svmls_lane (x, n, c4_invln2_and_ln2, 2);
+ r = svmls_lane (r, n, c4_invln2_and_ln2, 3);
+
+ /* scale = 2^(n/N). */
+ svfloat32_t scale = svexpa (svreinterpret_u32_f32 (z));
+
+ /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4 + C3 r^5 + C4 r^6. */
+ svfloat32_t p12 = svmla_x (pg, C (1), C (2), r);
+ svfloat32_t p34 = svmla_lane (C (3), r, c4_invln2_and_ln2, 0);
+ svfloat32_t r2 = svmul_f32_x (pg, r, r);
+ svfloat32_t p14 = svmla_x (pg, p12, p34, r2);
+ svfloat32_t p0 = svmul_f32_x (pg, r, C (0));
+ svfloat32_t poly = svmla_x (pg, p0, r2, p14);
+
+ return svmla_x (pg, scale, scale, poly);
+}
+
+#endif // PL_MATH_SV_EXPF_INLINE_H
\ No newline at end of file
diff --git a/contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c
new file mode 100644
index 000000000000..82a31f6d9c0e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_expm1_2u5.c
@@ -0,0 +1,95 @@
+/*
+ * Double-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define SpecialBound 0x1.62b7d369a5aa9p+9
+#define ExponentBias 0x3ff0000000000000
+
+static const struct data
+{
+ double poly[11];
+ double shift, inv_ln2, special_bound;
+ /* To be loaded in one quad-word. */
+ double ln2_hi, ln2_lo;
+} data = {
+ /* Generated using fpminimax. */
+ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10, 0x1.a01a01affa35dp-13,
+ 0x1.a01a018b4ecbbp-16, 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
+
+ .special_bound = SpecialBound,
+ .inv_ln2 = 0x1.71547652b82fep0,
+ .ln2_hi = 0x1.62e42fefa39efp-1,
+ .ln2_lo = 0x1.abc9e3b39803fp-56,
+ .shift = 0x1.8p52,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t pg)
+{
+ return sv_call_f64 (expm1, x, y, pg);
+}
+
+/* Double-precision vector exp(x) - 1 function.
+   The maximum observed error is 2.18 ULP:
+ _ZGVsMxv_expm1(0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
+ want 0x1.a8b9ea8d66e2p-2. */
+svfloat64_t SV_NAME_D1 (expm1) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+  /* Large, NaN/Inf. */
+ svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound));
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ svfloat64_t shift = sv_f64 (d->shift);
+ svfloat64_t n = svsub_x (pg, svmla_x (pg, shift, x, d->inv_ln2), shift);
+ svint64_t i = svcvt_s64_x (pg, n);
+ svfloat64_t ln2 = svld1rq (svptrue_b64 (), &d->ln2_hi);
+ svfloat64_t f = svmls_lane (x, n, ln2, 0);
+ f = svmls_lane (f, n, ln2, 1);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ svfloat64_t f2 = svmul_x (pg, f, f);
+ svfloat64_t f4 = svmul_x (pg, f2, f2);
+ svfloat64_t f8 = svmul_x (pg, f4, f4);
+ svfloat64_t p
+ = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ svint64_t u = svadd_x (pg, svlsl_x (pg, i, 52), ExponentBias);
+ svfloat64_t t = svreinterpret_f64 (u);
+
+ /* expm1(x) ~= p * t + (t - 1). */
+ svfloat64_t y = svmla_x (pg, svsub_x (pg, t, 1), p, t);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, y, special);
+
+ return y;
+}
+
+PL_SIG (SV, D, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_D1 (expm1), 1.68)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0, 0x1p-23, 1000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), 0x1p-23, SpecialBound, 200000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (expm1), SpecialBound, inf, 1000)
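
For illustration, the reassembly expm1(x) = 2^i*(expm1(f)+1) - 1 = p*t + (t - 1) can be exercised in a scalar sketch. Here libm expm1 stands in for the polynomial and round() for the shift-based rounding, so this shows only the identity, not the vector code:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static double asdouble (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

int main (void)
{
  double x = 3.2;
  double inv_ln2 = 0x1.71547652b82fep0;
  double ln2_hi = 0x1.62e42fefa39efp-1, ln2_lo = 0x1.abc9e3b39803fp-56;

  double n = round (x * inv_ln2);
  int64_t i = (int64_t) n;
  double f = x - n * ln2_hi - n * ln2_lo; /* f in [-ln2/2, ln2/2]. */

  double p = expm1 (f);                              /* stand-in for the polynomial. */
  double t = asdouble ((uint64_t) (i + 1023) << 52); /* t = 2^i. */

  double y = p * t + (t - 1.0);                      /* expm1(x) ~= p*t + (t - 1). */
  printf ("y=%a expm1(x)=%a\n", y, expm1 (x));
  return 0;
}
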
diff --git a/contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c b/contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c
new file mode 100644
index 000000000000..0ec7c00f5300
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_expm1f_1u6.c
@@ -0,0 +1,93 @@
+/*
+ * Single-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Largest value of x for which expm1(x) should round to -1. */
+#define SpecialBound 0x1.5ebc4p+6f
+
+static const struct data
+{
+ /* These 4 are grouped together so they can be loaded as one quadword, then
+ used with _lane forms of svmla/svmls. */
+ float c2, c4, ln2_hi, ln2_lo;
+ float c0, c1, c3, inv_ln2, special_bound, shift;
+} data = {
+ /* Generated using fpminimax. */
+ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3,
+ .c2 = 0x1.555736p-5, .c3 = 0x1.12287cp-7,
+ .c4 = 0x1.6b55a2p-10,
+
+ .special_bound = SpecialBound, .shift = 0x1.8p23f,
+ .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f,
+ .ln2_lo = 0x1.7f7d1cp-20f,
+};
+
+#define C(i) sv_f32 (d->c##i)
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svbool_t pg)
+{
+ return sv_call_f32 (expm1f, x, x, pg);
+}
+
+/* Single-precision SVE exp(x) - 1. Maximum error is 1.52 ULP:
+ _ZGVsMxv_expm1f(0x1.8f4ebcp-2) got 0x1.e859dp-2
+ want 0x1.e859d4p-2. */
+svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Large, NaN/Inf. */
+ svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound));
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, pg);
+
+ /* This vector is reliant on layout of data - it contains constants
+ that can be used with _lane forms of svmla/svmls. Values are:
+ [ coeff_2, coeff_4, ln2_hi, ln2_lo ]. */
+ svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->c2);
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
+ j = svsub_x (pg, j, d->shift);
+ svint32_t i = svcvt_s32_x (pg, j);
+
+ svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
+ f = svmls_lane (f, j, lane_constants, 3);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
+ svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
+ svfloat32_t f2 = svmul_x (pg, f, f);
+ svfloat32_t p = svmla_x (pg, p12, f2, p34);
+ p = svmla_x (pg, C (0), f, p);
+ p = svmla_x (pg, f, f2, p);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ svfloat32_t t = svreinterpret_f32 (
+ svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, 23)), 0x3f800000));
+ return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+}
+
+PL_SIG (SV, F, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (SV_NAME_F1 (expm1), 1.02)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), 0, SpecialBound, 100000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (expm1), SpecialBound, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h b/contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h
new file mode 100644
index 000000000000..a6e2050ff4a6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_expm1f_inline.h
@@ -0,0 +1,73 @@
+/*
+ * SVE helper for single-precision routines which calculate exp(x) - 1 and do
+ * not need special-case handling
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_SV_EXPM1F_INLINE_H
+#define PL_MATH_SV_EXPM1F_INLINE_H
+
+#include "sv_math.h"
+
+struct sv_expm1f_data
+{
+ /* These 4 are grouped together so they can be loaded as one quadword, then
+ used with _lane forms of svmla/svmls. */
+ float32_t c2, c4, ln2_hi, ln2_lo;
+ float32_t c0, c1, c3, inv_ln2, shift;
+};
+
+/* Coefficients generated using fpminimax. */
+#define SV_EXPM1F_DATA \
+ { \
+ .c0 = 0x1.fffffep-2, .c1 = 0x1.5554aep-3, .c2 = 0x1.555736p-5, \
+ .c3 = 0x1.12287cp-7, .c4 = 0x1.6b55a2p-10, \
+ \
+ .shift = 0x1.8p23f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \
+ .ln2_lo = 0x1.7f7d1cp-20f, \
+ }
+
+#define C(i) sv_f32 (d->c##i)
+
+static inline svfloat32_t
+expm1f_inline (svfloat32_t x, svbool_t pg, const struct sv_expm1f_data *d)
+{
+ /* This vector is reliant on layout of data - it contains constants
+ that can be used with _lane forms of svmla/svmls. Values are:
+ [ coeff_2, coeff_4, ln2_hi, ln2_lo ]. */
+ svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->c2);
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ svfloat32_t j = svmla_x (pg, sv_f32 (d->shift), x, d->inv_ln2);
+ j = svsub_x (pg, j, d->shift);
+ svint32_t i = svcvt_s32_x (pg, j);
+
+ svfloat32_t f = svmls_lane (x, j, lane_constants, 2);
+ f = svmls_lane (f, j, lane_constants, 3);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ svfloat32_t p12 = svmla_lane (C (1), f, lane_constants, 0);
+ svfloat32_t p34 = svmla_lane (C (3), f, lane_constants, 1);
+ svfloat32_t f2 = svmul_x (pg, f, f);
+ svfloat32_t p = svmla_x (pg, p12, f2, p34);
+ p = svmla_x (pg, C (0), f, p);
+ p = svmla_x (pg, f, f2, p);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ svfloat32_t t = svscale_x (pg, sv_f32 (1), i);
+ return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+}
+
+#endif // PL_MATH_SV_EXPM1F_INLINE_H
\ No newline at end of file
diff --git a/contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c
new file mode 100644
index 000000000000..cf1590e4b9ab
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_hypot_1u5.c
@@ -0,0 +1,51 @@
+/*
+ * Double-precision SVE hypot(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint64_t tiny_bound, thres;
+} data = {
+ .tiny_bound = 0x0c80000000000000, /* asuint (0x1p-102). */
+ .thres = 0x7300000000000000, /* asuint (inf) - tiny_bound. */
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
+ svbool_t special)
+{
+ return sv_call2_f64 (hypot, x, y, svsqrt_x (pg, sqsum), special);
+}
+
+/* SVE implementation of double-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330)
+ got 0x1.6a22d0412cfp+352
+ want 0x1.6a22d0412cf01p+352. */
+svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
+
+ svbool_t special = svcmpge (
+ pg, svsub_x (pg, svreinterpret_u64 (sqsum), d->tiny_bound), d->thres);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (sqsum, x, y, pg, special);
+ return svsqrt_x (pg, sqsum);
+}
+
+PL_SIG (SV, D, 2, hypot, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_D2 (hypot), 0.71)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c
new file mode 100644
index 000000000000..f428832b3dbc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_hypotf_1u5.c
@@ -0,0 +1,45 @@
+/*
+ * Single-precision SVE hypot(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define TinyBound 0x0c800000 /* asuint (0x1p-102). */
+#define Thres 0x73000000 /* 0x70000000 - TinyBound. */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
+ svbool_t special)
+{
+ return sv_call2_f32 (hypotf, x, y, svsqrt_x (pg, sqsum), special);
+}
+
+/* SVE implementation of single-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19
+ want 0x1.6a2344p-19. */
+svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
+ const svbool_t pg)
+{
+ svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
+
+ svbool_t special = svcmpge (
+ pg, svsub_x (pg, svreinterpret_u32 (sqsum), TinyBound), Thres);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (sqsum, x, y, pg, special);
+
+ return svsqrt_x (pg, sqsum);
+}
+
+PL_SIG (SV, F, 2, hypot, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_F2 (hypot), 0.71)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
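
For illustration, the special-case test above relies on a single unsigned compare, (asuint(x*x + y*y) - TinyBound) >= Thres, to catch sums that are too small, too large, or NaN/Inf in one go. A scalar sketch with the single-precision constants, using made-up sample inputs:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint32_t asuint32 (float x) { uint32_t u; memcpy (&u, &x, 4); return u; }

static int needs_special (float sqsum)
{
  const uint32_t TinyBound = 0x0c800000; /* asuint (0x1p-102). */
  const uint32_t Thres = 0x73000000;
  /* Wraps around for sqsum < TinyBound, and exceeds Thres for huge,
     infinite or NaN sums, so one compare covers every special case. */
  return asuint32 (sqsum) - TinyBound >= Thres;
}

int main (void)
{
  printf ("%d\n", needs_special (1.0f));      /* 0: fast path. */
  printf ("%d\n", needs_special (0x1p-120f)); /* 1: underflow risk. */
  printf ("%d\n", needs_special (INFINITY));  /* 1: overflow/inf. */
  printf ("%d\n", needs_special (NAN));       /* 1: nan. */
  return 0;
}
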
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c
new file mode 100644
index 000000000000..f55e068fd442
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log10_2u5.c
@@ -0,0 +1,75 @@
+/*
+ * Double-precision SVE log10(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+#define Min 0x0010000000000000
+#define Max 0x7ff0000000000000
+#define Thres 0x7fe0000000000000 /* Max - Min. */
+#define Off 0x3fe6900900000000
+#define N (1 << V_LOG10_TABLE_BITS)
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (log10, x, y, special);
+}
+
+/* SVE log10 algorithm.
+ Maximum measured error is 2.46 ulps.
+ SV_NAME_D1 (log10)(0x1.131956cd4b627p+0) got 0x1.fffbdf6eaa669p-6
+ want 0x1.fffbdf6eaa667p-6. */
+svfloat64_t SV_NAME_D1 (log10) (svfloat64_t x, const svbool_t pg)
+{
+ svuint64_t ix = svreinterpret_u64 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
+
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ svuint64_t tmp = svsub_x (pg, ix, Off);
+ svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG10_TABLE_BITS);
+ i = svand_x (pg, i, (N - 1) << 1);
+ svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
+ svfloat64_t z = svreinterpret_f64 (
+ svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)));
+
+  /* log10(x) = k*log10(2) + log10(c) + log10(z/c). */
+ svfloat64_t invc = svld1_gather_index (pg, &__v_log10_data.table[0].invc, i);
+ svfloat64_t logc
+ = svld1_gather_index (pg, &__v_log10_data.table[0].log10c, i);
+
+ /* We approximate log(z/c) with a polynomial P(x) ~= log(x + 1):
+ r = z/c - 1 (we look up precomputed 1/c)
+ log(z/c) ~= P(r). */
+ svfloat64_t r = svmad_x (pg, invc, z, -1.0);
+
+  /* hi = log10(c) + r/ln(10) + k*log10(2). */
+ svfloat64_t w = svmla_x (pg, logc, r, __v_log10_data.invln10);
+ svfloat64_t hi = svmla_x (pg, w, k, __v_log10_data.log10_2);
+
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log10_data.poly);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y),
+ special);
+ return svmla_x (pg, hi, r2, y);
+}
+
+PL_SIG (SV, D, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (SV_NAME_D1 (log10), 1.97)
+PL_TEST_INTERVAL (SV_NAME_D1 (log10), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log10), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log10), 1.0, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log10), 100, inf, 50000)
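
For illustration, the reduction above peels x apart as x = 2^k * z with z in roughly [0.70, 1.41) by subtracting the bit pattern Off and reading off the exponent difference. A scalar sketch of just that bit manipulation (the table lookup and polynomial are omitted), for an arbitrary positive normal input:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint64_t asuint64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }
static double asdouble (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

int main (void)
{
  const uint64_t Off = 0x3fe6900900000000;
  double x = 123.456;

  uint64_t ix = asuint64 (x);
  uint64_t tmp = ix - Off;
  int64_t k = (int64_t) tmp >> 52; /* arithmetic shift, as in the routine. */
  double z = asdouble (ix - (tmp & (0xfffULL << 52)));

  /* x should equal z * 2^k with z in [0.70..., 1.41...). */
  printf ("k=%lld z=%.17g z*2^k=%.17g\n", (long long) k, z, ldexp (z, (int) k));
  return 0;
}
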
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c
new file mode 100644
index 000000000000..a685b23e5de5
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log10f_3u5.c
@@ -0,0 +1,93 @@
+/*
+ * Single-precision SVE log10 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float poly_0246[4];
+ float poly_1357[4];
+ float ln2, inv_ln10;
+} data = {
+ .poly_1357 = {
+ /* Coefficients copied from the AdvSIMD routine, then rearranged so that coeffs
+ 1, 3, 5 and 7 can be loaded as a single quad-word, hence used with _lane
+ variant of MLA intrinsic. */
+ 0x1.2879c8p-3f, 0x1.6408f8p-4f, 0x1.f0e514p-5f, 0x1.f5f76ap-5f
+ },
+ .poly_0246 = { -0x1.bcb79cp-3f, -0x1.bcd472p-4f, -0x1.246f8p-4f,
+ -0x1.0fc92cp-4f },
+ .ln2 = 0x1.62e43p-1f,
+ .inv_ln10 = 0x1.bcb7b2p-2f,
+};
+
+#define Min 0x00800000
+#define Max 0x7f800000
+#define Thres 0x7f000000 /* Max - Min. */
+#define Offset 0x3f2aaaab /* 0.666667. */
+#define MantissaMask 0x007fffff
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (log10f, x, y, special);
+}
+
+/* Optimised implementation of SVE log10f using the same algorithm and
+ polynomial as AdvSIMD log10f.
+ Maximum error is 3.31ulps:
+ SV_NAME_F1 (log10)(0x1.555c16p+0) got 0x1.ffe2fap-4
+ want 0x1.ffe2f4p-4. */
+svfloat32_t SV_NAME_F1 (log10) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svuint32_t ix = svreinterpret_u32 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thres);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ ix = svsub_x (pg, ix, Offset);
+ svfloat32_t n = svcvt_f32_x (
+ pg, svasr_x (pg, svreinterpret_s32 (ix), 23)); /* signextend. */
+ ix = svand_x (pg, ix, MantissaMask);
+ ix = svadd_x (pg, ix, Offset);
+ svfloat32_t r = svsub_x (pg, svreinterpret_f32 (ix), 1.0f);
+
+ /* y = log10(1+r) + n*log10(2)
+ log10(1+r) ~ r * InvLn(10) + P(r)
+ where P(r) is a polynomial. Use order 9 for log10(1+x), i.e. order 8 for
+ log10(1+x)/x, with x in [-1/3, 1/3] (offset=2/3). */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t r4 = svmul_x (pg, r2, r2);
+ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
+ svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_0246[0]), r, p_1357, 0);
+ svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_0246[1]), r, p_1357, 1);
+ svfloat32_t q_45 = svmla_lane (sv_f32 (d->poly_0246[2]), r, p_1357, 2);
+ svfloat32_t q_67 = svmla_lane (sv_f32 (d->poly_0246[3]), r, p_1357, 3);
+ svfloat32_t q_47 = svmla_x (pg, q_45, r2, q_67);
+ svfloat32_t q_03 = svmla_x (pg, q_01, r2, q_23);
+ svfloat32_t y = svmla_x (pg, q_03, r4, q_47);
+
+ /* Using hi = Log10(2)*n + r*InvLn(10) is faster but less accurate. */
+ svfloat32_t hi = svmla_x (pg, r, n, d->ln2);
+ hi = svmul_x (pg, hi, d->inv_ln10);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmla_x (svnot_z (pg, special), hi, r2, y),
+ special);
+ return svmla_x (pg, hi, r2, y);
+}
+
+PL_SIG (SV, F, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (SV_NAME_F1 (log10), 2.82)
+PL_TEST_INTERVAL (SV_NAME_F1 (log10), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log10), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log10), 1.0, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log10), 100, inf, 50000)
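
For illustration, the opening reduction above rewrites x as 2^n * (1 + r) with 2/3 < 1 + r < 4/3 by re-centring the exponent around Offset (~2/3). A scalar sketch for a positive normal input (the polynomial and special-case handling are omitted):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint32_t asuint32 (float x) { uint32_t u; memcpy (&u, &x, 4); return u; }
static float asfloat (uint32_t u) { float x; memcpy (&x, &u, 4); return x; }

int main (void)
{
  const uint32_t Offset = 0x3f2aaaab; /* ~0.666667. */
  const uint32_t MantissaMask = 0x007fffff;
  float x = 5.0f;

  uint32_t ix = asuint32 (x) - Offset;
  int32_t n = (int32_t) ix >> 23; /* signed shift recovers the exponent. */
  ix = (ix & MantissaMask) + Offset;
  float r = asfloat (ix) - 1.0f;  /* r in (-1/3, 1/3). */

  printf ("n=%d 1+r=%g (1+r)*2^n=%g\n", (int) n, 1.0f + r, ldexpf (1.0f + r, n));
  return 0;
}
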
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c
new file mode 100644
index 000000000000..f178ab16238a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log1p_2u5.c
@@ -0,0 +1,116 @@
+/*
+ * Double-precision SVE log(1+x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ double poly[19];
+ double ln2_hi, ln2_lo;
+ uint64_t hfrt2_top, onemhfrt2_top, inf, mone;
+} data = {
+ /* Generated using Remez in [ sqrt(2)/2 - 1, sqrt(2) - 1]. Order 20
+ polynomial, however first 2 coefficients are 0 and 1 so are not stored. */
+ .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+ 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+ -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+ 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+ -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+ 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+ -0x1.cfa7385bdb37ep-6, },
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ /* top32(asuint64(sqrt(2)/2)) << 32. */
+ .hfrt2_top = 0x3fe6a09e00000000,
+ /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */
+ .onemhfrt2_top = 0x00095f6200000000,
+ .inf = 0x7ff0000000000000,
+ .mone = 0xbff0000000000000,
+};
+
+#define AbsMask 0x7fffffffffffffff
+#define BottomMask 0xffffffff
+
+static svfloat64_t NOINLINE
+special_case (svbool_t special, svfloat64_t x, svfloat64_t y)
+{
+ return sv_call_f64 (log1p, x, y, special);
+}
+
+/* Vector approximation for log1p using polynomial on reduced interval. Maximum
+ observed error is 2.46 ULP:
+ _ZGVsMxv_log1p(0x1.654a1307242a4p+11) got 0x1.fd5565fb590f4p+2
+ want 0x1.fd5565fb590f6p+2. */
+svfloat64_t SV_NAME_D1 (log1p) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t ax = svand_x (pg, ix, AbsMask);
+ svbool_t special
+ = svorr_z (pg, svcmpge (pg, ax, d->inf), svcmpge (pg, ix, d->mone));
+
+  /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that t
+     is in [sqrt(2)/2, sqrt(2)]):
+ log1p(x) = k*log(2) + log1p(f).
+
+ f may not be representable exactly, so we need a correction term:
+ let m = round(1 + x), c = (1 + x) - m.
+ c << m: at very small x, log1p(x) ~ x, hence:
+ log(1+x) - log(m) ~ c/m.
+
+ We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
+
+ /* Obtain correctly scaled k by manipulation in the exponent.
+ The scalar algorithm casts down to 32-bit at this point to calculate k and
+ u_red. We stay in double-width to obtain f and k, using the same constants
+ as the scalar algorithm but shifted left by 32. */
+ svfloat64_t m = svadd_x (pg, x, 1);
+ svuint64_t mi = svreinterpret_u64 (m);
+ svuint64_t u = svadd_x (pg, mi, d->onemhfrt2_top);
+
+ svint64_t ki = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, u, 52)), 0x3ff);
+ svfloat64_t k = svcvt_f64_x (pg, ki);
+
+  /* Reduce x to f in [sqrt(2)/2 - 1, sqrt(2) - 1]. */
+ svuint64_t utop
+ = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hfrt2_top);
+ svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, BottomMask));
+ svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1);
+
+ /* Correction term c/m. */
+ svfloat64_t cm = svdiv_x (pg, svsub_x (pg, x, svsub_x (pg, m, 1)), m);
+
+ /* Approximate log1p(x) on the reduced input using a polynomial. Because
+ log1p(0)=0 we choose an approximation of the form:
+ x + C0*x^2 + C1*x^3 + C2x^4 + ...
+ Hence approximation has the form f + f^2 * P(f)
+ where P(x) = C0 + C1*x + C2x^2 + ...
+ Assembling this all correctly is dealt with at the final step. */
+ svfloat64_t f2 = svmul_x (pg, f, f), f4 = svmul_x (pg, f2, f2),
+ f8 = svmul_x (pg, f4, f4), f16 = svmul_x (pg, f8, f8);
+ svfloat64_t p = sv_estrin_18_f64_x (pg, f, f2, f4, f8, f16, d->poly);
+
+ svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2_lo);
+ svfloat64_t yhi = svmla_x (pg, f, k, d->ln2_hi);
+ svfloat64_t y = svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (special, x, y);
+
+ return y;
+}
+
+PL_SIG (SV, D, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (log1p), 1.97)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.0, 0x1p-23, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0x1p-23, 0.001, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (log1p), 0.001, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log1p), 1, inf, 10000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log1p), -1, -inf, 10)
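
For illustration, the m/k/f/c reduction above can be walked through in a scalar sketch: m = 1 + x is formed, k and f are derived from the bits of m so that f + 1 lies in [sqrt(2)/2, sqrt(2)), and the rounding error of 1 + x is folded back in through c/m. Here libm log1p stands in for the polynomial, and the chosen input makes the correction term exactly zero:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <math.h>

static uint64_t asuint64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }
static double asdouble (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

int main (void)
{
  const uint64_t HfRt2Top = 0x3fe6a09e00000000;
  const uint64_t OneMHfRt2Top = 0x00095f6200000000;
  const uint64_t BottomMask = 0xffffffff;
  const double ln2 = 0x1.62e42fefa39efp-1;
  double x = 0.75;

  double m = x + 1.0;
  uint64_t mi = asuint64 (m);
  uint64_t u = mi + OneMHfRt2Top;

  int64_t k = (int64_t) (u >> 52) - 0x3ff;
  uint64_t utop = (u & 0x000fffff00000000) + HfRt2Top;
  double t = asdouble (utop | (mi & BottomMask)); /* t in [sqrt(2)/2, sqrt(2)). */
  double f = t - 1.0;
  double cm = (x - (m - 1.0)) / m; /* 0 here; nonzero when 1 + x rounds. */

  double y = k * ln2 + log1p (f) + cm; /* log1p stands in for the polynomial. */
  printf ("y=%.17g log1p(x)=%.17g\n", y, log1p (x));
  return 0;
}
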
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h b/contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h
new file mode 100644
index 000000000000..983f8e1b0413
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log1p_inline.h
@@ -0,0 +1,96 @@
+/*
+ * Helper for SVE double-precision routines which calculate log(1 + x) and do
+ * not need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#ifndef PL_MATH_SV_LOG1P_INLINE_H
+#define PL_MATH_SV_LOG1P_INLINE_H
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct sv_log1p_data
+{
+ double poly[19], ln2[2];
+ uint64_t hf_rt2_top;
+ uint64_t one_m_hf_rt2_top;
+ uint32_t bottom_mask;
+ int64_t one_top;
+} sv_log1p_data = {
+ /* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1].
+ */
+ .poly = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
+ 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
+ -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
+ 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
+ -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
+ 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
+ -0x1.cfa7385bdb37ep-6 },
+ .ln2 = { 0x1.62e42fefa3800p-1, 0x1.ef35793c76730p-45 },
+ .hf_rt2_top = 0x3fe6a09e00000000,
+ .one_m_hf_rt2_top = 0x00095f6200000000,
+ .bottom_mask = 0xffffffff,
+ .one_top = 0x3ff
+};
+
+static inline svfloat64_t
+sv_log1p_inline (svfloat64_t x, const svbool_t pg)
+{
+ /* Helper for calculating log(x + 1). Adapted from v_log1p_inline.h, which
+ differs from v_log1p_2u5.c by:
+ - No special-case handling - this should be dealt with by the caller.
+ - Pairwise Horner polynomial evaluation for improved accuracy.
+ - Optionally simulate the shortcut for k=0, used in the scalar routine,
+ using svsel, for improved accuracy when the argument to log1p is close
+ to 0. This feature is enabled by defining WANT_SV_LOG1P_K0_SHORTCUT as 1
+ in the source of the caller before including this file.
+     See sv_log1p_2u5.c for details of the algorithm. */
+ const struct sv_log1p_data *d = ptr_barrier (&sv_log1p_data);
+ svfloat64_t m = svadd_x (pg, x, 1);
+ svuint64_t mi = svreinterpret_u64 (m);
+ svuint64_t u = svadd_x (pg, mi, d->one_m_hf_rt2_top);
+
+ svint64_t ki
+ = svsub_x (pg, svreinterpret_s64 (svlsr_x (pg, u, 52)), d->one_top);
+ svfloat64_t k = svcvt_f64_x (pg, ki);
+
+  /* Reduce x to f in [sqrt(2)/2 - 1, sqrt(2) - 1]. */
+ svuint64_t utop
+ = svadd_x (pg, svand_x (pg, u, 0x000fffff00000000), d->hf_rt2_top);
+ svuint64_t u_red = svorr_x (pg, utop, svand_x (pg, mi, d->bottom_mask));
+ svfloat64_t f = svsub_x (pg, svreinterpret_f64 (u_red), 1);
+
+ /* Correction term c/m. */
+ svfloat64_t c = svsub_x (pg, x, svsub_x (pg, m, 1));
+ svfloat64_t cm;
+
+#ifndef WANT_SV_LOG1P_K0_SHORTCUT
+#error \
+ "Cannot use sv_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+#elif WANT_SV_LOG1P_K0_SHORTCUT
+ /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
+ that the approximation is solely the polynomial. */
+ svbool_t knot0 = svcmpne (pg, k, 0);
+ cm = svdiv_z (knot0, c, m);
+ if (likely (!svptest_any (pg, knot0)))
+ {
+ f = svsel (knot0, f, x);
+ }
+#else
+ /* No shortcut. */
+ cm = svdiv_x (pg, c, m);
+#endif
+
+ /* Approximate log1p(f) on the reduced input using a polynomial. */
+ svfloat64_t f2 = svmul_x (pg, f, f);
+ svfloat64_t p = sv_pw_horner_18_f64_x (pg, f, f2, d->poly);
+
+ /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */
+ svfloat64_t ylo = svmla_x (pg, cm, k, d->ln2[0]);
+ svfloat64_t yhi = svmla_x (pg, f, k, d->ln2[1]);
+
+ return svmla_x (pg, svadd_x (pg, ylo, yhi), f2, p);
+}
+#endif // PL_MATH_SV_LOG1P_INLINE_H
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c b/contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c
new file mode 100644
index 000000000000..ea1a3dbf723a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log1pf_1u3.c
@@ -0,0 +1,97 @@
+/*
+ * Single-precision vector log(x + 1) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+ float poly[8];
+ float ln2, exp_bias;
+ uint32_t four, three_quarters;
+} data = {.poly = {/* Do not store first term of polynomial, which is -0.5, as
+ this can be fmov-ed directly instead of including it in
+ the main load-and-mla polynomial schedule. */
+ 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
+ -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f,
+ 0x1.abcb6p-4f, -0x1.6f0d5ep-5f},
+ .ln2 = 0x1.62e43p-1f,
+ .exp_bias = 0x1p-23f,
+ .four = 0x40800000,
+ .three_quarters = 0x3f400000};
+
+#define SignExponentMask 0xff800000
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (log1pf, x, y, special);
+}
+
+/* Vector log1pf approximation using polynomial on reduced interval. Worst-case
+ error is 1.27 ULP very close to 0.5.
+ _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2
+ want 0x1.9f323ep-2. */
+svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ /* x < -1, Inf/Nan. */
+ svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
+ special = svorn_z (pg, special, svcmpge (pg, x, -1));
+
+ /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+ is in [-0.25, 0.5]):
+ log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+     We approximate log1p(m) with a polynomial, then add k*log(2) back on
+     at the end. Instead of scaling m by 2^-k directly, we use an
+     intermediate scale factor s = 4*2^-k to ensure the scale is
+     representable as a normalised fp32 number, and scale m down accordingly. */
+ svfloat32_t m = svadd_x (pg, x, 1);
+
+ /* Choose k to scale x to the range [-1/4, 1/2]. */
+ svint32_t k
+ = svand_x (pg, svsub_x (pg, svreinterpret_s32 (m), d->three_quarters),
+ sv_s32 (SignExponentMask));
+
+ /* Scale x by exponent manipulation. */
+ svfloat32_t m_scale = svreinterpret_f32 (
+ svsub_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (k)));
+
+ /* Scale up to ensure that the scale factor is representable as normalised
+ fp32 number, and scale m down accordingly. */
+ svfloat32_t s = svreinterpret_f32 (svsubr_x (pg, k, d->four));
+ m_scale = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1), s, 0.25));
+
+ /* Evaluate polynomial on reduced interval. */
+ svfloat32_t ms2 = svmul_x (pg, m_scale, m_scale),
+ ms4 = svmul_x (pg, ms2, ms2);
+ svfloat32_t p = sv_estrin_7_f32_x (pg, m_scale, ms2, ms4, d->poly);
+ p = svmad_x (pg, m_scale, p, -0.5);
+ p = svmla_x (pg, m_scale, m_scale, svmul_x (pg, m_scale, p));
+
+ /* The scale factor to be applied back at the end - by multiplying float(k)
+ by 2^-23 we get the unbiased exponent of k. */
+ svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->exp_bias);
+
+ /* Apply the scaling back. */
+ svfloat32_t y = svmla_x (pg, p, scale_back, d->ln2);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, y, special);
+
+ return y;
+}
+
+PL_SIG (SV, F, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (log1p), 0.77)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h b/contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h
new file mode 100644
index 000000000000..d13b094f6b5d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log1pf_inline.h
@@ -0,0 +1,65 @@
+/*
+ * Helper for SVE routines which calculate log(1 + x) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_SV_LOG1PF_INLINE_H
+#define PL_MATH_SV_LOG1PF_INLINE_H
+
+#include "v_math.h"
+#include "math_config.h"
+#include "poly_sve_f32.h"
+
+static const struct sv_log1pf_data
+{
+ float32_t poly[9];
+ float32_t ln2;
+ float32_t scale_back;
+} sv_log1pf_data = {
+ /* Polynomial generated using FPMinimax in [-0.25, 0.5]. */
+ .poly = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
+ -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 0x1.abcb6p-4f,
+ -0x1.6f0d5ep-5f },
+ .scale_back = 0x1.0p-23f,
+ .ln2 = 0x1.62e43p-1f,
+};
+
+static inline svfloat32_t
+eval_poly (svfloat32_t m, const float32_t *c, svbool_t pg)
+{
+ svfloat32_t p_12 = svmla_x (pg, sv_f32 (c[0]), m, sv_f32 (c[1]));
+ svfloat32_t m2 = svmul_x (pg, m, m);
+ svfloat32_t q = svmla_x (pg, m, m2, p_12);
+ svfloat32_t p = sv_pw_horner_6_f32_x (pg, m, m2, c + 2);
+ p = svmul_x (pg, m2, p);
+
+ return svmla_x (pg, q, m2, p);
+}
+
+static inline svfloat32_t
+sv_log1pf_inline (svfloat32_t x, svbool_t pg)
+{
+ const struct sv_log1pf_data *d = ptr_barrier (&sv_log1pf_data);
+
+ svfloat32_t m = svadd_x (pg, x, 1.0f);
+
+ svint32_t ks = svsub_x (pg, svreinterpret_s32 (m),
+ svreinterpret_s32 (svdup_f32 (0.75f)));
+ ks = svand_x (pg, ks, 0xff800000);
+ svuint32_t k = svreinterpret_u32 (ks);
+ svfloat32_t s = svreinterpret_f32 (
+ svsub_x (pg, svreinterpret_u32 (svdup_f32 (4.0f)), k));
+
+ svfloat32_t m_scale
+ = svreinterpret_f32 (svsub_x (pg, svreinterpret_u32 (x), k));
+ m_scale
+ = svadd_x (pg, m_scale, svmla_x (pg, sv_f32 (-1.0f), sv_f32 (0.25f), s));
+ svfloat32_t p = eval_poly (m_scale, d->poly, pg);
+ svfloat32_t scale_back = svmul_x (pg, svcvt_f32_x (pg, k), d->scale_back);
+ return svmla_x (pg, p, scale_back, d->ln2);
+}
+
+#endif // PL_MATH_SV_LOG1PF_INLINE_H
\ No newline at end of file
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log2_3u.c b/contrib/arm-optimized-routines/pl/math/sv_log2_3u.c
new file mode 100644
index 000000000000..0775a39cc85d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log2_3u.c
@@ -0,0 +1,73 @@
+/*
+ * Double-precision SVE log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+#define N (1 << V_LOG2_TABLE_BITS)
+#define Off 0x3fe6900900000000
+#define Max (0x7ff0000000000000)
+#define Min (0x0010000000000000)
+#define Thresh (0x7fe0000000000000) /* Max - Min. */
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
+{
+ return sv_call_f64 (log2, x, y, cmp);
+}
+
+/* Double-precision SVE log2 routine.
+ Implements the same algorithm as AdvSIMD log10, with coefficients and table
+ entries scaled in extended precision.
+ The maximum observed error is 2.58 ULP:
+ SV_NAME_D1 (log2)(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
+ want 0x1.fffb34198d9ddp-5. */
+svfloat64_t SV_NAME_D1 (log2) (svfloat64_t x, const svbool_t pg)
+{
+ svuint64_t ix = svreinterpret_u64 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh);
+
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ svuint64_t tmp = svsub_x (pg, ix, Off);
+ svuint64_t i = svlsr_x (pg, tmp, 51 - V_LOG2_TABLE_BITS);
+ i = svand_x (pg, i, (N - 1) << 1);
+ svfloat64_t k = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52));
+ svfloat64_t z = svreinterpret_f64 (
+ svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)));
+
+ svfloat64_t invc = svld1_gather_index (pg, &__v_log2_data.table[0].invc, i);
+ svfloat64_t log2c
+ = svld1_gather_index (pg, &__v_log2_data.table[0].log2c, i);
+
+ /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */
+
+ svfloat64_t r = svmad_x (pg, invc, z, -1.0);
+ svfloat64_t w = svmla_x (pg, log2c, r, __v_log2_data.invln2);
+
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t y = sv_pw_horner_4_f64_x (pg, r, r2, __v_log2_data.poly);
+ w = svadd_x (pg, k, w);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmla_x (svnot_z (pg, special), w, r2, y),
+ special);
+ return svmla_x (pg, w, r2, y);
+}
+
+PL_SIG (SV, D, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (SV_NAME_D1 (log2), 2.09)
+PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_D1 (log2))
+PL_TEST_INTERVAL (SV_NAME_D1 (log2), -0.0, -0x1p126, 1000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0.0, 0x1p-126, 4000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log2), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log2), 1.0, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log2), 100, inf, 50000)
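The identity behind the table method can be checked in plain C, with libm standing in for the lookup: the constant c below is arbitrary, whereas the routine reads 1/c and log2(c) from __v_log2_data.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double x = 3.7;
  int k;
  double z = frexp (x, &k);  /* x = z * 2^k with z in [0.5, 1).  */
  double c = 0.9375;         /* Any constant near z works for the identity.  */
  double r = z / c - 1.0;    /* Small reduced argument.  */
  double log2x = log1p (r) / log (2.0) + log2 (c) + k;
  printf ("%.12g %.12g\n", log2x, log2 (x));
  return 0;
}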
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c
new file mode 100644
index 000000000000..9e96c62bbcc6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log2f_2u5.c
@@ -0,0 +1,86 @@
+/*
+ * Single-precision vector/SVE log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float poly_02468[5];
+ float poly_1357[4];
+} data = {
+ .poly_1357 = {
+    /* Coefficients copied from the AdvSIMD routine, then rearranged so that
+       coeffs 1, 3, 5 and 7 can be loaded as a single quad-word and used with
+       the _lane variant of the MLA intrinsic. */
+ -0x1.715458p-1f, -0x1.7171a4p-2f, -0x1.e5143ep-3f, -0x1.c675bp-3f
+ },
+ .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f,
+ 0x1.9d8ecap-3f, 0x1.9e495p-3f },
+};
+
+#define Min (0x00800000)
+#define Max (0x7f800000)
+#define Thres (0x7f000000) /* Max - Min. */
+#define MantissaMask (0x007fffff)
+#define Off (0x3f2aaaab) /* 0.666667. */
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (log2f, x, y, cmp);
+}
+
+/* Optimised implementation of SVE log2f, using the same algorithm
+ and polynomial as AdvSIMD log2f.
+ Maximum error is 2.48 ULPs:
+ SV_NAME_F1 (log2)(0x1.558174p+0) got 0x1.a9be84p-2
+ want 0x1.a9be8p-2. */
+svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint32_t u = svreinterpret_u32 (x);
+ svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = svsub_x (pg, u, Off);
+ svfloat32_t n = svcvt_f32_x (
+ pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */
+ u = svand_x (pg, u, MantissaMask);
+ u = svadd_x (pg, u, Off);
+ svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
+
+ /* y = log2(1+r) + n. */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+
+ /* Evaluate polynomial using pairwise Horner scheme. */
+ svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[0]);
+ svfloat32_t q_01 = svmla_lane (sv_f32 (d->poly_02468[0]), r, p_1357, 0);
+ svfloat32_t q_23 = svmla_lane (sv_f32 (d->poly_02468[1]), r, p_1357, 1);
+ svfloat32_t q_45 = svmla_lane (sv_f32 (d->poly_02468[2]), r, p_1357, 2);
+ svfloat32_t q_67 = svmla_lane (sv_f32 (d->poly_02468[3]), r, p_1357, 3);
+ svfloat32_t y = svmla_x (pg, q_67, r2, sv_f32 (d->poly_02468[4]));
+ y = svmla_x (pg, q_45, r2, y);
+ y = svmla_x (pg, q_23, r2, y);
+ y = svmla_x (pg, q_01, r2, y);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmla_x (svnot_z (pg, special), n, r, y), special);
+ return svmla_x (pg, n, r, y);
+}
+
+PL_SIG (SV, F, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (SV_NAME_F1 (log2), 1.99)
+PL_TEST_EXPECT_FENV_ALWAYS (SV_NAME_F1 (log2))
+PL_TEST_INTERVAL (SV_NAME_F1 (log2), -0.0, -0x1p126, 4000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0.0, 0x1p-126, 4000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log2), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log2), 1.0, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log2), 100, inf, 50000)
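The pairwise Horner scheme above evaluates the same polynomial as a plain Horner recurrence; a scalar check with arbitrary dyadic coefficients (chosen so both evaluations are exact and print identically):

#include <stdio.h>

/* c[0] + c[1] x + ... + c[8] x^8 evaluated two ways.  */
static double
horner (const double *c, double x)
{
  double y = c[8];
  for (int i = 7; i >= 0; i--)
    y = c[i] + x * y;
  return y;
}

static double
pairwise_horner (const double *c, double x)
{
  double x2 = x * x;
  double q01 = c[0] + x * c[1];
  double q23 = c[2] + x * c[3];
  double q45 = c[4] + x * c[5];
  double q67 = c[6] + x * c[7];
  double y = q67 + x2 * c[8];
  y = q45 + x2 * y;
  y = q23 + x2 * y;
  return q01 + x2 * y;
}

int
main (void)
{
  double c[9] = { 1, 0.5, 0.25, 0.125, 0.0625,
                  0.03125, 0.015625, 0.0078125, 0.00390625 };
  printf ("%.17g %.17g\n", horner (c, 0.5), pairwise_horner (c, 0.5));
  return 0;
}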
diff --git a/contrib/arm-optimized-routines/pl/math/sv_log_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_log_2u5.c
new file mode 100644
index 000000000000..2530c9e3f62c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_log_2u5.c
@@ -0,0 +1,76 @@
+/*
+ * Double-precision SVE log(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define P(i) sv_f64 (__v_log_data.poly[i])
+#define N (1 << V_LOG_TABLE_BITS)
+#define Off (0x3fe6900900000000)
+#define MaxTop (0x7ff)
+#define MinTop (0x001)
+#define ThreshTop (0x7fe) /* MaxTop - MinTop. */
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
+{
+ return sv_call_f64 (log, x, y, cmp);
+}
+
+/* SVE port of AdvSIMD log algorithm.
+ Maximum measured error is 2.17 ulp:
+ SV_NAME_D1 (log)(0x1.a6129884398a3p+0) got 0x1.ffffff1cca043p-2
+ want 0x1.ffffff1cca045p-2. */
+svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg)
+{
+ svuint64_t ix = svreinterpret_u64 (x);
+ svuint64_t top = svlsr_x (pg, ix, 52);
+ svbool_t cmp = svcmpge (pg, svsub_x (pg, top, MinTop), sv_u64 (ThreshTop));
+
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ svuint64_t tmp = svsub_x (pg, ix, Off);
+ /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N.
+ The actual value of i is double this due to table layout. */
+ svuint64_t i
+ = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1);
+ svint64_t k
+ = svasr_x (pg, svreinterpret_s64 (tmp), 52); /* Arithmetic shift. */
+ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52));
+ svfloat64_t z = svreinterpret_f64 (iz);
+ /* Lookup in 2 global lists (length N). */
+ svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i);
+ svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ svfloat64_t r = svmad_x (pg, invc, z, -1);
+ svfloat64_t kd = svcvt_f64_x (pg, k);
+ /* hi = r + log(c) + k*Ln2. */
+ svfloat64_t hi = svmla_x (pg, svadd_x (pg, logc, r), kd, __v_log_data.ln2);
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t y = svmla_x (pg, P (2), r, P (3));
+ svfloat64_t p = svmla_x (pg, P (0), r, P (1));
+ y = svmla_x (pg, y, r2, P (4));
+ y = svmla_x (pg, p, r2, y);
+
+ if (unlikely (svptest_any (pg, cmp)))
+ return special_case (x, svmla_x (svnot_z (pg, cmp), hi, r2, y), cmp);
+ return svmla_x (pg, hi, r2, y);
+}
+
+PL_SIG (SV, D, 1, log, 0.01, 11.1)
+PL_TEST_ULP (SV_NAME_D1 (log), 1.68)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), -0.0, -inf, 1000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), 0, 0x1p-149, 1000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), 1.0, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_D1 (log), 100, inf, 50000)
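The doubled table index mentioned in the comment can be checked in isolation; BITS is an illustrative stand-in for V_LOG_TABLE_BITS.

#include <stdint.h>
#include <stdio.h>

#define BITS 7
#define N (1 << BITS)

int
main (void)
{
  uint64_t samples[] = { 0x0123456789abcdefULL, 0x7fedcba987654321ULL,
                         0x0008000000000000ULL };
  for (unsigned j = 0; j < sizeof samples / sizeof samples[0]; j++)
    {
      uint64_t tmp = samples[j];
      /* Scalar-style index into an array of {invc, logc} structs.  */
      uint64_t i_struct = (tmp >> (52 - BITS)) % N;
      /* SVE-style index: shift one bit less and mask with (N - 1) << 1, which
         gives exactly 2 * i_struct, suitable for gathers from the flat double
         array interleaving invc and logc.  */
      uint64_t i_flat = (tmp >> (51 - BITS)) & ((N - 1) << 1);
      printf ("struct %3llu -> flat %3llu\n", (unsigned long long) i_struct,
              (unsigned long long) i_flat);
    }
  return 0;
}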
diff --git a/contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c b/contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c
new file mode 100644
index 000000000000..967355247036
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_logf_3u4.c
@@ -0,0 +1,86 @@
+/*
+ * Single-precision vector log function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float poly_0135[4];
+ float poly_246[3];
+ float ln2;
+} data = {
+ .poly_0135 = {
+    /* Coefficients copied from the AdvSIMD routine in math/, then rearranged
+       so that coeffs 0, 1, 3 and 5 can be loaded as a single quad-word and
+       used with the _lane variant of the MLA intrinsic. */
+ -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
+ },
+ .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
+ .ln2 = 0x1.62e43p-1f
+};
+
+#define Min (0x00800000)
+#define Max (0x7f800000)
+#define Thresh (0x7f000000) /* Max - Min. */
+#define Mask (0x007fffff)
+#define Off (0x3f2aaaab) /* 0.666667. */
+
+float optr_aor_log_f32 (float);
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (optr_aor_log_f32, x, y, cmp);
+}
+
+/* Optimised implementation of SVE logf, using the same algorithm and
+ polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs:
+ SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2
+ want 0x1.26ede6p-2. */
+svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint32_t u = svreinterpret_u32 (x);
+ svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = svsub_x (pg, u, Off);
+ svfloat32_t n = svcvt_f32_x (
+ pg, svasr_x (pg, svreinterpret_s32 (u), 23)); /* Sign-extend. */
+ u = svand_x (pg, u, Mask);
+ u = svadd_x (pg, u, Off);
+ svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), 1.0f);
+
+ /* y = log(1+r) + n*ln2. */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ /* n*ln2 + r + r2*(P6 + r*P5 + r2*(P4 + r*P3 + r2*(P2 + r*P1 + r2*P0))). */
+ svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[0]);
+ svfloat32_t p = svmla_lane (sv_f32 (d->poly_246[0]), r, p_0135, 1);
+ svfloat32_t q = svmla_lane (sv_f32 (d->poly_246[1]), r, p_0135, 2);
+ svfloat32_t y = svmla_lane (sv_f32 (d->poly_246[2]), r, p_0135, 3);
+ p = svmla_lane (p, r2, p_0135, 0);
+
+ q = svmla_x (pg, q, r2, p);
+ y = svmla_x (pg, y, r2, q);
+ p = svmla_x (pg, r, n, d->ln2);
+
+ if (unlikely (svptest_any (pg, cmp)))
+ return special_case (x, svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
+ return svmla_x (pg, p, r2, y);
+}
+
+PL_SIG (SV, F, 1, log, 0.01, 11.1)
+PL_TEST_ULP (SV_NAME_F1 (log), 2.85)
+PL_TEST_INTERVAL (SV_NAME_F1 (log), -0.0, -inf, 100)
+PL_TEST_INTERVAL (SV_NAME_F1 (log), 0, 0x1p-126, 100)
+PL_TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log), 1.0, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (log), 100, inf, 50000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_math.h b/contrib/arm-optimized-routines/pl/math/sv_math.h
new file mode 100644
index 000000000000..f67fe91803ba
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_math.h
@@ -0,0 +1,133 @@
+/*
+ * Wrapper functions for SVE ACLE.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef SV_MATH_H
+#define SV_MATH_H
+
+#ifndef WANT_VMATH
+/* Enable the build of vector math code. */
+# define WANT_VMATH 1
+#endif
+
+#if WANT_VMATH
+
+# include <arm_sve.h>
+# include <stdbool.h>
+
+# include "math_config.h"
+
+/* Double precision. */
+static inline svint64_t
+sv_s64 (int64_t x)
+{
+ return svdup_s64 (x);
+}
+
+static inline svuint64_t
+sv_u64 (uint64_t x)
+{
+ return svdup_u64 (x);
+}
+
+static inline svfloat64_t
+sv_f64 (double x)
+{
+ return svdup_f64 (x);
+}
+
+static inline svfloat64_t
+sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ());
+ while (svptest_any (cmp, p))
+ {
+ double elem = svclastb (p, 0, x);
+ elem = (*f) (elem);
+ svfloat64_t y2 = sv_f64 (elem);
+ y = svsel (p, y2, y);
+ p = svpnext_b64 (cmp, p);
+ }
+ return y;
+}
+
+static inline svfloat64_t
+sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
+ svfloat64_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ());
+ while (svptest_any (cmp, p))
+ {
+ double elem1 = svclastb (p, 0, x1);
+ double elem2 = svclastb (p, 0, x2);
+ double ret = (*f) (elem1, elem2);
+ svfloat64_t y2 = sv_f64 (ret);
+ y = svsel (p, y2, y);
+ p = svpnext_b64 (cmp, p);
+ }
+ return y;
+}
+
+static inline svuint64_t
+sv_mod_n_u64_x (svbool_t pg, svuint64_t x, uint64_t y)
+{
+ svuint64_t q = svdiv_x (pg, x, y);
+ return svmls_x (pg, x, q, y);
+}
+
+/* Single precision. */
+static inline svint32_t
+sv_s32 (int32_t x)
+{
+ return svdup_s32 (x);
+}
+
+static inline svuint32_t
+sv_u32 (uint32_t x)
+{
+ return svdup_u32 (x);
+}
+
+static inline svfloat32_t
+sv_f32 (float x)
+{
+ return svdup_f32 (x);
+}
+
+static inline svfloat32_t
+sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ());
+ while (svptest_any (cmp, p))
+ {
+ float elem = svclastb (p, 0, x);
+ elem = (*f) (elem);
+ svfloat32_t y2 = sv_f32 (elem);
+ y = svsel (p, y2, y);
+ p = svpnext_b32 (cmp, p);
+ }
+ return y;
+}
+
+static inline svfloat32_t
+sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
+ svfloat32_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ());
+ while (svptest_any (cmp, p))
+ {
+ float elem1 = svclastb (p, 0, x1);
+ float elem2 = svclastb (p, 0, x2);
+ float ret = (*f) (elem1, elem2);
+ svfloat32_t y2 = sv_f32 (ret);
+ y = svsel (p, y2, y);
+ p = svpnext_b32 (cmp, p);
+ }
+ return y;
+}
+#endif
+
+#endif
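A hypothetical toy routine sketching the intended use of sv_call_f32: compute a cheap vector path for all lanes, then let the scalar libm function patch up only the lanes flagged as special. This assumes an SVE target with the pl/math headers on the include path; the fast path and the 87.0 threshold are placeholders, not a real expf.

#include <math.h>
#include "sv_math.h"

static svfloat32_t
toy_special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
{
  /* Re-run only the special lanes through scalar expf.  */
  return sv_call_f32 (expf, x, y, special);
}

svfloat32_t
toy_expf (svfloat32_t x, svbool_t pg)
{
  /* Treat |x| >= 87 as special; the fast path below is a placeholder.  */
  svbool_t special = svacge (pg, x, 87.0f);
  svfloat32_t y = svadd_x (pg, sv_f32 (1.0f), x);
  if (svptest_any (pg, special))
    return toy_special_case (x, y, special);
  return y;
}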
diff --git a/contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c b/contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c
new file mode 100644
index 000000000000..0838810206a1
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_pow_1u5.c
@@ -0,0 +1,444 @@
+/*
+ * Double-precision SVE pow(x, y) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* This version shares a similar algorithm with AOR scalar pow.
+
+   The core computation consists of computing pow(x, y) as
+
+ exp (y * log (x)).
+
+ The algorithms for exp and log are very similar to scalar exp and log.
+   The log relies on table lookup for 3 variables and an order-8 polynomial.
+   It returns a high and a low contribution that are then passed to the exp,
+   to minimise the loss of accuracy in both routines.
+   The exp is based on an 8-bit table lookup for the scale and an order-4
+   polynomial.
+   The SVE algorithm drops the tail in the exp computation at the price of
+   a lower accuracy, slightly above 1 ULP.
+   The SVE algorithm also drops the special treatment of small (< 2^-65) and
+   large (> 2^63) finite values of |y|, as they only affect
+   non-round-to-nearest modes.
+
+ Maximum measured error is 1.04 ULPs:
+ SV_NAME_D2 (pow) (0x1.3d2d45bc848acp+63, -0x1.a48a38b40cd43p-12)
+ got 0x1.f7116284221fcp-1
+ want 0x1.f7116284221fdp-1. */
+
+/* Data is defined in v_pow_log_data.c. */
+#define N_LOG (1 << V_POW_LOG_TABLE_BITS)
+#define A __v_pow_log_data.poly
+#define Off 0x3fe6955500000000
+
+/* Data is defined in v_pow_exp_data.c. */
+#define N_EXP (1 << V_POW_EXP_TABLE_BITS)
+#define SignBias (0x800 << V_POW_EXP_TABLE_BITS)
+#define C __v_pow_exp_data.poly
+#define SmallExp 0x3c9 /* top12(0x1p-54). */
+#define BigExp 0x408 /* top12(512.). */
+#define ThresExp 0x03f /* BigExp - SmallExp. */
+#define HugeExp 0x409 /* top12(1024.). */
+
+/* Constants associated with pow. */
+#define SmallPowX 0x001 /* top12(0x1p-126). */
+#define BigPowX 0x7ff /* top12(INFINITY). */
+#define ThresPowX 0x7fe /* BigPowX - SmallPowX. */
+#define SmallPowY 0x3be /* top12(0x1.e7b6p-65). */
+#define BigPowY 0x43e /* top12(0x1.749p62). */
+#define ThresPowY 0x080 /* BigPowY - SmallPowY. */
+
+/* Check if x is an integer. */
+static inline svbool_t
+sv_isint (svbool_t pg, svfloat64_t x)
+{
+ return svcmpeq (pg, svrintz_z (pg, x), x);
+}
+
+/* Check if x is real not integer valued. */
+static inline svbool_t
+sv_isnotint (svbool_t pg, svfloat64_t x)
+{
+ return svcmpne (pg, svrintz_z (pg, x), x);
+}
+
+/* Check if x is an odd integer. */
+static inline svbool_t
+sv_isodd (svbool_t pg, svfloat64_t x)
+{
+ svfloat64_t y = svmul_x (pg, x, 0.5);
+ return sv_isnotint (pg, y);
+}
+
+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
+ the bit representation of a non-zero finite floating-point value. */
+static inline int
+checkint (uint64_t iy)
+{
+ int e = iy >> 52 & 0x7ff;
+ if (e < 0x3ff)
+ return 0;
+ if (e > 0x3ff + 52)
+ return 2;
+ if (iy & ((1ULL << (0x3ff + 52 - e)) - 1))
+ return 0;
+ if (iy & (1ULL << (0x3ff + 52 - e)))
+ return 1;
+ return 2;
+}
+
+/* Top 12 bits (sign and exponent of each double float lane). */
+static inline svuint64_t
+sv_top12 (svfloat64_t x)
+{
+ return svlsr_x (svptrue_b64 (), svreinterpret_u64 (x), 52);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan. */
+static inline int
+zeroinfnan (uint64_t i)
+{
+ return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1;
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan. */
+static inline svbool_t
+sv_zeroinfnan (svbool_t pg, svuint64_t i)
+{
+ return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2), 1),
+ 2 * asuint64 (INFINITY) - 1);
+}
+
+/* Handle cases that may overflow or underflow when computing the result that
+ is scale*(1+TMP) without intermediate rounding. The bit representation of
+   scale is in SBITS, but its computed exponent may have overflowed into the
+   sign bit, so it needs to be adjusted before being used as a double.
+   (int32_t)KI is the k used in the argument reduction and exponent adjustment
+   of scale; positive k here means the result may overflow and negative k
+   means the result may underflow. */
+static inline double
+specialcase (double tmp, uint64_t sbits, uint64_t ki)
+{
+ double scale;
+ if ((ki & 0x80000000) == 0)
+ {
+ /* k > 0, the exponent of scale might have overflowed by <= 460. */
+ sbits -= 1009ull << 52;
+ scale = asdouble (sbits);
+ return 0x1p1009 * (scale + scale * tmp);
+ }
+ /* k < 0, need special care in the subnormal range. */
+ sbits += 1022ull << 52;
+ /* Note: sbits is signed scale. */
+ scale = asdouble (sbits);
+ double y = scale + scale * tmp;
+ return 0x1p-1022 * y;
+}
+
+/* Scalar fallback for special cases of SVE pow's exp. */
+static inline svfloat64_t
+sv_call_specialcase (svfloat64_t x1, svuint64_t u1, svuint64_t u2,
+ svfloat64_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ());
+ while (svptest_any (cmp, p))
+ {
+ double sx1 = svclastb (p, 0, x1);
+ uint64_t su1 = svclastb (p, 0, u1);
+ uint64_t su2 = svclastb (p, 0, u2);
+ double elem = specialcase (sx1, su1, su2);
+ svfloat64_t y2 = sv_f64 (elem);
+ y = svsel (p, y2, y);
+ p = svpnext_b64 (cmp, p);
+ }
+ return y;
+}
+
+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
+ additional 15 bits precision. IX is the bit representation of x, but
+ normalized in the subnormal range using the sign bit for the exponent. */
+static inline svfloat64_t
+sv_log_inline (svbool_t pg, svuint64_t ix, svfloat64_t *tail)
+{
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ svuint64_t tmp = svsub_x (pg, ix, Off);
+ svuint64_t i = svand_x (pg, svlsr_x (pg, tmp, 52 - V_POW_LOG_TABLE_BITS),
+ sv_u64 (N_LOG - 1));
+ svint64_t k = svasr_x (pg, svreinterpret_s64 (tmp), 52);
+ svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, sv_u64 (0xfffULL << 52)));
+ svfloat64_t z = svreinterpret_f64 (iz);
+ svfloat64_t kd = svcvt_f64_x (pg, k);
+
+ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
+  /* SVE lookup requires 3 separate lookup tables, as opposed to the scalar
+     version, which uses an array of structures. We also do the lookup earlier
+     in the code so that it finishes as early as possible. */
+ svfloat64_t invc = svld1_gather_index (pg, __v_pow_log_data.invc, i);
+ svfloat64_t logc = svld1_gather_index (pg, __v_pow_log_data.logc, i);
+ svfloat64_t logctail = svld1_gather_index (pg, __v_pow_log_data.logctail, i);
+
+ /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
+ svfloat64_t r = svmad_x (pg, z, invc, -1.0);
+ /* k*Ln2 + log(c) + r. */
+ svfloat64_t t1 = svmla_x (pg, logc, kd, __v_pow_log_data.ln2_hi);
+ svfloat64_t t2 = svadd_x (pg, t1, r);
+ svfloat64_t lo1 = svmla_x (pg, logctail, kd, __v_pow_log_data.ln2_lo);
+ svfloat64_t lo2 = svadd_x (pg, svsub_x (pg, t1, t2), r);
+
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ svfloat64_t ar = svmul_x (pg, r, -0.5); /* A[0] = -0.5. */
+ svfloat64_t ar2 = svmul_x (pg, r, ar);
+ svfloat64_t ar3 = svmul_x (pg, r, ar2);
+ /* k*Ln2 + log(c) + r + A[0]*r*r. */
+ svfloat64_t hi = svadd_x (pg, t2, ar2);
+ svfloat64_t lo3 = svmla_x (pg, svneg_x (pg, ar2), ar, r);
+ svfloat64_t lo4 = svadd_x (pg, svsub_x (pg, t2, hi), ar2);
+ /* p = log1p(r) - r - A[0]*r*r. */
+ /* p = (ar3 * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r *
+ A[6])))). */
+ svfloat64_t a56 = svmla_x (pg, sv_f64 (A[5]), r, A[6]);
+ svfloat64_t a34 = svmla_x (pg, sv_f64 (A[3]), r, A[4]);
+ svfloat64_t a12 = svmla_x (pg, sv_f64 (A[1]), r, A[2]);
+ svfloat64_t p = svmla_x (pg, a34, ar2, a56);
+ p = svmla_x (pg, a12, ar2, p);
+ p = svmul_x (pg, ar3, p);
+ svfloat64_t lo = svadd_x (
+ pg, svadd_x (pg, svadd_x (pg, svadd_x (pg, lo1, lo2), lo3), lo4), p);
+ svfloat64_t y = svadd_x (pg, hi, lo);
+ *tail = svadd_x (pg, svsub_x (pg, hi, y), lo);
+ return y;
+}
+
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
+ The sign_bias argument is SignBias or 0 and sets the sign to -1 or 1. */
+static inline svfloat64_t
+sv_exp_inline (svbool_t pg, svfloat64_t x, svfloat64_t xtail,
+ svuint64_t sign_bias)
+{
+ /* 3 types of special cases: tiny (uflow and spurious uflow), huge (oflow)
+ and other cases of large values of x (scale * (1 + TMP) oflow). */
+ svuint64_t abstop = svand_x (pg, sv_top12 (x), 0x7ff);
+ /* |x| is large (|x| >= 512) or tiny (|x| <= 0x1p-54). */
+ svbool_t uoflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), ThresExp);
+
+ /* Conditions special, uflow and oflow are all expressed as uoflow &&
+ something, hence do not bother computing anything if no lane in uoflow is
+ true. */
+ svbool_t special = svpfalse_b ();
+ svbool_t uflow = svpfalse_b ();
+ svbool_t oflow = svpfalse_b ();
+ if (unlikely (svptest_any (pg, uoflow)))
+ {
+ /* |x| is tiny (|x| <= 0x1p-54). */
+ uflow = svcmpge (pg, svsub_x (pg, abstop, SmallExp), 0x80000000);
+ uflow = svand_z (pg, uoflow, uflow);
+ /* |x| is huge (|x| >= 1024). */
+ oflow = svcmpge (pg, abstop, HugeExp);
+ oflow = svand_z (pg, uoflow, svbic_z (pg, oflow, uflow));
+ /* For large |x| values (512 < |x| < 1024) scale * (1 + TMP) can overflow
+ or underflow. */
+ special = svbic_z (pg, uoflow, svorr_z (pg, uflow, oflow));
+ }
+
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
+ svfloat64_t z = svmul_x (pg, x, __v_pow_exp_data.n_over_ln2);
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ svfloat64_t shift = sv_f64 (__v_pow_exp_data.shift);
+ svfloat64_t kd = svadd_x (pg, z, shift);
+ svuint64_t ki = svreinterpret_u64 (kd);
+ kd = svsub_x (pg, kd, shift);
+ svfloat64_t r = x;
+ r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_hi);
+ r = svmls_x (pg, r, kd, __v_pow_exp_data.ln2_over_n_lo);
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ r = svadd_x (pg, r, xtail);
+ /* 2^(k/N) ~= scale. */
+ svuint64_t idx = svand_x (pg, ki, N_EXP - 1);
+ svuint64_t top
+ = svlsl_x (pg, svadd_x (pg, ki, sign_bias), 52 - V_POW_EXP_TABLE_BITS);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ svuint64_t sbits = svld1_gather_index (pg, __v_pow_exp_data.sbits, idx);
+ sbits = svadd_x (pg, sbits, top);
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t tmp = svmla_x (pg, sv_f64 (C[1]), r, C[2]);
+ tmp = svmla_x (pg, sv_f64 (C[0]), r, tmp);
+ tmp = svmla_x (pg, r, r2, tmp);
+ svfloat64_t scale = svreinterpret_f64 (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ z = svmla_x (pg, scale, scale, tmp);
+
+ /* Update result with special and large cases. */
+ if (unlikely (svptest_any (pg, special)))
+ z = sv_call_specialcase (tmp, sbits, ki, z, special);
+
+ /* Handle underflow and overflow. */
+ svuint64_t sign_bit = svlsr_x (pg, svreinterpret_u64 (x), 63);
+ svbool_t x_is_neg = svcmpne (pg, sign_bit, 0);
+ svuint64_t sign_mask = svlsl_x (pg, sign_bias, 52 - V_POW_EXP_TABLE_BITS);
+ svfloat64_t res_uoflow = svsel (x_is_neg, sv_f64 (0.0), sv_f64 (INFINITY));
+ res_uoflow = svreinterpret_f64 (
+ svorr_x (pg, svreinterpret_u64 (res_uoflow), sign_mask));
+ z = svsel (oflow, res_uoflow, z);
+ /* Avoid spurious underflow for tiny x. */
+ svfloat64_t res_spurious_uflow
+ = svreinterpret_f64 (svorr_x (pg, sign_mask, 0x3ff0000000000000));
+ z = svsel (uflow, res_spurious_uflow, z);
+
+ return z;
+}
+
+static inline double
+pow_sc (double x, double y)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t iy = asuint64 (y);
+ /* Special cases: |x| or |y| is 0, inf or nan. */
+ if (unlikely (zeroinfnan (iy)))
+ {
+ if (2 * iy == 0)
+ return issignaling_inline (x) ? x + y : 1.0;
+ if (ix == asuint64 (1.0))
+ return issignaling_inline (y) ? x + y : 1.0;
+ if (2 * ix > 2 * asuint64 (INFINITY) || 2 * iy > 2 * asuint64 (INFINITY))
+ return x + y;
+ if (2 * ix == 2 * asuint64 (1.0))
+ return 1.0;
+ if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
+ return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
+ return y * y;
+ }
+ if (unlikely (zeroinfnan (ix)))
+ {
+ double_t x2 = x * x;
+ if (ix >> 63 && checkint (iy) == 1)
+ x2 = -x2;
+ /* Without the barrier some versions of clang hoist the 1/x2 and
+ thus division by zero exception can be signaled spuriously. */
+ return (iy >> 63) ? opt_barrier_double (1 / x2) : x2;
+ }
+ return x;
+}
+
+svfloat64_t SV_NAME_D2 (pow) (svfloat64_t x, svfloat64_t y, const svbool_t pg)
+{
+ /* This preamble handles special case conditions used in the final scalar
+ fallbacks. It also updates ix and sign_bias, that are used in the core
+ computation too, i.e., exp( y * log (x) ). */
+ svuint64_t vix0 = svreinterpret_u64 (x);
+ svuint64_t viy0 = svreinterpret_u64 (y);
+ svuint64_t vtopx0 = svlsr_x (svptrue_b64 (), vix0, 52);
+
+ /* Negative x cases. */
+ svuint64_t sign_bit = svlsr_m (pg, vix0, 63);
+ svbool_t xisneg = svcmpeq (pg, sign_bit, 1);
+
+ /* Set sign_bias and ix depending on sign of x and nature of y. */
+ svbool_t yisnotint_xisneg = svpfalse_b ();
+ svuint64_t sign_bias = sv_u64 (0);
+ svuint64_t vix = vix0;
+ svuint64_t vtopx1 = vtopx0;
+ if (unlikely (svptest_any (pg, xisneg)))
+ {
+ /* Determine nature of y. */
+ yisnotint_xisneg = sv_isnotint (xisneg, y);
+ svbool_t yisint_xisneg = sv_isint (xisneg, y);
+ svbool_t yisodd_xisneg = sv_isodd (xisneg, y);
+ /* ix set to abs(ix) if y is integer. */
+ vix = svand_m (yisint_xisneg, vix0, 0x7fffffffffffffff);
+ vtopx1 = svand_m (yisint_xisneg, vtopx0, 0x7ff);
+ /* Set to SignBias if x is negative and y is odd. */
+ sign_bias = svsel (yisodd_xisneg, sv_u64 (SignBias), sv_u64 (0));
+ }
+
+ /* Special cases of x or y: zero, inf and nan. */
+ svbool_t xspecial = sv_zeroinfnan (pg, vix0);
+ svbool_t yspecial = sv_zeroinfnan (pg, viy0);
+ svbool_t special = svorr_z (pg, xspecial, yspecial);
+
+ /* Small cases of x: |x| < 0x1p-126. */
+ svuint64_t vabstopx0 = svand_x (pg, vtopx0, 0x7ff);
+ svbool_t xsmall = svcmplt (pg, vabstopx0, SmallPowX);
+ if (unlikely (svptest_any (pg, xsmall)))
+ {
+ /* Normalize subnormal x so exponent becomes negative. */
+ svbool_t topx_is_null = svcmpeq (xsmall, vtopx1, 0);
+
+ svuint64_t vix_norm = svreinterpret_u64 (svmul_m (xsmall, x, 0x1p52));
+ vix_norm = svand_m (xsmall, vix_norm, 0x7fffffffffffffff);
+ vix_norm = svsub_m (xsmall, vix_norm, 52ULL << 52);
+ vix = svsel (topx_is_null, vix_norm, vix);
+ }
+
+ /* y_hi = log(ix, &y_lo). */
+ svfloat64_t vlo;
+ svfloat64_t vhi = sv_log_inline (pg, vix, &vlo);
+
+ /* z = exp(y_hi, y_lo, sign_bias). */
+ svfloat64_t vehi = svmul_x (pg, y, vhi);
+ svfloat64_t velo = svmul_x (pg, y, vlo);
+ svfloat64_t vemi = svmls_x (pg, vehi, y, vhi);
+ velo = svsub_x (pg, velo, vemi);
+ svfloat64_t vz = sv_exp_inline (pg, vehi, velo, sign_bias);
+
+ /* Cases of finite y and finite negative x. */
+ vz = svsel (yisnotint_xisneg, sv_f64 (__builtin_nan ("")), vz);
+
+ /* Cases of zero/inf/nan x or y. */
+ if (unlikely (svptest_any (pg, special)))
+ vz = sv_call2_f64 (pow_sc, x, y, vz, special);
+
+ return vz;
+}
+
+PL_SIG (SV, D, 2, pow)
+PL_TEST_ULP (SV_NAME_D2 (pow), 0.55)
+/* Wide intervals spanning the whole domain but shared between x and y. */
+#define SV_POW_INTERVAL2(xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n)
+#define EXPAND(str) str##000000000
+#define SHL52(str) EXPAND (str)
+SV_POW_INTERVAL2 (0, SHL52 (SmallPowX), 0, inf, 40000)
+SV_POW_INTERVAL2 (SHL52 (SmallPowX), SHL52 (BigPowX), 0, inf, 40000)
+SV_POW_INTERVAL2 (SHL52 (BigPowX), inf, 0, inf, 40000)
+SV_POW_INTERVAL2 (0, inf, 0, SHL52 (SmallPowY), 40000)
+SV_POW_INTERVAL2 (0, inf, SHL52 (SmallPowY), SHL52 (BigPowY), 40000)
+SV_POW_INTERVAL2 (0, inf, SHL52 (BigPowY), inf, 40000)
+SV_POW_INTERVAL2 (0, inf, 0, inf, 1000)
+/* x~1 or y~1. */
+SV_POW_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000)
+SV_POW_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
+SV_POW_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
+/* around estimated argmaxs of ULP error. */
+SV_POW_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
+SV_POW_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
+/* x is negative, y is odd or even integer, or y is real not integer. */
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
+/* |x| is inf, y is odd or even integer, or y is real not integer. */
+SV_POW_INTERVAL2 (inf, inf, 0.5, 0.5, 1)
+SV_POW_INTERVAL2 (inf, inf, 1.0, 1.0, 1)
+SV_POW_INTERVAL2 (inf, inf, 2.0, 2.0, 1)
+SV_POW_INTERVAL2 (inf, inf, 3.0, 3.0, 1)
+/* 0.0^y. */
+SV_POW_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000)
+/* 1.0^y. */
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
+PL_TEST_INTERVAL2 (SV_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
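The compensated product y * (hi + lo) computed just before sv_exp_inline can be sketched with scalar fma; the hi/lo values below are stand-ins for what sv_log_inline returns.

#include <math.h>
#include <stdio.h>

int
main (void)
{
  double y = 1.2345678901234567;
  double hi = 0x1.62e42fefa39efp-1;   /* Stand-in for the log's head.  */
  double lo = 0x1.abc9e3b39803fp-56;  /* Stand-in for the log's tail.  */

  double ehi = y * hi;
  double emi = fma (y, hi, -ehi);     /* Exact rounding error of y * hi.  */
  double elo = y * lo + emi;          /* Correction passed to the exp step.  */

  printf ("head %.17g correction %.17g\n", ehi, elo);
  return 0;
}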
diff --git a/contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c b/contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c
new file mode 100644
index 000000000000..2db0636aea62
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_powf_2u6.c
@@ -0,0 +1,360 @@
+/*
+ * Single-precision SVE powf function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* The following data is used in the SVE pow core computation
+ and special case detection. */
+#define Tinvc __v_powf_data.invc
+#define Tlogc __v_powf_data.logc
+#define Texp __v_powf_data.scale
+#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))
+#define Shift 0x1.8p52
+#define Norm 0x1p23f /* 0x4b000000. */
+
+/* Overall ULP error bound for pow is 2.6 ulp
+ ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */
+static const struct data
+{
+ double log_poly[4];
+ double exp_poly[3];
+ float uflow_bound, oflow_bound, small_bound;
+ uint32_t sign_bias, sign_mask, subnormal_bias, off;
+} data = {
+  /* rel err: 1.5 * 2^-30. Each coefficient is multiplied by the value of
+     V_POWF_EXP2_N. */
+ .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3,
+ -0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 },
+ /* rel err: 1.69 * 2^-34. */
+ .exp_poly = {
+ 0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3. */
+ 0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2. */
+    0x1.62e42ff0c52d6p-6, /* A2 / V_POWF_EXP2_N. */
+ },
+ .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N. */
+ .oflow_bound = 0x1p+12f, /* 128.0 * V_POWF_EXP2_N. */
+ .small_bound = 0x1p-126f,
+ .off = 0x3f35d000,
+ .sign_bias = SignBias,
+ .sign_mask = 0x80000000,
+ .subnormal_bias = 0x0b800000, /* 23 << 23. */
+};
+
+#define A(i) sv_f64 (d->log_poly[i])
+#define C(i) sv_f64 (d->exp_poly[i])
+
+/* Check if x is an integer. */
+static inline svbool_t
+svisint (svbool_t pg, svfloat32_t x)
+{
+ return svcmpeq (pg, svrintz_z (pg, x), x);
+}
+
+/* Check if x is real not integer valued. */
+static inline svbool_t
+svisnotint (svbool_t pg, svfloat32_t x)
+{
+ return svcmpne (pg, svrintz_z (pg, x), x);
+}
+
+/* Check if x is an odd integer. */
+static inline svbool_t
+svisodd (svbool_t pg, svfloat32_t x)
+{
+ svfloat32_t y = svmul_x (pg, x, 0.5f);
+ return svisnotint (pg, y);
+}
+
+/* Check if zero, inf or nan. */
+static inline svbool_t
+sv_zeroinfnan (svbool_t pg, svuint32_t i)
+{
+ return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1),
+ 2u * 0x7f800000 - 1);
+}
+
+/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
+ the bit representation of a non-zero finite floating-point value. */
+static inline int
+checkint (uint32_t iy)
+{
+ int e = iy >> 23 & 0xff;
+ if (e < 0x7f)
+ return 0;
+ if (e > 0x7f + 23)
+ return 2;
+ if (iy & ((1 << (0x7f + 23 - e)) - 1))
+ return 0;
+ if (iy & (1 << (0x7f + 23 - e)))
+ return 1;
+ return 2;
+}
+
+/* Check if zero, inf or nan. */
+static inline int
+zeroinfnan (uint32_t ix)
+{
+ return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
+}
+
+/* A scalar subroutine used to fix main power special cases. Similar to the
+ preamble of finite_powf except that we do not update ix and sign_bias. This
+ is done in the preamble of the SVE powf. */
+static inline float
+powf_specialcase (float x, float y, float z)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iy = asuint (y);
+ /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
+ if (unlikely (zeroinfnan (iy)))
+ {
+ if (2 * iy == 0)
+ return issignalingf_inline (x) ? x + y : 1.0f;
+ if (ix == 0x3f800000)
+ return issignalingf_inline (y) ? x + y : 1.0f;
+ if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
+ return x + y;
+ if (2 * ix == 2 * 0x3f800000)
+ return 1.0f;
+ if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
+ return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
+ return y * y;
+ }
+ if (unlikely (zeroinfnan (ix)))
+ {
+ float_t x2 = x * x;
+ if (ix & 0x80000000 && checkint (iy) == 1)
+ x2 = -x2;
+ return iy & 0x80000000 ? 1 / x2 : x2;
+ }
+ /* We need a return here in case x<0 and y is integer, but all other tests
+ need to be run. */
+ return z;
+}
+
+/* Scalar fallback for special case routines with custom signature. */
+static inline svfloat32_t
+sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
+{
+ svbool_t p = svpfirst (cmp, svpfalse ());
+ while (svptest_any (cmp, p))
+ {
+ float sx1 = svclastb (p, 0, x1);
+ float sx2 = svclastb (p, 0, x2);
+ float elem = svclastb (p, 0, y);
+ elem = powf_specialcase (sx1, sx2, elem);
+ svfloat32_t y2 = sv_f32 (elem);
+ y = svsel (p, y2, y);
+ p = svpnext_b32 (cmp, p);
+ }
+ return y;
+}
+
+/* Compute core for half of the lanes in double precision. */
+static inline svfloat64_t
+sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
+ svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,
+ const struct data *d)
+{
+ svfloat64_t invc = svld1_gather_index (pg, Tinvc, i);
+ svfloat64_t logc = svld1_gather_index (pg, Tlogc, i);
+
+ /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */
+ svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc);
+ svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k));
+
+ /* Polynomial to approximate log1p(r)/ln2. */
+ svfloat64_t logx = A (0);
+ logx = svmla_x (pg, A (1), r, logx);
+ logx = svmla_x (pg, A (2), r, logx);
+ logx = svmla_x (pg, A (3), r, logx);
+ logx = svmla_x (pg, y0, r, logx);
+ *pylogx = svmul_x (pg, y, logx);
+
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ svfloat64_t kd = svadd_x (pg, *pylogx, Shift);
+ svuint64_t ki = svreinterpret_u64 (kd);
+ kd = svsub_x (pg, kd, Shift);
+
+ r = svsub_x (pg, *pylogx, kd);
+
+ /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */
+ svuint64_t t
+ = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1));
+ svuint64_t ski = svadd_x (pg, ki, sign_bias);
+ t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS));
+ svfloat64_t s = svreinterpret_f64 (t);
+
+ svfloat64_t p = C (0);
+ p = svmla_x (pg, C (1), p, r);
+ p = svmla_x (pg, C (2), p, r);
+ p = svmla_x (pg, s, p, svmul_x (pg, s, r));
+
+ return p;
+}
+
+/* Widen vector to double precision and compute core on both halves of the
+ vector. Lower cost of promotion by considering all lanes active. */
+static inline svfloat32_t
+sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
+ svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,
+ const struct data *d)
+{
+ const svbool_t ptrue = svptrue_b64 ();
+
+  /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
+     halves in order to perform the core computation in double precision. */
+ const svbool_t pg_lo = svunpklo (pg);
+ const svbool_t pg_hi = svunpkhi (pg);
+ svfloat64_t y_lo = svcvt_f64_x (
+ ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
+ svfloat64_t y_hi = svcvt_f64_x (
+ ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
+ svfloat32_t z = svreinterpret_f32 (iz);
+ svfloat64_t z_lo = svcvt_f64_x (
+ ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z))));
+ svfloat64_t z_hi = svcvt_f64_x (
+ ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z))));
+ svuint64_t i_lo = svunpklo (i);
+ svuint64_t i_hi = svunpkhi (i);
+ svint64_t k_lo = svunpklo (k);
+ svint64_t k_hi = svunpkhi (k);
+ svuint64_t sign_bias_lo = svunpklo (sign_bias);
+ svuint64_t sign_bias_hi = svunpkhi (sign_bias);
+
+ /* Compute each part in double precision. */
+ svfloat64_t ylogx_lo, ylogx_hi;
+ svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo,
+ sign_bias_lo, &ylogx_lo, d);
+ svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi,
+ sign_bias_hi, &ylogx_hi, d);
+
+ /* Convert back to single-precision and interleave. */
+ svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo);
+ svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi);
+ *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32);
+ svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo);
+ svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi);
+ return svuzp1 (lo_32, hi_32);
+}
+
+/* Implementation of SVE powf.
+ Provides the same accuracy as AdvSIMD powf, since it relies on the same
+ algorithm. The theoretical maximum error is under 2.60 ULPs.
+ Maximum measured error is 2.56 ULPs:
+ SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127
+ want 0x1.fd4b06p+127. */
+svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint32_t vix0 = svreinterpret_u32 (x);
+ svuint32_t viy0 = svreinterpret_u32 (y);
+
+ /* Negative x cases. */
+ svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask);
+ svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask);
+
+ /* Set sign_bias and ix depending on sign of x and nature of y. */
+ svbool_t yisnotint_xisneg = svpfalse_b ();
+ svuint32_t sign_bias = sv_u32 (0);
+ svuint32_t vix = vix0;
+ if (unlikely (svptest_any (pg, xisneg)))
+ {
+ /* Determine nature of y. */
+ yisnotint_xisneg = svisnotint (xisneg, y);
+ svbool_t yisint_xisneg = svisint (xisneg, y);
+ svbool_t yisodd_xisneg = svisodd (xisneg, y);
+ /* ix set to abs(ix) if y is integer. */
+ vix = svand_m (yisint_xisneg, vix0, 0x7fffffff);
+ /* Set to SignBias if x is negative and y is odd. */
+ sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0));
+ }
+
+ /* Special cases of x or y: zero, inf and nan. */
+ svbool_t xspecial = sv_zeroinfnan (pg, vix0);
+ svbool_t yspecial = sv_zeroinfnan (pg, viy0);
+ svbool_t cmp = svorr_z (pg, xspecial, yspecial);
+
+ /* Small cases of x: |x| < 0x1p-126. */
+ svbool_t xsmall = svaclt (pg, x, d->small_bound);
+ if (unlikely (svptest_any (pg, xsmall)))
+ {
+ /* Normalize subnormal x so exponent becomes negative. */
+ svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));
+ vix_norm = svand_x (xsmall, vix_norm, 0x7fffffff);
+ vix_norm = svsub_x (xsmall, vix_norm, d->subnormal_bias);
+ vix = svsel (xsmall, vix_norm, vix);
+ }
+ /* Part of core computation carried in working precision. */
+ svuint32_t tmp = svsub_x (pg, vix, d->off);
+ svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
+ V_POWF_LOG2_N - 1);
+ svuint32_t top = svand_x (pg, tmp, 0xff800000);
+ svuint32_t iz = svsub_x (pg, vix, top);
+ svint32_t k
+ = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS));
+
+  /* Compute core in extended precision and return intermediate ylogx results
+     to handle cases of underflow and overflow in exp. */
+ svfloat32_t ylogx;
+ svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d);
+
+ /* Handle exp special cases of underflow and overflow. */
+ svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
+ svfloat32_t ret_oflow
+ = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY)));
+ svfloat32_t ret_uflow = svreinterpret_f32 (sign);
+ ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret);
+ ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret);
+
+ /* Cases of finite y and finite negative x. */
+ ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret);
+
+ if (unlikely (svptest_any (pg, cmp)))
+ return sv_call_powf_sc (x, y, ret, cmp);
+
+ return ret;
+}
+
+PL_SIG (SV, F, 2, pow)
+PL_TEST_ULP (SV_NAME_F2 (pow), 2.06)
+/* Wide intervals spanning the whole domain but shared between x and y. */
+#define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n)
+SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000)
+SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000)
+SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000)
+/* x~1 or y~1. */
+SV_POWF_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000)
+SV_POWF_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
+SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
+/* around estimated argmaxs of ULP error. */
+SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
+SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
+/* x is negative, y is odd or even integer, or y is real not integer. */
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
+/* |x| is inf, y is odd or even integer, or y is real not integer. */
+SV_POWF_INTERVAL2 (inf, inf, 0.5, 0.5, 1)
+SV_POWF_INTERVAL2 (inf, inf, 1.0, 1.0, 1)
+SV_POWF_INTERVAL2 (inf, inf, 2.0, 2.0, 1)
+SV_POWF_INTERVAL2 (inf, inf, 3.0, 3.0, 1)
+/* 0.0^y. */
+SV_POWF_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000)
+/* 1.0^y. */
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
+PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
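The branch-free zero/inf/nan test above leans on unsigned wrap-around: doubling the bit pattern discards the sign, and 2*ix - 1 wraps ix == 0 above the threshold. A scalar check against the naive classification:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int
zeroinfnan (uint32_t ix)
{
  return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
}

int
main (void)
{
  float vals[] = { 0.0f, -0.0f, 1.0f, -2.5f, INFINITY, -INFINITY, NAN };
  for (unsigned i = 0; i < sizeof vals / sizeof vals[0]; i++)
    {
      uint32_t ix;
      memcpy (&ix, &vals[i], sizeof ix);
      int naive = vals[i] == 0.0f || isinf (vals[i]) || isnan (vals[i]);
      printf ("%g: fast=%d naive=%d\n", vals[i], zeroinfnan (ix), naive);
    }
  return 0;
}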
diff --git a/contrib/arm-optimized-routines/pl/math/sv_powi.c b/contrib/arm-optimized-routines/pl/math/sv_powi.c
new file mode 100644
index 000000000000..e53bf2195533
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_powi.c
@@ -0,0 +1,48 @@
+/*
+ * Double-precision SVE powi(x, n) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+
+/* Optimized double-precision vector powi (double base, long integer power).
+ powi is developed for environments in which accuracy is of much less
+ importance than performance, hence we provide no estimate for worst-case
+ error. */
+svfloat64_t
+_ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p)
+{
+ /* Compute powi by successive squaring, right to left. */
+ svfloat64_t acc = sv_f64 (1.0);
+ svbool_t want_recip = svcmplt (p, ns, 0);
+ svuint64_t ns_abs = svreinterpret_u64 (svabs_x (p, ns));
+
+ /* We use a max to avoid needing to check whether any lane != 0 on each
+ iteration. */
+ uint64_t max_n = svmaxv (p, ns_abs);
+
+ svfloat64_t c = as;
+  /* Successively square c, and use merging predication (_m) to determine
+     whether to perform the multiplication or keep the value from the previous
+     iteration. */
+ while (true)
+ {
+ svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1ull), 1ull);
+ acc = svmul_m (px, acc, c);
+ max_n >>= 1;
+ if (max_n == 0)
+ break;
+
+ ns_abs = svlsr_x (p, ns_abs, 1);
+ c = svmul_x (p, c, c);
+ }
+
+ /* Negative powers are handled by computing the abs(n) version and then
+ taking the reciprocal. */
+ if (svptest_any (want_recip, want_recip))
+ acc = svdivr_m (want_recip, acc, 1.0);
+
+ return acc;
+}
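A scalar version of the square-and-multiply loop may help when reading the predicated form above; it runs the same recurrence one lane at a time.

#include <stdio.h>

static double
powi_ref (double x, long long n)
{
  unsigned long long m = n < 0 ? -(unsigned long long) n : (unsigned long long) n;
  double acc = 1.0, c = x;
  while (m)
    {
      if (m & 1)          /* Matches the merging-predicated multiply.  */
        acc *= c;
      m >>= 1;
      if (m)
        c *= c;           /* Successive squaring.  */
    }
  return n < 0 ? 1.0 / acc : acc;
}

int
main (void)
{
  printf ("%g %g\n", powi_ref (1.5, 10), powi_ref (2.0, -3));
  return 0;
}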
diff --git a/contrib/arm-optimized-routines/pl/math/sv_powif.c b/contrib/arm-optimized-routines/pl/math/sv_powif.c
new file mode 100644
index 000000000000..7e032fd86a20
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_powif.c
@@ -0,0 +1,48 @@
+/*
+ * Single-precision SVE powi(x, n) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+
+/* Optimized single-precision vector powi (float base, integer power).
+ powi is developed for environments in which accuracy is of much less
+ importance than performance, hence we provide no estimate for worst-case
+ error. */
+svfloat32_t
+_ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p)
+{
+ /* Compute powi by successive squaring, right to left. */
+ svfloat32_t acc = sv_f32 (1.f);
+ svbool_t want_recip = svcmplt (p, ns, 0);
+ svuint32_t ns_abs = svreinterpret_u32 (svabs_x (p, ns));
+
+ /* We use a max to avoid needing to check whether any lane != 0 on each
+ iteration. */
+ uint32_t max_n = svmaxv (p, ns_abs);
+
+ svfloat32_t c = as;
+  /* Successively square c, and use merging predication (_m) to determine
+     whether to perform the multiplication or keep the value from the previous
+     iteration. */
+ while (true)
+ {
+ svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1), 1);
+ acc = svmul_m (px, acc, c);
+ max_n >>= 1;
+ if (max_n == 0)
+ break;
+
+ ns_abs = svlsr_x (p, ns_abs, 1);
+ c = svmul_x (p, c, c);
+ }
+
+ /* Negative powers are handled by computing the abs(n) version and then
+ taking the reciprocal. */
+ if (svptest_any (want_recip, want_recip))
+ acc = svdivr_m (want_recip, acc, 1.0f);
+
+ return acc;
+}
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c
new file mode 100644
index 000000000000..a81f3fc80f3d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sin_3u5.c
@@ -0,0 +1,96 @@
+/*
+ * Double-precision SVE sin(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ double inv_pi, pi_1, pi_2, pi_3, shift, range_val;
+ double poly[7];
+} data = {
+ .poly = { -0x1.555555555547bp-3, 0x1.1111111108a4dp-7, -0x1.a01a019936f27p-13,
+ 0x1.71de37a97d93ep-19, -0x1.ae633919987c6p-26,
+ 0x1.60e277ae07cecp-33, -0x1.9e9540300a1p-41, },
+
+ .inv_pi = 0x1.45f306dc9c883p-2,
+ .pi_1 = 0x1.921fb54442d18p+1,
+ .pi_2 = 0x1.1a62633145c06p-53,
+ .pi_3 = 0x1.c1cd129024e09p-106,
+ .shift = 0x1.8p52,
+ .range_val = 0x1p23,
+};
+
+#define C(i) sv_f64 (d->poly[i])
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
+{
+ return sv_call_f64 (sin, x, y, cmp);
+}
+
+/* A fast SVE implementation of sin.
+ Maximum observed error in [-pi/2, pi/2], where argument is not reduced,
+ is 2.87 ULP:
+ _ZGVsMxv_sin (0x1.921d5c6a07142p+0) got 0x1.fffffffa7dc02p-1
+ want 0x1.fffffffa7dc05p-1
+ Maximum observed error in the entire non-special domain ([-2^23, 2^23])
+ is 3.22 ULP:
+ _ZGVsMxv_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3
+ want 0x1.ffdcd125c84f8p-3. */
+svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Load some values in quad-word chunks to minimise memory access. */
+ const svbool_t ptrue = svptrue_b64 ();
+ svfloat64_t shift = sv_f64 (d->shift);
+ svfloat64_t inv_pi_and_pi1 = svld1rq (ptrue, &d->inv_pi);
+ svfloat64_t pi2_and_pi3 = svld1rq (ptrue, &d->pi_2);
+
+ /* n = rint(|x|/pi). */
+ svfloat64_t n = svmla_lane (shift, x, inv_pi_and_pi1, 0);
+ svuint64_t odd = svlsl_x (pg, svreinterpret_u64 (n), 63);
+ n = svsub_x (pg, n, shift);
+
+ /* r = |x| - n*(pi/2) (range reduction into -pi/2 .. pi/2). */
+ svfloat64_t r = x;
+ r = svmls_lane (r, n, inv_pi_and_pi1, 1);
+ r = svmls_lane (r, n, pi2_and_pi3, 0);
+ r = svmls_lane (r, n, pi2_and_pi3, 1);
+
+ /* sin(r) poly approx. */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t r3 = svmul_x (pg, r2, r);
+ svfloat64_t r4 = svmul_x (pg, r2, r2);
+
+ svfloat64_t t1 = svmla_x (pg, C (4), C (5), r2);
+ svfloat64_t t2 = svmla_x (pg, C (2), C (3), r2);
+ svfloat64_t t3 = svmla_x (pg, C (0), C (1), r2);
+
+ svfloat64_t y = svmla_x (pg, t1, C (6), r4);
+ y = svmla_x (pg, t2, y, r4);
+ y = svmla_x (pg, t3, y, r4);
+ y = svmla_x (pg, r, y, r3);
+
+ svbool_t cmp = svacle (pg, x, d->range_val);
+ cmp = svnot_z (pg, cmp);
+ if (unlikely (svptest_any (pg, cmp)))
+ return special_case (x,
+ svreinterpret_f64 (sveor_z (
+ svnot_z (pg, cmp), svreinterpret_u64 (y), odd)),
+ cmp);
+
+ /* Copy sign. */
+ return svreinterpret_f64 (sveor_z (pg, svreinterpret_u64 (y), odd));
+}
+
+PL_SIG (SV, D, 1, sin, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_D1 (sin), 2.73)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0, 0x1p23, 1000000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sin), 0x1p23, inf, 10000)
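The shift-based rounding and the odd/even sign trick can be demonstrated with scalar code; the constants are the inv_pi and pi_1 values from the data above, and the extended-precision pi split (pi_2, pi_3) is dropped, so the two printed values agree only to roughly 12 digits.

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  double x = 10.0;
  double shift = 0x1.8p52;
  /* Adding the shift forces x/pi to round to an integer held in the low
     mantissa bits of n_shifted.  */
  double n_shifted = x * 0x1.45f306dc9c883p-2 + shift;
  uint64_t bits;
  memcpy (&bits, &n_shifted, sizeof bits);
  uint64_t odd = bits << 63;          /* Sign mask when n is odd.  */
  double n = n_shifted - shift;       /* n = rint(x/pi).  */
  double r = x - n * 0x1.921fb54442d18p+1;   /* Single-step reduction.  */
  double s = sin (r);
  memcpy (&bits, &s, sizeof bits);
  bits ^= odd;                        /* Flip the sign in odd half-periods.  */
  memcpy (&s, &bits, sizeof bits);
  printf ("%.12g %.12g\n", s, sin (x));
  return 0;
}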
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c
new file mode 100644
index 000000000000..f73550082d5b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sincos_3u5.c
@@ -0,0 +1,61 @@
+/*
+ * Double-precision vector sincos function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Define _GNU_SOURCE in order to include the sincos declaration. If building
+   pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to
+   be linked against a scalar sincos implementation. */
+#define _GNU_SOURCE
+#include <math.h>
+#undef _GNU_SOURCE
+
+#include "sv_sincos_common.h"
+#include "sv_math.h"
+#include "pl_test.h"
+
+static void NOINLINE
+special_case (svfloat64_t x, svbool_t special, double *out_sin,
+ double *out_cos)
+{
+ svbool_t p = svptrue_pat_b64 (SV_VL1);
+ for (int i = 0; i < svcntd (); i++)
+ {
+ if (svptest_any (special, p))
+ sincos (svlastb (p, x), out_sin + i, out_cos + i);
+ p = svpnext_b64 (svptrue_b64 (), p);
+ }
+}
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate polynomials.
+ Largest observed error is for sin, 3.22 ULP:
+ sv_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
+ want -0x1.ffe9537d5dbb4p-3. */
+void
+_ZGVsMxvl8l8_sincos (svfloat64_t x, double *out_sin, double *out_cos,
+ svbool_t pg)
+{
+ const struct sv_sincos_data *d = ptr_barrier (&sv_sincos_data);
+ svbool_t special = check_ge_rangeval (pg, x, d);
+
+ svfloat64x2_t sc = sv_sincos_inline (pg, x, d);
+
+ svst1 (pg, out_sin, svget2 (sc, 0));
+ svst1 (pg, out_cos, svget2 (sc, 1));
+
+ if (unlikely (svptest_any (pg, special)))
+ special_case (x, special, out_sin, out_cos);
+}
+
+PL_TEST_ULP (_ZGVsMxv_sincos_sin, 2.73)
+PL_TEST_ULP (_ZGVsMxv_sincos_cos, 2.73)
+#define SV_SINCOS_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_sincos_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_sincos_cos, lo, hi, n)
+SV_SINCOS_INTERVAL (0, 0x1p23, 500000)
+SV_SINCOS_INTERVAL (-0, -0x1p23, 500000)
+SV_SINCOS_INTERVAL (0x1p23, inf, 10000)
+SV_SINCOS_INTERVAL (-0x1p23, -inf, 10000)
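Editorial note: a hypothetical caller sketch, not part of the patch, showing how the routine above is typically driven over arrays, with the governing predicate covering the tail iteration. It assumes an SVE-enabled toolchain and that the library providing _ZGVsMxvl8l8_sincos is linked in; the prototype matches the definition above.

#include <arm_sve.h>
#include <stddef.h>
#include <stdint.h>

void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t);

/* Compute sin and cos of n doubles; the predicate masks off the tail lanes.  */
static void
sincos_array (const double *x, double *s, double *c, size_t n)
{
  for (size_t i = 0; i < n; i += svcntd ())
    {
      svbool_t pg = svwhilelt_b64 ((uint64_t) i, (uint64_t) n);
      svfloat64_t vx = svld1 (pg, x + i);
      _ZGVsMxvl8l8_sincos (vx, s + i, c + i, pg);
    }
}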
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h b/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h
new file mode 100644
index 000000000000..f7b58deb90bd
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sincos_common.h
@@ -0,0 +1,85 @@
+/*
+ * Core approximation for double-precision vector sincos
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct sv_sincos_data
+{
+ double sin_poly[7], cos_poly[6], pio2[3];
+ double inv_pio2, shift, range_val;
+} sv_sincos_data = {
+ .inv_pio2 = 0x1.45f306dc9c882p-1,
+ .pio2 = { 0x1.921fb50000000p+0, 0x1.110b460000000p-26,
+ 0x1.1a62633145c07p-54 },
+ .shift = 0x1.8p52,
+ .sin_poly = { /* Computed using Remez in [-pi/2, pi/2]. */
+ -0x1.555555555547bp-3, 0x1.1111111108a4dp-7,
+ -0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19,
+ -0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33,
+ -0x1.9e9540300a1p-41 },
+ .cos_poly = { /* Computed using Remez in [-pi/4, pi/4]. */
+ 0x1.555555555554cp-5, -0x1.6c16c16c1521fp-10,
+ 0x1.a01a019cbf62ap-16, -0x1.27e4f812b681ep-22,
+ 0x1.1ee9f152a57cdp-29, -0x1.8fb131098404bp-37 },
+ .range_val = 0x1p23, };
+
+static inline svbool_t
+check_ge_rangeval (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d)
+{
+ svbool_t in_bounds = svaclt (pg, x, d->range_val);
+ return svnot_z (pg, in_bounds);
+}
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate polynomials.
+ Largest observed error is for sin, 3.22 ULP:
+ v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
+ want -0x1.ffe9537d5dbb4p-3. */
+static inline svfloat64x2_t
+sv_sincos_inline (svbool_t pg, svfloat64_t x, const struct sv_sincos_data *d)
+{
+ /* q = nearest integer to 2 * x / pi. */
+ svfloat64_t q = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_pio2),
+ d->shift);
+ svint64_t n = svcvt_s64_x (pg, q);
+
+ /* Reduce x such that r is in [ -pi/4, pi/4 ]. */
+ svfloat64_t r = x;
+ r = svmls_x (pg, r, q, d->pio2[0]);
+ r = svmls_x (pg, r, q, d->pio2[1]);
+ r = svmls_x (pg, r, q, d->pio2[2]);
+
+ svfloat64_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r2, r),
+ r4 = svmul_x (pg, r2, r2);
+
+ /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */
+ svfloat64_t s = sv_pw_horner_6_f64_x (pg, r2, r4, d->sin_poly);
+ s = svmla_x (pg, r, r3, s);
+
+ /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */
+ svfloat64_t c = sv_pw_horner_5_f64_x (pg, r2, r4, d->cos_poly);
+ c = svmad_x (pg, c, r2, -0.5);
+ c = svmad_x (pg, c, r2, 1);
+
+ svuint64_t un = svreinterpret_u64 (n);
+ /* If odd quadrant, swap cos and sin. */
+ svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 63), 0);
+ svfloat64_t ss = svsel (swap, s, c);
+ svfloat64_t cc = svsel (swap, c, s);
+
+ /* Fix signs according to quadrant.
+ ss = asdouble(asuint64(ss) ^ ((n & 2) << 62))
+ cc = asdouble(asuint64(cc) ^ (((n + 1) & 2) << 62)). */
+ svuint64_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 62);
+ svuint64_t cos_sign = svlsl_x (
+ pg, svand_x (pg, svreinterpret_u64 (svadd_x (pg, n, 1)), 2), 62);
+ ss = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ss), sin_sign));
+ cc = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (cc), cos_sign));
+
+ return svcreate2 (ss, cc);
+}
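Editorial note: the swap/sign logic at the end of sv_sincos_inline is easiest to see in scalar form. The sketch below is a hypothetical helper applying the same rules: with x = r + n*(pi/2), an odd n swaps the two polynomial results, bit 1 of n sets the sign of sin and bit 1 of n+1 sets the sign of cos.

#include <stdint.h>
#include <string.h>

/* s ~ sin(r), c ~ cos(r), n = quadrant count from the reduction.  */
static void
apply_quadrant (double s, double c, int64_t n, double *out_sin, double *out_cos)
{
  /* Odd quadrant: sin(x) comes from cos(r) and vice versa.  */
  double ss = (n & 1) ? c : s;
  double cc = (n & 1) ? s : c;

  uint64_t is, ic;
  memcpy (&is, &ss, sizeof is);
  memcpy (&ic, &cc, sizeof ic);
  /* Quadrants 2 and 3 negate sin; quadrants 1 and 2 negate cos.  */
  is ^= (uint64_t) (n & 2) << 62;
  ic ^= (uint64_t) ((n + 1) & 2) << 62;
  memcpy (out_sin, &is, sizeof is);
  memcpy (out_cos, &ic, sizeof ic);
}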
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c b/contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c
new file mode 100644
index 000000000000..c335de8d3dbb
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sincosf_1u8.c
@@ -0,0 +1,62 @@
+/*
+ * Single-precision vector sincos function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Define _GNU_SOURCE in order to include the sincosf declaration. If building
+ against a pre-2.1 GLIBC, or on a non-GNU conforming system, this routine will
+ need to be linked against the scalar sincosf from math/. */
+#define _GNU_SOURCE
+#include <math.h>
+#undef _GNU_SOURCE
+
+#include "sv_sincosf_common.h"
+#include "sv_math.h"
+#include "pl_test.h"
+
+static void NOINLINE
+special_case (svfloat32_t x, svbool_t special, float *out_sin, float *out_cos)
+{
+ svbool_t p = svptrue_pat_b32 (SV_VL1);
+ for (int i = 0; i < svcntw (); i++)
+ {
+ if (svptest_any (special, p))
+ sincosf (svlastb (p, x), out_sin + i, out_cos + i);
+ p = svpnext_b32 (svptrue_b32 (), p);
+ }
+}
+
+/* Single-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate low-order
+ polynomials.
+ Worst-case error for sin is 1.67 ULP:
+ sv_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
+ Worst-case error for cos is 1.81 ULP:
+ sv_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */
+void
+_ZGVsMxvl4l4_sincosf (svfloat32_t x, float *out_sin, float *out_cos,
+ svbool_t pg)
+{
+ const struct sv_sincosf_data *d = ptr_barrier (&sv_sincosf_data);
+ svbool_t special = check_ge_rangeval (pg, x, d);
+
+ svfloat32x2_t sc = sv_sincosf_inline (pg, x, d);
+
+ svst1_f32 (pg, out_sin, svget2 (sc, 0));
+ svst1_f32 (pg, out_cos, svget2 (sc, 1));
+
+ if (unlikely (svptest_any (pg, special)))
+ special_case (x, special, out_sin, out_cos);
+}
+
+PL_TEST_ULP (_ZGVsMxv_sincosf_sin, 1.17)
+PL_TEST_ULP (_ZGVsMxv_sincosf_cos, 1.31)
+#define SV_SINCOSF_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_sincosf_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVsMxv_sincosf_cos, lo, hi, n)
+SV_SINCOSF_INTERVAL (0, 0x1p20, 500000)
+SV_SINCOSF_INTERVAL (-0, -0x1p20, 500000)
+SV_SINCOSF_INTERVAL (0x1p20, inf, 10000)
+SV_SINCOSF_INTERVAL (-0x1p20, -inf, 10000)
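Editorial note: the special_case helper above walks the vector one lane at a time with a single-lane predicate, svlastb and svpnext. An equivalent, perhaps more familiar formulation, shown purely as an illustration and assuming the GNU sincosf declaration as in the file above, is to spill the inputs and a lane mask to memory and patch only the flagged lanes:

#define _GNU_SOURCE /* for sincosf, as above.  */
#include <math.h>
#include <arm_sve.h>
#include <stdint.h>

static void
special_case_alt (svfloat32_t x, svbool_t special, float *out_sin, float *out_cos)
{
  float vals[svcntw ()];
  uint32_t mask[svcntw ()];
  svbool_t all = svptrue_b32 ();
  svst1 (all, vals, x);
  /* 1 in lanes flagged as special, 0 elsewhere.  */
  svst1 (all, mask, svdup_u32_z (special, 1));
  for (uint64_t i = 0; i < svcntw (); i++)
    if (mask[i])
      sincosf (vals[i], out_sin + i, out_cos + i);
}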
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h b/contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h
new file mode 100644
index 000000000000..714e996443b3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sincosf_common.h
@@ -0,0 +1,81 @@
+/*
+ * Core approximation for single-precision vector sincos
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+
+static const struct sv_sincosf_data
+{
+ float poly_sin[3], poly_cos[3], pio2[3], inv_pio2, shift, range_val;
+} sv_sincosf_data = {
+ .poly_sin = { /* Generated using Remez, odd coeffs only, in [-pi/4, pi/4]. */
+ -0x1.555546p-3, 0x1.11076p-7, -0x1.994eb4p-13 },
+ .poly_cos = { /* Generated using Remez, even coeffs only, in [-pi/4, pi/4]. */
+ 0x1.55554ap-5, -0x1.6c0c1ap-10, 0x1.99e0eep-16 },
+ .pio2 = { 0x1.921fb6p+0f, -0x1.777a5cp-25f, -0x1.ee59dap-50f },
+ .inv_pio2 = 0x1.45f306p-1f,
+ .shift = 0x1.8p23,
+ .range_val = 0x1p20
+};
+
+static inline svbool_t
+check_ge_rangeval (svbool_t pg, svfloat32_t x, const struct sv_sincosf_data *d)
+{
+ svbool_t in_bounds = svaclt (pg, x, d->range_val);
+ return svnot_z (pg, in_bounds);
+}
+
+/* Single-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate low-order
+ polynomials.
+ Worst-case error for sin is 1.67 ULP:
+ sv_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
+ Worst-case error for cos is 1.81 ULP:
+ sv_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */
+static inline svfloat32x2_t
+sv_sincosf_inline (svbool_t pg, svfloat32_t x, const struct sv_sincosf_data *d)
+{
+ /* n = rint ( x / (pi/2) ). */
+ svfloat32_t q = svmla_x (pg, sv_f32 (d->shift), x, d->inv_pio2);
+ q = svsub_x (pg, q, d->shift);
+ svint32_t n = svcvt_s32_x (pg, q);
+
+ /* Reduce x such that r is in [ -pi/4, pi/4 ]. */
+ svfloat32_t r = x;
+ r = svmls_x (pg, r, q, d->pio2[0]);
+ r = svmls_x (pg, r, q, d->pio2[1]);
+ r = svmls_x (pg, r, q, d->pio2[2]);
+
+ /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */
+ svfloat32_t r2 = svmul_x (pg, r, r), r3 = svmul_x (pg, r, r2);
+ svfloat32_t s = svmla_x (pg, sv_f32 (d->poly_sin[1]), r2, d->poly_sin[2]);
+ s = svmad_x (pg, r2, s, d->poly_sin[0]);
+ s = svmla_x (pg, r, r3, s);
+
+ /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */
+ svfloat32_t r4 = svmul_x (pg, r2, r2);
+ svfloat32_t p = svmla_x (pg, sv_f32 (d->poly_cos[1]), r2, d->poly_cos[2]);
+ svfloat32_t c = svmad_x (pg, sv_f32 (d->poly_cos[0]), r2, -0.5);
+ c = svmla_x (pg, c, r4, p);
+ c = svmad_x (pg, r2, c, 1);
+
+ svuint32_t un = svreinterpret_u32 (n);
+ /* If odd quadrant, swap cos and sin. */
+ svbool_t swap = svcmpeq (pg, svlsl_x (pg, un, 31), 0);
+ svfloat32_t ss = svsel (swap, s, c);
+ svfloat32_t cc = svsel (swap, c, s);
+
+ /* Fix signs according to quadrant.
+ ss = asfloat(asuint(ss) ^ ((n & 2) << 30))
+ cc = asfloat(asuint(cc) ^ (((n + 1) & 2) << 30)). */
+ svuint32_t sin_sign = svlsl_x (pg, svand_x (pg, un, 2), 30);
+ svuint32_t cos_sign = svlsl_x (
+ pg, svand_x (pg, svreinterpret_u32 (svadd_x (pg, n, 1)), 2), 30);
+ ss = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ss), sin_sign));
+ cc = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (cc), cos_sign));
+
+ return svcreate2 (ss, cc);
+}
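Editorial note: both sincos kernels round to the nearest integer with the shift constant (0x1.8p23 for float, 0x1.8p52 for double) rather than an explicit rint. Adding 1.5*2^23 pushes the sum into a binade where the unit in the last place is 1, so the fractional part is rounded off by the addition itself, and subtracting the shift recovers the rounded value. Minimal scalar illustration, assuming the default round-to-nearest mode and |x| well below 2^22:

static float
round_via_shift (float x)
{
  const float shift = 0x1.8p23f;
  /* volatile keeps the compiler from folding (x + shift) - shift back to x
     under aggressive optimisation.  */
  volatile float t = x + shift;
  return t - shift;
}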
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c b/contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c
new file mode 100644
index 000000000000..675d7b2480f7
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sinf_1u9.c
@@ -0,0 +1,93 @@
+/*
+ * Single-precision SVE sin(x) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float poly[4];
+ /* Pi-related values to be loaded as one quad-word and used with
+ svmla_lane. */
+ float negpi1, negpi2, negpi3, invpi;
+ float shift;
+} data = {
+ .poly = {
+ /* Non-zero coefficients from the degree 9 Taylor series expansion of
+ sin. */
+ -0x1.555548p-3f, 0x1.110df4p-7f, -0x1.9f42eap-13f, 0x1.5b2e76p-19f
+ },
+ .negpi1 = -0x1.921fb6p+1f,
+ .negpi2 = 0x1.777a5cp-24f,
+ .negpi3 = 0x1.ee59dap-49f,
+ .invpi = 0x1.45f306p-2f,
+ .shift = 0x1.8p+23f
+};
+
+#define RangeVal 0x49800000 /* asuint32 (0x1p20f). */
+#define C(i) sv_f32 (d->poly[i])
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (sinf, x, y, cmp);
+}
+
+/* A fast SVE implementation of sinf.
+ Maximum error: 1.89 ULPs.
+ This maximum error is achieved at multiple values in [-2^18, 2^18]
+ but one example is:
+ SV_NAME_F1 (sin)(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1. */
+svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t sign
+ = sveor_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (ax));
+ svbool_t cmp = svcmpge (pg, svreinterpret_u32 (ax), RangeVal);
+
+ /* pi_vals is a quad-word of helper values: the first three elements contain
+ -pi in extended precision, the last contains 1/pi. */
+ svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->negpi1);
+
+ /* n = rint(|x|/pi). */
+ svfloat32_t n = svmla_lane (sv_f32 (d->shift), ax, pi_vals, 3);
+ svuint32_t odd = svlsl_x (pg, svreinterpret_u32 (n), 31);
+ n = svsub_x (pg, n, d->shift);
+
+ /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */
+ svfloat32_t r;
+ r = svmla_lane (ax, n, pi_vals, 0);
+ r = svmla_lane (r, n, pi_vals, 1);
+ r = svmla_lane (r, n, pi_vals, 2);
+
+ /* sin(r) approx using a degree 9 polynomial from the Taylor series
+ expansion. Note that only the odd terms of this are non-zero. */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t y;
+ y = svmla_x (pg, C (2), r2, C (3));
+ y = svmla_x (pg, C (1), r2, y);
+ y = svmla_x (pg, C (0), r2, y);
+ y = svmla_x (pg, r, r, svmul_x (pg, y, r2));
+
+ /* Combine the input sign with the odd-quadrant flip; the result is y ^ sign ^ odd. */
+ sign = sveor_x (pg, sign, odd);
+
+ if (unlikely (svptest_any (pg, cmp)))
+ return special_case (x,
+ svreinterpret_f32 (sveor_x (
+ svnot_z (pg, cmp), svreinterpret_u32 (y), sign)),
+ cmp);
+ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
+
+PL_SIG (SV, F, 1, sin, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_F1 (sin), 1.40)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0, 0x1p23, 1000000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sin), 0x1p23, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c b/contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c
new file mode 100644
index 000000000000..a01e19caecda
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sinh_3u.c
@@ -0,0 +1,103 @@
+/*
+ * Double-precision SVE sinh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64_t poly[11];
+ float64_t inv_ln2, m_ln2_hi, m_ln2_lo, shift;
+ uint64_t halff;
+ int64_t onef;
+ uint64_t large_bound;
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
+ 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
+ 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
+
+ .inv_ln2 = 0x1.71547652b82fep0,
+ .m_ln2_hi = -0x1.62e42fefa39efp-1,
+ .m_ln2_lo = -0x1.abc9e3b39803fp-56,
+ .shift = 0x1.8p52,
+
+ .halff = 0x3fe0000000000000,
+ .onef = 0x3ff0000000000000,
+ /* 2^9, above which the expm1 helper overflows. */
+ .large_bound = 0x4080000000000000,
+};
+
+static inline svfloat64_t
+expm1_inline (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Reduce argument:
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where i = round(x / ln2)
+ and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
+ svfloat64_t j
+ = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
+ svint64_t i = svcvt_s64_x (pg, j);
+ svfloat64_t f = svmla_x (pg, x, j, d->m_ln2_hi);
+ f = svmla_x (pg, f, j, d->m_ln2_lo);
+ /* Approximate expm1(f) using polynomial. */
+ svfloat64_t f2 = svmul_x (pg, f, f);
+ svfloat64_t f4 = svmul_x (pg, f2, f2);
+ svfloat64_t f8 = svmul_x (pg, f4, f4);
+ svfloat64_t p
+ = svmla_x (pg, f, f2, sv_estrin_10_f64_x (pg, f, f2, f4, f8, d->poly));
+ /* t = 2^i. */
+ svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return svmla_x (pg, svsub_x (pg, t, 1.0), p, t);
+}
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svbool_t pg)
+{
+ return sv_call_f64 (sinh, x, x, pg);
+}
+
+/* Approximation for SVE double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ _ZGVsMxv_sinh (0x1.a008538399931p-2) got 0x1.ab929fc64bd66p-2
+ want 0x1.ab929fc64bd63p-2. */
+svfloat64_t SV_NAME_D1 (sinh) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat64_t ax = svabs_x (pg, x);
+ svuint64_t sign
+ = sveor_x (pg, svreinterpret_u64 (x), svreinterpret_u64 (ax));
+ svfloat64_t halfsign = svreinterpret_f64 (svorr_x (pg, sign, d->halff));
+
+ svbool_t special = svcmpge (pg, svreinterpret_u64 (ax), d->large_bound);
+
+ /* Fall back to scalar variant for all lanes if any are special. */
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, pg);
+
+ /* Up to the point that expm1 overflows, we can use it to calculate sinh
+ using a slight rearrangement of the definition of sinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ svfloat64_t t = expm1_inline (ax, pg);
+ t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+ return svmul_x (pg, t, halfsign);
+}
+
+PL_SIG (SV, D, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (sinh), 2.08)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0, 0x1p-26, 1000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinh), 0x1p9, inf, 1000)
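Editorial note: the "slight rearrangement" mentioned in the comments above is the identity below. With t = expm1(|x|), e^|x| - e^-|x| = (t + 1) - 1/(t + 1) = t + t/(t + 1), so sinh avoids the cancellation of evaluating exp(x) - exp(-x) directly and stays accurate for tiny |x|. A scalar restatement leaning on the C library, shown as an illustration only and ignoring the overflow threshold handled above:

#include <math.h>

static double
sinh_via_expm1 (double x)
{
  double t = expm1 (fabs (x));
  return copysign (0.5, x) * (t + t / (t + 1.0));
}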
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c b/contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c
new file mode 100644
index 000000000000..e34ecf378ad3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sinhf_2u3.c
@@ -0,0 +1,64 @@
+/*
+ * Single-precision SVE sinh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "sv_expm1f_inline.h"
+
+static const struct data
+{
+ struct sv_expm1f_data expm1f_consts;
+ uint32_t halff, large_bound;
+} data = {
+ .expm1f_consts = SV_EXPM1F_DATA,
+ .halff = 0x3f000000,
+ /* 0x1.61814ep+6, above which expm1f helper overflows. */
+ .large_bound = 0x42b0c0a7,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t pg)
+{
+ return sv_call_f32 (sinhf, x, y, pg);
+}
+
+/* Approximation for SVE single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The maximum error is 2.26 ULP:
+ _ZGVsMxv_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4
+ want 0x1.e469e4p-4. */
+svfloat32_t SV_NAME_F1 (sinh) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t sign
+ = sveor_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (ax));
+ svfloat32_t halfsign = svreinterpret_f32 (svorr_x (pg, sign, d->halff));
+
+ svbool_t special = svcmpge (pg, svreinterpret_u32 (ax), d->large_bound);
+
+ /* Up to the point that expm1f overflows, we can use it to calculate sinhf
+ using a slight rearrangement of the definition of sinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ svfloat32_t t = expm1f_inline (ax, pg, &d->expm1f_consts);
+ t = svadd_x (pg, t, svdiv_x (pg, t, svadd_x (pg, t, 1.0)));
+
+ /* Fall back to the scalar variant for any lanes which would cause
+ expm1f to overflow. */
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svmul_x (pg, t, halfsign), special);
+
+ return svmul_x (pg, t, halfsign);
+}
+
+PL_SIG (SV, F, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (sinh), 1.76)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0, 0x1.6a09e8p-32, 1000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x1.6a09e8p-32, 0x42b0c0a7, 100000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c b/contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c
new file mode 100644
index 000000000000..c9f23da1b19b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sinpi_3u1.c
@@ -0,0 +1,57 @@
+/*
+ * Double-precision SVE sinpi(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+ double poly[10];
+} data = {
+ /* Polynomial coefficients generated using Remez algorithm,
+ see sinpi.sollya for details. */
+ .poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1,
+ -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+ 0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
+ 0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
+};
+
+/* A fast SVE implementation of sinpi.
+ Maximum error 3.10 ULP:
+ _ZGVsMxv_sinpi(0x1.df1a14f1b235p-2) got 0x1.fd64f541606cp-1
+ want 0x1.fd64f541606c3p-1. */
+svfloat64_t SV_NAME_D1 (sinpi) (svfloat64_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Range reduction into -1/2 .. 1/2,
+ with n = rint(x) and r = x - n. */
+ svfloat64_t n = svrinta_x (pg, x);
+ svfloat64_t r = svsub_x (pg, x, n);
+
+ /* The result should be negated if n is odd. */
+ svuint64_t intn = svreinterpret_u64 (svcvt_s64_x (pg, n));
+ svuint64_t sign = svlsl_z (pg, intn, 63);
+
+ /* y = sin(r). */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t r4 = svmul_x (pg, r2, r2);
+ svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly);
+ y = svmul_x (pg, y, r);
+
+ return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+}
+
+PL_SIG (SV, D, 1, sinpi, -0.9, 0.9)
+PL_TEST_ULP (SV_NAME_D1 (sinpi), 2.61)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0, 0x1p-63, 5000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0.5, 0x1p51, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (sinpi), 0x1p51, inf, 10000)
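Editorial note: the sign handling above follows from sin(pi*x) = sin(pi*n + pi*r) = (-1)^n * sin(pi*r) with n = rint(x) and r = x - n, so only bit 0 of n, shifted into the sign position, is needed. A scalar sketch, for illustration only: libm's sin and M_PI stand in for the Remez polynomial, and the integer cast assumes |n| fits in a long long.

#include <math.h>

static double
sinpi_sketch (double x)
{
  double n = nearbyint (x);
  double r = x - n;
  double y = sin (M_PI * r); /* stands in for the polynomial in r.  */
  return ((long long) n & 1) ? -y : y;
}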
diff --git a/contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c b/contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c
new file mode 100644
index 000000000000..ac3f924bed68
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_sinpif_2u5.c
@@ -0,0 +1,53 @@
+/*
+ * Single-precision SVE sinpi(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+ float poly[6];
+} data = {
+ /* Taylor series coefficients for sin(pi * x). */
+ .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
+ 0x1.50783p-4f, -0x1.e30750p-8f },
+};
+
+/* A fast SVE implementation of sinpif.
+ Maximum error 2.48 ULP:
+ _ZGVsMxv_sinpif(0x1.d062b6p-2) got 0x1.fa8c06p-1
+ want 0x1.fa8c02p-1. */
+svfloat32_t SV_NAME_F1 (sinpi) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Range reduction into -1/2 .. 1/2,
+ with n = rint(x) and r = x - n. */
+ svfloat32_t n = svrinta_x (pg, x);
+ svfloat32_t r = svsub_x (pg, x, n);
+
+ /* The result should be negated if n is odd. */
+ svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n));
+ svuint32_t sign = svlsl_z (pg, intn, 31);
+
+ /* y = sin(r). */
+ svfloat32_t r2 = svmul_x (pg, r, r);
+ svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly);
+ y = svmul_x (pg, y, r);
+
+ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
+
+PL_SIG (SV, F, 1, sinpi, -0.9, 0.9)
+PL_TEST_ULP (SV_NAME_F1 (sinpi), 1.99)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0.5, 0x1p22f, 10000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (sinpi), 0x1p22f, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c
new file mode 100644
index 000000000000..746396e98a10
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_tan_3u5.c
@@ -0,0 +1,99 @@
+/*
+ * Double-precision SVE tan(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ double poly[9];
+ double half_pi_hi, half_pi_lo, inv_half_pi, range_val, shift;
+} data = {
+ /* Polynomial generated with FPMinimax. */
+ .poly = { 0x1.5555555555556p-2, 0x1.1111111110a63p-3, 0x1.ba1ba1bb46414p-5,
+ 0x1.664f47e5b5445p-6, 0x1.226e5e5ecdfa3p-7, 0x1.d6c7ddbf87047p-9,
+ 0x1.7ea75d05b583ep-10, 0x1.289f22964a03cp-11,
+ 0x1.4e4fd14147622p-12, },
+ .half_pi_hi = 0x1.921fb54442d18p0,
+ .half_pi_lo = 0x1.1a62633145c07p-54,
+ .inv_half_pi = 0x1.45f306dc9c883p-1,
+ .range_val = 0x1p23,
+ .shift = 0x1.8p52,
+};
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (tan, x, y, special);
+}
+
+/* Vector approximation for double-precision tan.
+ Maximum measured error is 3.48 ULP:
+ _ZGVsMxv_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
+ want -0x1.f6ccd8ecf7deap+37. */
+svfloat64_t SV_NAME_D1 (tan) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+ /* Invert condition to catch NaNs and Infs as well as large values. */
+ svbool_t special = svnot_z (pg, svaclt (pg, x, dat->range_val));
+
+ /* q = nearest integer to 2 * x / pi. */
+ svfloat64_t shift = sv_f64 (dat->shift);
+ svfloat64_t q = svmla_x (pg, shift, x, dat->inv_half_pi);
+ q = svsub_x (pg, q, shift);
+ svint64_t qi = svcvt_s64_x (pg, q);
+
+ /* Use q to reduce x to r in [-pi/4, pi/4], by:
+ r = x - q * pi/2, in extended precision. */
+ svfloat64_t r = x;
+ svfloat64_t half_pi = svld1rq (svptrue_b64 (), &dat->half_pi_hi);
+ r = svmls_lane (r, q, half_pi, 0);
+ r = svmls_lane (r, q, half_pi, 1);
+ /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
+ formula. */
+ r = svmul_x (pg, r, 0.5);
+
+ /* Approximate tan(r) using an order-8 polynomial.
+ tan(x) is odd, so the polynomial has the form:
+ tan(x) ~= x + C0 * x^3 + C1 * x^5 + C2 * x^7 + ...
+ Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ...
+ Then compute the approximation by:
+ tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */
+ svfloat64_t r2 = svmul_x (pg, r, r);
+ svfloat64_t r4 = svmul_x (pg, r2, r2);
+ svfloat64_t r8 = svmul_x (pg, r4, r4);
+ /* Use offset version coeff array by 1 to evaluate from C1 onwards. */
+ svfloat64_t p = sv_estrin_7_f64_x (pg, r2, r4, r8, dat->poly + 1);
+ p = svmad_x (pg, p, r2, dat->poly[0]);
+ p = svmla_x (pg, r, r2, svmul_x (pg, p, r));
+
+ /* Recombination uses double-angle formula:
+ tan(2x) = 2 * tan(x) / (1 - (tan(x))^2)
+ and reciprocity around pi/2:
+ tan(x) = 1 / (tan(pi/2 - x))
+ to assemble result using change-of-sign and conditional selection of
+ numerator/denominator dependent on odd/even-ness of q (hence quadrant). */
+ svbool_t use_recip
+ = svcmpeq (pg, svand_x (pg, svreinterpret_u64 (qi), 1), 0);
+
+ svfloat64_t n = svmad_x (pg, p, p, -1);
+ svfloat64_t d = svmul_x (pg, p, 2);
+ svfloat64_t swap = n;
+ n = svneg_m (n, use_recip, d);
+ d = svsel (use_recip, swap, d);
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svdiv_x (svnot_z (pg, special), n, d), special);
+ return svdiv_x (pg, n, d);
+}
+
+PL_SIG (SV, D, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_D1 (tan), 2.99)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0, 0x1p23, 500000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tan), 0x1p23, inf, 5000)
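Editorial note: the numerator/denominator shuffle at the end of the routine above encodes two identities. With u = r/2 and p ~ tan(u), the double-angle formula gives tan(r) = 2p / (1 - p^2); for odd quadrants, tan(x) = -1/tan(r) = (p^2 - 1) / (2p), so a single division serves both cases. Scalar restatement, as a hypothetical helper for illustration only:

static double
reconstruct_tan (double p, long q)
{
  double num = p * p - 1.0; /* p^2 - 1.  */
  double den = 2.0 * p;     /* 2p.  */
  /* Even q: tan(x) = 2p / (1 - p^2); odd q: tan(x) = (p^2 - 1) / (2p).  */
  return (q & 1) ? num / den : -den / num;
}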
diff --git a/contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c b/contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c
new file mode 100644
index 000000000000..6b8cd1e64b44
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_tanf_3u5.c
@@ -0,0 +1,119 @@
+/*
+ * Single-precision vector tan(x) function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float pio2_1, pio2_2, pio2_3, invpio2;
+ float c1, c3, c5;
+ float c0, c2, c4, range_val, shift;
+} data = {
+ /* Coefficients generated using:
+ poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2),
+ deg,
+ [|single ...|],
+ [a*a;b*b]);
+ optimize relative error
+ final prec : 23 bits
+ deg : 5
+ a : 0x1p-126 ^ 2
+ b : ((pi) / 0x1p2) ^ 2
+ dirty rel error: 0x1.f7c2e4p-25
+ dirty abs error: 0x1.f7c2ecp-25. */
+ .c0 = 0x1.55555p-2, .c1 = 0x1.11166p-3,
+ .c2 = 0x1.b88a78p-5, .c3 = 0x1.7b5756p-6,
+ .c4 = 0x1.4ef4cep-8, .c5 = 0x1.0e1e74p-7,
+
+ .pio2_1 = 0x1.921fb6p+0f, .pio2_2 = -0x1.777a5cp-25f,
+ .pio2_3 = -0x1.ee59dap-50f, .invpio2 = 0x1.45f306p-1f,
+ .range_val = 0x1p15f, .shift = 0x1.8p+23f
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
+{
+ return sv_call_f32 (tanf, x, y, cmp);
+}
+
+/* Fast implementation of SVE tanf.
+ Maximum error is 3.45 ULP:
+ SV_NAME_F1 (tan)(-0x1.e5f0cap+13) got 0x1.ff9856p-1
+ want 0x1.ff9850p-1. */
+svfloat32_t SV_NAME_F1 (tan) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Determine whether input is too large to perform fast regression. */
+ svbool_t cmp = svacge (pg, x, d->range_val);
+
+ svfloat32_t odd_coeffs = svld1rq (svptrue_b32 (), &d->c1);
+ svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->pio2_1);
+
+ /* n = rint(x/(pi/2)). */
+ svfloat32_t q = svmla_lane (sv_f32 (d->shift), x, pi_vals, 3);
+ svfloat32_t n = svsub_x (pg, q, d->shift);
+ /* n is already a signed integer, simply convert it. */
+ svint32_t in = svcvt_s32_x (pg, n);
+ /* Determine if x lives in an interval where |tan(x)| grows to infinity. */
+ svint32_t alt = svand_x (pg, in, 1);
+ svbool_t pred_alt = svcmpne (pg, alt, 0);
+
+ /* r = x - n * (pi/2) (range reduction into -pi/4 .. pi/4). */
+ svfloat32_t r;
+ r = svmls_lane (x, n, pi_vals, 0);
+ r = svmls_lane (r, n, pi_vals, 1);
+ r = svmls_lane (r, n, pi_vals, 2);
+
+ /* If x lives in an interval where |tan(x)|
+ - is finite, then use a polynomial approximation of the form
+ tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
+ - grows to infinity then use symmetries of tangent and the identity
+ tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use
+ the same polynomial approximation of tan as above. */
+
+ /* Perform additional reduction if required. */
+ svfloat32_t z = svneg_m (r, pred_alt, r);
+
+ /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4],
+ using Estrin on z^2. */
+ svfloat32_t z2 = svmul_x (pg, z, z);
+ svfloat32_t p01 = svmla_lane (sv_f32 (d->c0), z2, odd_coeffs, 0);
+ svfloat32_t p23 = svmla_lane (sv_f32 (d->c2), z2, odd_coeffs, 1);
+ svfloat32_t p45 = svmla_lane (sv_f32 (d->c4), z2, odd_coeffs, 2);
+
+ svfloat32_t z4 = svmul_x (pg, z2, z2);
+ svfloat32_t p = svmla_x (pg, p01, z4, p23);
+
+ svfloat32_t z8 = svmul_x (pg, z4, z4);
+ p = svmla_x (pg, p, z8, p45);
+
+ svfloat32_t y = svmla_x (pg, z, p, svmul_x (pg, z, z2));
+
+ /* Transform result back, if necessary. */
+ svfloat32_t inv_y = svdivr_x (pg, y, 1.0f);
+
+ /* No need to pass pg to special_case here since cmp is a strict subset of pg,
+ guaranteed by the svacge above. */
+ if (unlikely (svptest_any (pg, cmp)))
+ return special_case (x, svsel (pred_alt, inv_y, y), cmp);
+
+ return svsel (pred_alt, inv_y, y);
+}
+
+PL_SIG (SV, F, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (SV_NAME_F1 (tan), 2.96)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p-23, 0.7, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0.7, 1.5, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 1.5, 100, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 100, 0x1p17, 50000)
+PL_TEST_INTERVAL (SV_NAME_F1 (tan), 0x1p17, inf, 50000)
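Editorial note: the alternate path above rests on tan(x) = tan(r + n*pi/2) = -cot(r) = 1/tan(-r) when n is odd, which is why r is negated before the polynomial and the reciprocal is taken afterwards. A scalar sketch of the quadrant handling, for illustration only: libm's tanf stands in for the polynomial, the one-step reduction is deliberately crude, and the integer cast assumes moderate |x|.

#include <math.h>

static float
tanf_quadrant_sketch (float x)
{
  const float pio2 = 0x1.921fb6p+0f;
  float n = nearbyintf (x / pio2);
  float r = x - n * pio2;
  float z = ((int) n & 1) ? -r : r;
  float t = tanf (z);
  return ((int) n & 1) ? 1.0f / t : t;
}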
diff --git a/contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c b/contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c
new file mode 100644
index 000000000000..f54139f1ddbc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_tanh_3u.c
@@ -0,0 +1,96 @@
+/*
+ * Double-precision SVE tanh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64_t poly[11];
+ float64_t inv_ln2, ln2_hi, ln2_lo, shift;
+ uint64_t thresh, tiny_bound;
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { 0x1p-1, 0x1.5555555555559p-3, 0x1.555555555554bp-5,
+ 0x1.111111110f663p-7, 0x1.6c16c16c1b5f3p-10,
+ 0x1.a01a01affa35dp-13, 0x1.a01a018b4ecbbp-16,
+ 0x1.71ddf82db5bb4p-19, 0x1.27e517fc0d54bp-22,
+ 0x1.af5eedae67435p-26, 0x1.1f143d060a28ap-29, },
+
+ .inv_ln2 = 0x1.71547652b82fep0,
+ .ln2_hi = -0x1.62e42fefa39efp-1,
+ .ln2_lo = -0x1.abc9e3b39803fp-56,
+ .shift = 0x1.8p52,
+
+ .tiny_bound = 0x3e40000000000000, /* asuint64 (0x1p-27). */
+ /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */
+ .thresh = 0x01f241bf835f9d5f,
+};
+
+static inline svfloat64_t
+expm1_inline (svfloat64_t x, const svbool_t pg, const struct data *d)
+{
+ /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
+ the scalar variant of tanh. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ svfloat64_t j
+ = svsub_x (pg, svmla_x (pg, sv_f64 (d->shift), x, d->inv_ln2), d->shift);
+ svint64_t i = svcvt_s64_x (pg, j);
+ svfloat64_t f = svmla_x (pg, x, j, d->ln2_hi);
+ f = svmla_x (pg, f, j, d->ln2_lo);
+
+ /* Approximate expm1(f) using polynomial. */
+ svfloat64_t f2 = svmul_x (pg, f, f);
+ svfloat64_t f4 = svmul_x (pg, f2, f2);
+ svfloat64_t p = svmla_x (
+ pg, f, f2,
+ sv_estrin_10_f64_x (pg, f, f2, f4, svmul_x (pg, f4, f4), d->poly));
+
+ /* t = 2 ^ i. */
+ svfloat64_t t = svscale_x (pg, sv_f64 (1), i);
+ /* expm1(x) = p * t + (t - 1). */
+ return svmla_x (pg, svsub_x (pg, t, 1), p, t);
+}
+
+static svfloat64_t NOINLINE
+special_case (svfloat64_t x, svfloat64_t y, svbool_t special)
+{
+ return sv_call_f64 (tanh, x, y, special);
+}
+
+/* SVE approximation for double-precision tanh(x), using a simplified
+ version of expm1. The greatest observed error is 2.77 ULP:
+ _ZGVsMxv_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
+ want -0x1.bd6a21a163624p-3. */
+svfloat64_t SV_NAME_D1 (tanh) (svfloat64_t x, svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svuint64_t ia = svreinterpret_u64 (svabs_x (pg, x));
+
+ /* Trigger special-cases for tiny, boring and infinity/NaN. */
+ svbool_t special = svcmpgt (pg, svsub_x (pg, ia, d->tiny_bound), d->thresh);
+
+ svfloat64_t u = svadd_x (pg, x, x);
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ svfloat64_t q = expm1_inline (u, pg, d);
+ svfloat64_t qp2 = svadd_x (pg, q, 2);
+
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svdiv_x (pg, q, qp2), special);
+ return svdiv_x (pg, q, qp2);
+}
+
+PL_SIG (SV, D, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_D1 (tanh), 2.27)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0, 0x1p-27, 5000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000)
+PL_TEST_SYM_INTERVAL (SV_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
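Editorial note: the special-case predicate above folds three checks into a single unsigned comparison: (ia - tiny_bound) > thresh is true for |x| < 0x1p-27 (the subtraction wraps around), for |x| above the boring bound, and for Inf/NaN. Scalar restatement using the constants from the data table above:

#include <stdint.h>

static int
tanh_is_special (uint64_t ia) /* ia = asuint64 (x) with the sign bit cleared.  */
{
  const uint64_t tiny_bound = 0x3e40000000000000; /* asuint64 (0x1p-27).  */
  const uint64_t thresh = 0x01f241bf835f9d5f;     /* boring bound - tiny bound.  */
  return ia - tiny_bound > thresh;
}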
diff --git a/contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c b/contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c
new file mode 100644
index 000000000000..988a56de0b2e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/sv_tanhf_2u6.c
@@ -0,0 +1,59 @@
+/*
+ * Single-precision SVE tanh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "sv_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "sv_expm1f_inline.h"
+
+static const struct data
+{
+ struct sv_expm1f_data expm1f_consts;
+ uint32_t boring_bound, onef;
+} data = {
+ .expm1f_consts = SV_EXPM1F_DATA,
+ /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */
+ .boring_bound = 0x41102cb3,
+ .onef = 0x3f800000,
+};
+
+static svfloat32_t NOINLINE
+special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
+{
+ return sv_call_f32 (tanhf, x, y, special);
+}
+
+/* Approximation for single-precision SVE tanh(x), using a simplified
+ version of expm1f. The maximum error is 2.57 ULP:
+ _ZGVsMxv_tanhf (0x1.fc1832p-5) got 0x1.fb71a4p-5
+ want 0x1.fb71aap-5. */
+svfloat32_t SV_NAME_F1 (tanh) (svfloat32_t x, const svbool_t pg)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ svfloat32_t ax = svabs_x (pg, x);
+ svuint32_t iax = svreinterpret_u32 (ax);
+ svuint32_t sign = sveor_x (pg, svreinterpret_u32 (x), iax);
+ svbool_t is_boring = svcmpgt (pg, iax, d->boring_bound);
+ svfloat32_t boring = svreinterpret_f32 (svorr_x (pg, sign, d->onef));
+
+ svbool_t special = svcmpgt (pg, iax, 0x7f800000);
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ svfloat32_t q = expm1f_inline (svmul_x (pg, x, 2.0), pg, &d->expm1f_consts);
+ svfloat32_t y = svdiv_x (pg, q, svadd_x (pg, q, 2.0));
+ if (unlikely (svptest_any (pg, special)))
+ return special_case (x, svsel_f32 (is_boring, boring, y), special);
+ return svsel_f32 (is_boring, boring, y);
+}
+
+PL_SIG (SV, F, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (SV_NAME_F1 (tanh), 2.07)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0, 0x1p-23, 1000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000)
+PL_TEST_SYM_INTERVAL (SV_NAME_F1 (tanh), 0x1.205966p+3, inf, 100)
diff --git a/contrib/arm-optimized-routines/pl/math/tanf_3u3.c b/contrib/arm-optimized-routines/pl/math/tanf_3u3.c
new file mode 100644
index 000000000000..30c86fa89730
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tanf_3u3.c
@@ -0,0 +1,193 @@
+/*
+ * Single-precision scalar tan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_scalar_f32.h"
+
+/* Useful constants. */
+#define NegPio2_1 (-0x1.921fb6p+0f)
+#define NegPio2_2 (0x1.777a5cp-25f)
+#define NegPio2_3 (0x1.ee59dap-50f)
+/* Reduced from 0x1p20 to 0x1p17 to keep the error below 3.5 ULP. */
+#define RangeVal (0x1p17f)
+#define InvPio2 ((0x1.45f306p-1f))
+#define Shift (0x1.8p+23f)
+#define AbsMask (0x7fffffff)
+#define Pio4 (0x1.921fb6p-1)
+/* 2PI * 2^-64. */
+#define Pio2p63 (0x1.921FB54442D18p-62)
+
+static inline float
+eval_P (float z)
+{
+ return pw_horner_5_f32 (z, z * z, __tanf_poly_data.poly_tan);
+}
+
+static inline float
+eval_Q (float z)
+{
+ return pairwise_poly_3_f32 (z, z * z, __tanf_poly_data.poly_cotan);
+}
+
+/* Reduction of the input argument x using the Cody-Waite approach, such that
+ x = r + n * pi/2, where r lies in [-pi/4, pi/4] and n is a signed integer. */
+static inline float
+reduce (float x, int32_t *in)
+{
+ /* n = rint(x/(pi/2)). */
+ float r = x;
+ float q = fmaf (InvPio2, r, Shift);
+ float n = q - Shift;
+ /* There is no rounding here, n is representable by a signed integer. */
+ *in = (int32_t) n;
+ /* r = x - n * (pi/2) (range reduction into -pi/4 .. pi/4). */
+ r = fmaf (NegPio2_1, n, r);
+ r = fmaf (NegPio2_2, n, r);
+ r = fmaf (NegPio2_3, n, r);
+ return r;
+}
+
+/* Table with 4/PI to 192 bit precision. To avoid unaligned accesses
+ only 8 new bits are added per entry, making the table 4 times larger. */
+static const uint32_t __inv_pio4[24]
+ = {0x000000a2, 0x0000a2f9, 0x00a2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44,
+ 0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
+ 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62,
+ 0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041};
+
+/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
+ XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
+ Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
+ Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
+ multiply computes the exact 2.62-bit fixed-point modulo. Since the result
+ can have at most 29 leading zeros after the binary point, the double
+ precision result is accurate to 33 bits. */
+static inline double
+reduce_large (uint32_t xi, int *np)
+{
+ const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
+ int shift = (xi >> 23) & 7;
+ uint64_t n, res0, res1, res2;
+
+ xi = (xi & 0xffffff) | 0x800000;
+ xi <<= shift;
+
+ res0 = xi * arr[0];
+ res1 = (uint64_t) xi * arr[4];
+ res2 = (uint64_t) xi * arr[8];
+ res0 = (res2 >> 32) | (res0 << 32);
+ res0 += res1;
+
+ n = (res0 + (1ULL << 61)) >> 62;
+ res0 -= n << 62;
+ double x = (int64_t) res0;
+ *np = n;
+ return x * Pio2p63;
+}
+
+/* Top 12 bits of the float representation (sign bit, exponent and leading
+ mantissa bits). */
+static inline uint32_t
+top12 (float x)
+{
+ return (asuint (x) >> 20);
+}
+
+/* Fast single-precision tan implementation.
+ Maximum ULP error: 3.293ulps.
+ tanf(0x1.c849eap+16) got -0x1.fe8d98p-1 want -0x1.fe8d9ep-1. */
+float
+tanf (float x)
+{
+ /* Get top words. */
+ uint32_t ix = asuint (x);
+ uint32_t ia = ix & AbsMask;
+ uint32_t ia12 = ia >> 20;
+
+ /* Dispatch between no reduction (small values), fast reduction and slow
+ reduction for large values. The reduction step determines a float r
+ (|r| < pi/4) and a signed integer n such that x = r + n * pi/2. */
+ int32_t n;
+ float r;
+ if (ia12 < top12 (Pio4))
+ {
+ /* Optimize small values. */
+ if (unlikely (ia12 < top12 (0x1p-12f)))
+ {
+ if (unlikely (ia12 < top12 (0x1p-126f)))
+ /* Force underflow for tiny x. */
+ force_eval_float (x * x);
+ return x;
+ }
+
+ /* tan (x) ~= x + x^3 * P(x^2). */
+ float x2 = x * x;
+ float y = eval_P (x2);
+ return fmaf (x2, x * y, x);
+ }
+ /* Similar to other trigonometric routines, fast inaccurate reduction is
+ performed for values of x from pi/4 up to RangeVal. In order to keep errors
+ below 3.5 ULP, we set RangeVal to 2^17. This might differ for other
+ trigonometric routines. Above this value, more advanced but slower reduction
+ techniques need to be implemented to reach a similar accuracy. */
+ else if (ia12 < top12 (RangeVal))
+ {
+ /* Fast inaccurate reduction. */
+ r = reduce (x, &n);
+ }
+ else if (ia12 < 0x7f8)
+ {
+ /* Slow accurate reduction. */
+ uint32_t sign = ix & ~AbsMask;
+ double dar = reduce_large (ia, &n);
+ float ar = (float) dar;
+ r = asfloat (asuint (ar) ^ sign);
+ }
+ else
+ {
+ /* tan(Inf or NaN) is NaN. */
+ return __math_invalidf (x);
+ }
+
+ /* If x lives in an interval where |tan(x)|
+ - is finite then use an approximation of tangent in the form
+ tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
+ - grows to infinity then use an approximation of cotangent in the form
+ cotan(z) ~ 1/z + z * Q(z^2), where the reciprocal can be computed early.
+ Using symmetries of tangent and the identity tan(r) = cotan(pi/2 - r),
+ we only need to change the sign of r to obtain tan(x) from cotan(r).
+ This 2-interval approach requires 2 different sets of coefficients P and
+ Q, where Q is a lower order polynomial than P. */
+
+ /* Determine if x lives in an interval where |tan(x)| grows to infinity. */
+ uint32_t alt = (uint32_t) n & 1;
+
+ /* Perform additional reduction if required. */
+ float z = alt ? -r : r;
+
+ /* Prepare backward transformation. */
+ float z2 = r * r;
+ float offset = alt ? 1.0f / z : z;
+ float scale = alt ? z : z * z2;
+
+ /* Evaluate polynomial approximation of tan or cotan. */
+ float p = alt ? eval_Q (z2) : eval_P (z2);
+
+ /* A unified way of assembling the result on both interval types. */
+ return fmaf (scale, p, offset);
+}
+
+PL_SIG (S, F, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (tanf, 2.80)
+PL_TEST_INTERVAL (tanf, 0, 0xffff0000, 10000)
+PL_TEST_SYM_INTERVAL (tanf, 0x1p-127, 0x1p-14, 50000)
+PL_TEST_SYM_INTERVAL (tanf, 0x1p-14, 0.7, 50000)
+PL_TEST_SYM_INTERVAL (tanf, 0.7, 1.5, 50000)
+PL_TEST_SYM_INTERVAL (tanf, 1.5, 0x1p17, 50000)
+PL_TEST_SYM_INTERVAL (tanf, 0x1p17, 0x1p54, 50000)
+PL_TEST_SYM_INTERVAL (tanf, 0x1p54, inf, 50000)
diff --git a/contrib/arm-optimized-routines/pl/math/tanf_data.c b/contrib/arm-optimized-routines/pl/math/tanf_data.c
new file mode 100644
index 000000000000..a6b9d512eed2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tanf_data.c
@@ -0,0 +1,45 @@
+/*
+ * Data used in single-precision tan(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct tanf_poly_data __tanf_poly_data = {
+.poly_tan = {
+/* Coefficients generated using:
+ poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a*a;b*b]);
+ optimize relative error
+ final prec : 23 bits
+ deg : 5
+ a : 0x1p-126 ^ 2
+ b : ((pi) / 0x1p2) ^ 2
+ dirty rel error: 0x1.f7c2e4p-25
+ dirty abs error: 0x1.f7c2ecp-25. */
+0x1.55555p-2,
+0x1.11166p-3,
+0x1.b88a78p-5,
+0x1.7b5756p-6,
+0x1.4ef4cep-8,
+0x1.0e1e74p-7
+},
+.poly_cotan = {
+/* Coefficients generated using:
+ fpminimax(f(x) = (0x1p0 / tan(sqrt(x)) - 0x1p0 / sqrt(x)) / sqrt(x), deg, [|dtype ...|], [a;b])
+ optimize a single polynomial
+ optimize absolute error
+ final prec : 23 bits
+ working prec : 128 bits
+ deg : 3
+ a : 0x1p-126
+ b : (pi) / 0x1p2
+ dirty rel error : 0x1.81298cp-25
+ dirty abs error : 0x1.a8acf4p-25. */
+-0x1.55555p-2, /* -0.33333325. */
+-0x1.6c23e4p-6, /* -2.2225354e-2. */
+-0x1.12dbap-9, /* -2.0969994e-3. */
+-0x1.05a1c2p-12, /* -2.495116e-4. */
+}
+};
diff --git a/contrib/arm-optimized-routines/pl/math/tanh_3u.c b/contrib/arm-optimized-routines/pl/math/tanh_3u.c
new file mode 100644
index 000000000000..86f2904afc32
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tanh_3u.c
@@ -0,0 +1,78 @@
+/*
+ * Double-precision tanh(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "poly_scalar_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define AbsMask 0x7fffffffffffffff
+#define InvLn2 0x1.71547652b82fep0
+#define Ln2hi 0x1.62e42fefa39efp-1
+#define Ln2lo 0x1.abc9e3b39803fp-56
+#define Shift 0x1.8p52
+
+#define BoringBound 0x403241bf835f9d5f /* asuint64 (0x1.241bf835f9d5fp+4). */
+#define TinyBound 0x3e40000000000000 /* asuint64 (0x1p-27). */
+#define One 0x3ff0000000000000
+
+static inline double
+expm1_inline (double x)
+{
+ /* Helper routine for calculating exp(x) - 1. Copied from expm1_2u5.c, with
+ several simplifications:
+ - No special-case handling for tiny or special values.
+ - Simpler combination of p and t in final stage of the algorithm.
+ - Use shift-and-add instead of ldexp to calculate t. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ double j = fma (InvLn2, x, Shift) - Shift;
+ int64_t i = j;
+ double f = fma (j, -Ln2hi, x);
+ f = fma (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) using polynomial. */
+ double f2 = f * f;
+ double f4 = f2 * f2;
+ double p = fma (f2, estrin_10_f64 (f, f2, f4, f4 * f4, __expm1_poly), f);
+
+ /* t = 2 ^ i. */
+ double t = asdouble ((uint64_t) (i + 1023) << 52);
+ /* expm1(x) = p * t + (t - 1). */
+ return fma (p, t, t - 1);
+}
+
+/* Approximation for double-precision tanh(x), using a simplified version of
+ expm1. The greatest observed error is 2.77 ULP:
+ tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
+ want -0x1.bd6a21a163624p-3. */
+double
+tanh (double x)
+{
+ uint64_t ix = asuint64 (x);
+ uint64_t ia = ix & AbsMask;
+ uint64_t sign = ix & ~AbsMask;
+
+ if (unlikely (ia > BoringBound))
+ {
+ if (ia > 0x7ff0000000000000)
+ return __math_invalid (x);
+ return asdouble (One | sign);
+ }
+
+ if (unlikely (ia < TinyBound))
+ return x;
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ double q = expm1_inline (2 * x);
+ return q / (q + 2);
+}
+
+PL_SIG (S, D, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (tanh, 2.27)
+PL_TEST_SYM_INTERVAL (tanh, 0, TinyBound, 1000)
+PL_TEST_SYM_INTERVAL (tanh, TinyBound, BoringBound, 100000)
+PL_TEST_SYM_INTERVAL (tanh, BoringBound, inf, 1000)
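Editorial note: the "shift-and-add instead of ldexp" mentioned in expm1_inline builds 2^i directly from the exponent field, asdouble((uint64_t)(i + 1023) << 52), which is valid here because |i| stays far below the exponent limits. Minimal standalone illustration, with memcpy standing in for asdouble:

#include <stdint.h>
#include <string.h>

static double
pow2_from_exponent (int64_t i) /* assumes -1022 <= i <= 1023.  */
{
  uint64_t bits = (uint64_t) (i + 1023) << 52;
  double t;
  memcpy (&t, &bits, sizeof t);
  return t;
}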
diff --git a/contrib/arm-optimized-routines/pl/math/tanhf_2u6.c b/contrib/arm-optimized-routines/pl/math/tanhf_2u6.c
new file mode 100644
index 000000000000..93ea3cf5d865
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tanhf_2u6.c
@@ -0,0 +1,88 @@
+/*
+ * Single-precision tanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "math_config.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define BoringBound \
+ 0x41102cb3 /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for \
+ negative). */
+#define AbsMask 0x7fffffff
+#define One 0x3f800000
+
+#define Shift (0x1.8p23f)
+#define InvLn2 (0x1.715476p+0f)
+#define Ln2hi (0x1.62e4p-1f)
+#define Ln2lo (0x1.7f7d1cp-20f)
+
+#define C(i) __expm1f_poly[i]
+
+static inline float
+expm1f_inline (float x)
+{
+ /* Helper routine for calculating exp(x) - 1.
+ Copied from expm1f_1u6.c, with several simplifications:
+ - No special-case handling for tiny or special values, instead return early
+ from the main routine.
+ - No special handling for large values:
+ - No early return for infinity.
+ - Simpler combination of p and t in final stage of algorithm.
+ - |i| < 27, so can calculate t by simpler shift-and-add, instead of
+ ldexpf (same as vector algorithm). */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ float j = fmaf (InvLn2, x, Shift) - Shift;
+ int32_t i = j;
+ float f = fmaf (j, -Ln2hi, x);
+ f = fmaf (j, -Ln2lo, f);
+
+ /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
+ Uses Estrin scheme, where the main expm1f routine uses Horner. */
+ float f2 = f * f;
+ float p_01 = fmaf (f, C (1), C (0));
+ float p_23 = fmaf (f, C (3), C (2));
+ float p = fmaf (f2, p_23, p_01);
+ p = fmaf (f2 * f2, C (4), p);
+ p = fmaf (f2, p, f);
+
+ /* t = 2^i. */
+ float t = asfloat ((uint32_t) (i + 127) << 23);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return fmaf (p, t, t - 1);
+}
+
+/* Approximation for single-precision tanh(x), using a simplified version of
+ expm1f. The maximum error is 2.58 ULP:
+ tanhf(0x1.fa5eep-5) got 0x1.f9ba02p-5
+ want 0x1.f9ba08p-5. */
+float
+tanhf (float x)
+{
+ uint32_t ix = asuint (x);
+ uint32_t iax = ix & AbsMask;
+ uint32_t sign = ix & ~AbsMask;
+
+ if (unlikely (iax > BoringBound))
+ {
+ if (iax > 0x7f800000)
+ return __math_invalidf (x);
+ return asfloat (One | sign);
+ }
+
+ if (unlikely (iax < 0x34000000))
+ return x;
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ float q = expm1f_inline (2 * x);
+ return q / (q + 2);
+}
+
+PL_SIG (S, F, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (tanhf, 2.09)
+PL_TEST_SYM_INTERVAL (tanhf, 0, 0x1p-23, 1000)
+PL_TEST_SYM_INTERVAL (tanhf, 0x1p-23, 0x1.205966p+3, 100000)
+PL_TEST_SYM_INTERVAL (tanhf, 0x1.205966p+3, inf, 100)
diff --git a/contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h b/contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h
new file mode 100644
index 000000000000..f2710a979d40
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/mathbench_funcs.h
@@ -0,0 +1,87 @@
+// clang-format off
+/*
+ * Function entries for mathbench.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define _ZSF1(fun, a, b) F(fun##f, a, b)
+#define _ZSD1(f, a, b) D(f, a, b)
+
+#if defined(__vpcs) && __aarch64__
+
+#define _ZVF1(fun, a, b) VNF(_ZGVnN4v_##fun##f, a, b)
+#define _ZVD1(f, a, b) VND(_ZGVnN2v_##f, a, b)
+
+#else
+
+#define _ZVF1(f, a, b)
+#define _ZVD1(f, a, b)
+
+#endif
+
+#if WANT_SVE_MATH
+
+#define _ZSVF1(fun, a, b) SVF(_ZGVsMxv_##fun##f, a, b)
+#define _ZSVD1(f, a, b) SVD(_ZGVsMxv_##f, a, b)
+
+#else
+
+#define _ZSVF1(f, a, b)
+#define _ZSVD1(f, a, b)
+
+#endif
+
+/* No auto-generated wrappers for binary functions - they have to be
+ manually defined in mathbench_wrappers.h. We have to define silent
+ macros for them anyway as they will be emitted by PL_SIG. */
+#define _ZSF2(...)
+#define _ZSD2(...)
+#define _ZVF2(...)
+#define _ZVD2(...)
+#define _ZSVF2(...)
+#define _ZSVD2(...)
+
+#include "mathbench_funcs_gen.h"
+
+/* PL_SIG only emits entries for unary functions, since if a function
+ needs to be wrapped in mathbench there is no way for it to know the
+ name of the wrapper. Add entries for binary functions, or any other
+ exotic signatures that need wrapping, below. */
+
+{"atan2f", 'f', 0, -10.0, 10.0, {.f = atan2f_wrap}},
+{"atan2", 'd', 0, -10.0, 10.0, {.d = atan2_wrap}},
+{"powi", 'd', 0, 0.01, 11.1, {.d = powi_wrap}},
+
+{"_ZGVnN4vv_atan2f", 'f', 'n', -10.0, 10.0, {.vnf = _Z_atan2f_wrap}},
+{"_ZGVnN2vv_atan2", 'd', 'n', -10.0, 10.0, {.vnd = _Z_atan2_wrap}},
+{"_ZGVnN4vv_hypotf", 'f', 'n', -10.0, 10.0, {.vnf = _Z_hypotf_wrap}},
+{"_ZGVnN2vv_hypot", 'd', 'n', -10.0, 10.0, {.vnd = _Z_hypot_wrap}},
+{"_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = xy_Z_pow}},
+{"x_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = x_Z_pow}},
+{"y_ZGVnN2vv_pow", 'd', 'n', -10.0, 10.0, {.vnd = y_Z_pow}},
+{"_ZGVnN4vl4l4_sincosf", 'f', 'n', -3.1, 3.1, {.vnf = _Z_sincosf_wrap}},
+{"_ZGVnN2vl8l8_sincos", 'd', 'n', -3.1, 3.1, {.vnd = _Z_sincos_wrap}},
+{"_ZGVnN4v_cexpif", 'f', 'n', -3.1, 3.1, {.vnf = _Z_cexpif_wrap}},
+{"_ZGVnN2v_cexpi", 'd', 'n', -3.1, 3.1, {.vnd = _Z_cexpi_wrap}},
+
+#if WANT_SVE_MATH
+{"_ZGVsMxvv_atan2f", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_atan2f_wrap}},
+{"_ZGVsMxvv_atan2", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_atan2_wrap}},
+{"_ZGVsMxvv_hypotf", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_hypotf_wrap}},
+{"_ZGVsMxvv_hypot", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_hypot_wrap}},
+{"_ZGVsMxvv_powi", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_powi_wrap}},
+{"_ZGVsMxvv_powk", 'd', 's', -10.0, 10.0, {.svd = _Z_sv_powk_wrap}},
+{"_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = xy_Z_sv_powf}},
+{"x_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = x_Z_sv_powf}},
+{"y_ZGVsMxvv_powf", 'f', 's', -10.0, 10.0, {.svf = y_Z_sv_powf}},
+{"_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = xy_Z_sv_pow}},
+{"x_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = x_Z_sv_pow}},
+{"y_ZGVsMxvv_pow", 'd', 's', -10.0, 10.0, {.svd = y_Z_sv_pow}},
+{"_ZGVsMxvl4l4_sincosf", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_sincosf_wrap}},
+{"_ZGVsMxvl8l8_sincos", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_sincos_wrap}},
+{"_ZGVsMxv_cexpif", 'f', 's', -3.1, 3.1, {.svf = _Z_sv_cexpif_wrap}},
+{"_ZGVsMxv_cexpi", 'd', 's', -3.1, 3.1, {.svd = _Z_sv_cexpi_wrap}},
+#endif
+ // clang-format on
diff --git a/contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h b/contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h
new file mode 100644
index 000000000000..fe7f8963cdee
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/mathbench_wrappers.h
@@ -0,0 +1,206 @@
+/*
+ * Function wrappers for mathbench.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+static double
+atan2_wrap (double x)
+{
+ return atan2 (5.0, x);
+}
+
+static float
+atan2f_wrap (float x)
+{
+ return atan2f (5.0f, x);
+}
+
+static double
+powi_wrap (double x)
+{
+ return __builtin_powi (x, (int) round (x));
+}
+
+#if __aarch64__ && defined(__vpcs)
+
+__vpcs static v_double
+_Z_atan2_wrap (v_double x)
+{
+ return _ZGVnN2vv_atan2 (v_double_dup (5.0), x);
+}
+
+__vpcs static v_float
+_Z_atan2f_wrap (v_float x)
+{
+ return _ZGVnN4vv_atan2f (v_float_dup (5.0f), x);
+}
+
+__vpcs static v_float
+_Z_hypotf_wrap (v_float x)
+{
+ return _ZGVnN4vv_hypotf (v_float_dup (5.0f), x);
+}
+
+__vpcs static v_double
+_Z_hypot_wrap (v_double x)
+{
+ return _ZGVnN2vv_hypot (v_double_dup (5.0), x);
+}
+
+__vpcs static v_double
+xy_Z_pow (v_double x)
+{
+ return _ZGVnN2vv_pow (x, x);
+}
+
+__vpcs static v_double
+x_Z_pow (v_double x)
+{
+ return _ZGVnN2vv_pow (x, v_double_dup (23.4));
+}
+
+__vpcs static v_double
+y_Z_pow (v_double x)
+{
+ return _ZGVnN2vv_pow (v_double_dup (2.34), x);
+}
+
+__vpcs static v_float
+_Z_sincosf_wrap (v_float x)
+{
+ v_float s, c;
+ _ZGVnN4vl4l4_sincosf (x, &s, &c);
+ return s + c;
+}
+
+__vpcs static v_float
+_Z_cexpif_wrap (v_float x)
+{
+ __f32x4x2_t sc = _ZGVnN4v_cexpif (x);
+ return sc.val[0] + sc.val[1];
+}
+
+__vpcs static v_double
+_Z_sincos_wrap (v_double x)
+{
+ v_double s, c;
+ _ZGVnN2vl8l8_sincos (x, &s, &c);
+ return s + c;
+}
+
+__vpcs static v_double
+_Z_cexpi_wrap (v_double x)
+{
+ __f64x2x2_t sc = _ZGVnN2v_cexpi (x);
+ return sc.val[0] + sc.val[1];
+}
+
+#endif // __aarch64__ && __vpcs
+
+#if WANT_SVE_MATH
+
+static sv_float
+_Z_sv_atan2f_wrap (sv_float x, sv_bool pg)
+{
+ return _ZGVsMxvv_atan2f (x, svdup_f32 (5.0f), pg);
+}
+
+static sv_double
+_Z_sv_atan2_wrap (sv_double x, sv_bool pg)
+{
+ return _ZGVsMxvv_atan2 (x, svdup_f64 (5.0), pg);
+}
+
+static sv_float
+_Z_sv_hypotf_wrap (sv_float x, sv_bool pg)
+{
+ return _ZGVsMxvv_hypotf (x, svdup_f32 (5.0), pg);
+}
+
+static sv_double
+_Z_sv_hypot_wrap (sv_double x, sv_bool pg)
+{
+ return _ZGVsMxvv_hypot (x, svdup_f64 (5.0), pg);
+}
+
+static sv_float
+_Z_sv_powi_wrap (sv_float x, sv_bool pg)
+{
+ return _ZGVsMxvv_powi (x, svcvt_s32_f32_x (pg, x), pg);
+}
+
+static sv_double
+_Z_sv_powk_wrap (sv_double x, sv_bool pg)
+{
+ return _ZGVsMxvv_powk (x, svcvt_s64_f64_x (pg, x), pg);
+}
+
+static sv_float
+xy_Z_sv_powf (sv_float x, sv_bool pg)
+{
+ return _ZGVsMxvv_powf (x, x, pg);
+}
+
+static sv_float
+x_Z_sv_powf (sv_float x, sv_bool pg)
+{
+ return _ZGVsMxvv_powf (x, svdup_f32 (23.4f), pg);
+}
+
+static sv_float
+y_Z_sv_powf (sv_float x, sv_bool pg)
+{
+ return _ZGVsMxvv_powf (svdup_f32 (2.34f), x, pg);
+}
+
+static sv_double
+xy_Z_sv_pow (sv_double x, sv_bool pg)
+{
+ return _ZGVsMxvv_pow (x, x, pg);
+}
+
+static sv_double
+x_Z_sv_pow (sv_double x, sv_bool pg)
+{
+ return _ZGVsMxvv_pow (x, svdup_f64 (23.4), pg);
+}
+
+static sv_double
+y_Z_sv_pow (sv_double x, sv_bool pg)
+{
+ return _ZGVsMxvv_pow (svdup_f64 (2.34), x, pg);
+}
+
+static sv_float
+_Z_sv_sincosf_wrap (sv_float x, sv_bool pg)
+{
+ float s[svcntw ()], c[svcntw ()];
+ _ZGVsMxvl4l4_sincosf (x, s, c, pg);
+ return svadd_x (pg, svld1 (pg, s), svld1 (pg, c));
+}
+
+static sv_float
+_Z_sv_cexpif_wrap (sv_float x, sv_bool pg)
+{
+ svfloat32x2_t sc = _ZGVsMxv_cexpif (x, pg);
+ return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1));
+}
+
+static sv_double
+_Z_sv_sincos_wrap (sv_double x, sv_bool pg)
+{
+ double s[svcntd ()], c[svcntd ()];
+ _ZGVsMxvl8l8_sincos (x, s, c, pg);
+ return svadd_x (pg, svld1 (pg, s), svld1 (pg, c));
+}
+
+static sv_double
+_Z_sv_cexpi_wrap (sv_double x, sv_bool pg)
+{
+ svfloat64x2_t sc = _ZGVsMxv_cexpi (x, pg);
+ return svadd_x (pg, svget2 (sc, 0), svget2 (sc, 1));
+}
+
+#endif // WANT_SVE_MATH
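The wrappers above let mathbench, which sweeps a single argument over an interval, drive two-argument and multi-result routines: one operand is pinned to a constant (5.0 for atan2/hypot, 2.34 and 23.4 for pow) so only the other is swept, and the sincos/cexpi wrappers add their two outputs so neither result is discarded. A minimal sketch of the same pattern for one more SVE variant follows; the wrapper name, the pinned constant and the interval are invented for illustration, and the table row simply copies the field layout of the SVE entries visible in the mathbench_funcs.h hunk above.

static sv_float
_Z_sv_atan2f_y1_wrap (sv_float x, sv_bool pg)
{
  /* Pin y to 1.0f and sweep only x, as the 5.0f wrappers above do.  */
  return _ZGVsMxvv_atan2f (x, svdup_f32 (1.0f), pg);
}

/* Hypothetical registration in mathbench_funcs.h, presumably inside the same
   guarded block as the other SVE entries:
   {"_ZGVsMxvv_atan2f_y1", 'f', 's', -10.0, 10.0, {.svf = _Z_sv_atan2f_y1_wrap}},  */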
diff --git a/contrib/arm-optimized-routines/pl/math/test/pl_test.h b/contrib/arm-optimized-routines/pl/math/test/pl_test.h
new file mode 100644
index 000000000000..e7ed4eed634e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/pl_test.h
@@ -0,0 +1,39 @@
+/*
+ * PL macros for emitting various details about routines for consumption by
+ * runulp.sh.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Emit the max ULP threshold, l, for routine f. Piggy-back PL_TEST_EXPECT_FENV
+ on PL_TEST_ULP to add EXPECT_FENV to all scalar routines. */
+#if WANT_VMATH || defined(IGNORE_SCALAR_FENV)
+# define PL_TEST_ULP(f, l) PL_TEST_ULP f l
+#else
+# define PL_TEST_ULP(f, l) \
+ PL_TEST_EXPECT_FENV_ALWAYS (f) \
+ PL_TEST_ULP f l
+#endif
+
+/* Emit the routine name if e == 1, i.e. if f is expected to correctly trigger
+ fenv exceptions. e allows the declaration to be emitted conditionally on
+ certain build flags - expansion is deferred by one pass so that those flags
+ are expanded properly. */
+#define PL_TEST_EXPECT_FENV(f, e) PL_TEST_EXPECT_FENV_ (f, e)
+#define PL_TEST_EXPECT_FENV_(f, e) PL_TEST_EXPECT_FENV_##e (f)
+#define PL_TEST_EXPECT_FENV_1(f) PL_TEST_EXPECT_FENV_ENABLED f
+#define PL_TEST_EXPECT_FENV_ALWAYS(f) PL_TEST_EXPECT_FENV (f, 1)
+
+#define PL_TEST_INTERVAL(f, lo, hi, n) PL_TEST_INTERVAL f lo hi n
+#define PL_TEST_SYM_INTERVAL(f, lo, hi, n) \
+ PL_TEST_INTERVAL (f, lo, hi, n) \
+ PL_TEST_INTERVAL (f, -lo, -hi, n)
+#define PL_TEST_INTERVAL_C(f, lo, hi, n, c) PL_TEST_INTERVAL f lo hi n c
+#define PL_TEST_SYM_INTERVAL_C(f, lo, hi, n, c) \
+ PL_TEST_INTERVAL_C (f, lo, hi, n, c) \
+ PL_TEST_INTERVAL_C (f, -lo, -hi, n, c)
+// clang-format off
+#define PL_TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL f xlo,ylo xhi,yhi n
+// clang-format on
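As a rough usage sketch (everything named below is hypothetical): a routine source ends with these markers, and the build is assumed to preprocess the sources so that the expanded PL_TEST_* lines can be collected into the limits, fenv-expectation and interval lists that runulp.sh reads.

/* Hypothetical tail of a routine source, e.g. foof_1u9.c; "pl_test.h" is
   assumed to resolve to this header when the test lists are generated.  */
#include "pl_test.h"

/* ... implementation of foof (float) ... */

/* Accept at most 1.5 ULP of error for foof.  */
PL_TEST_ULP (foof, 1.5)
/* Emit the fenv expectation only if the 0/1 build flag (WANT_ERRNO here, as an
   example) expands to 1; expansion is deferred by one pass as described above.  */
PL_TEST_EXPECT_FENV (foof, WANT_ERRNO)
/* 50000 test points in [0, 0x1p23] and, via SYM, in [-0, -0x1p23].  */
PL_TEST_SYM_INTERVAL (foof, 0, 0x1p23, 50000)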
diff --git a/contrib/arm-optimized-routines/pl/math/test/runulp.sh b/contrib/arm-optimized-routines/pl/math/test/runulp.sh
new file mode 100755
index 000000000000..0f5a41f76b25
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/runulp.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# ULP error check script.
+#
+# Copyright (c) 2019-2023, Arm Limited.
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+#set -x
+set -eu
+
+# cd to bin directory.
+cd "${0%/*}"
+
+flags="${ULPFLAGS:--q}"
+emu="$@"
+
+# Enable SVE testing
+WANT_SVE_MATH=${WANT_SVE_MATH:-0}
+
+FAIL=0
+PASS=0
+
+t() {
+ routine=$1
+ L=$(cat $LIMITS | grep "^$routine " | awk '{print $2}')
+ [[ $L =~ ^[0-9]+\.[0-9]+$ ]]
+ extra_flags=
+ [[ -z "${5:-}" ]] || extra_flags="$extra_flags -c $5"
+ grep -q "^$routine$" $FENV || extra_flags="$extra_flags -f"
+ IFS=',' read -ra LO <<< "$2"
+ IFS=',' read -ra HI <<< "$3"
+ ITV="${LO[0]} ${HI[0]}"
+ for i in "${!LO[@]}"; do
+ [[ "$i" -eq "0" ]] || ITV="$ITV x ${LO[$i]} ${HI[$i]}"
+ done
+ # Add -z flag to ignore zero sign for vector routines
+ { echo $routine | grep -q "ZGV"; } && extra_flags="$extra_flags -z"
+ $emu ./ulp -e $L $flags ${extra_flags} $routine $ITV $4 && PASS=$((PASS+1)) || FAIL=$((FAIL+1))
+}
+
+check() {
+ $emu ./ulp -f -q "$@" #>/dev/null
+}
+
+if [ "$FUNC" == "atan2" ] || [ -z "$FUNC" ]; then
+ # Regression-test for correct NaN handling in atan2
+ check atan2 0x1p-1022 0x1p-1000 x 0 0x1p-1022 40000
+ check atan2 0x1.7887a0a717aefp+1017 0x1.7887a0a717aefp+1017 x -nan -nan
+ check atan2 nan nan x -nan -nan
+fi
+
+# vector functions
+flags="${ULPFLAGS:--q}"
+runsv=
+if [ $WANT_SVE_MATH -eq 1 ]; then
+# No guarantees about powi accuracy, so regression-test for exactness
+# w.r.t. the custom reference impl in ulp_wrappers.h
+check -q -f -e 0 _ZGVsMxvv_powi 0 inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powi 0 inf x -0 -1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powi -0 -inf x -0 -1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powk 0 inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x 0 1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powk 0 inf x -0 -1000 100000 && runsv=1
+check -q -f -e 0 _ZGVsMxvv_powk -0 -inf x -0 -1000 100000 && runsv=1
+fi
+
+while read F LO HI N C
+do
+ t $F $LO $HI $N $C
+done << EOF
+$(cat $INTERVALS | grep "\b$FUNC\b")
+EOF
+
+[ 0 -eq $FAIL ] || {
+ echo "FAILED $FAIL PASSED $PASS"
+ exit 1
+}
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst
new file mode 100644
index 000000000000..a73dcd25965b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acos.tst
@@ -0,0 +1,17 @@
+; acos.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=acos op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=acos op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=acos op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=acos op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=acos op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acos op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acos op1=00000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=acos op1=80000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=acos op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=acos op1=bff00000.00000000 result=400921fb.54442d18.469 errno=0
+func=acos op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=acos op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst
new file mode 100644
index 000000000000..9e453e3bff5e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosf.tst
@@ -0,0 +1,21 @@
+; acosf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=acosf op1=7fc00001 result=7fc00001 errno=0
+func=acosf op1=ffc00001 result=7fc00001 errno=0
+func=acosf op1=7f800001 result=7fc00001 errno=0 status=i
+func=acosf op1=ff800001 result=7fc00001 errno=0 status=i
+func=acosf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=acosf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=acosf op1=00000000 result=3fc90fda.a22 errno=0
+func=acosf op1=80000000 result=3fc90fda.a22 errno=0
+func=acosf op1=3f800000 result=00000000 errno=0
+func=acosf op1=bf800000 result=40490fda.a22 errno=0
+func=acosf op1=3f800001 result=7fc00001 errno=EDOM status=i
+func=acosf op1=bf800001 result=7fc00001 errno=EDOM status=i
+func=acosf op1=33000000 result=3fc90fda.622 error=0
+func=acosf op1=30000000 result=3fc90fda.a12 error=0
+func=acosf op1=2d000000 result=3fc90fda.a21 error=0
+func=acosf op1=2a000000 result=3fc90fda.a22 error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst
new file mode 100644
index 000000000000..dd962bd391da
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acosh.tst
@@ -0,0 +1,19 @@
+; acosh.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=acosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=acosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=acosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=acosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=acosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=acosh op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=acosh op1=3fefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=00000000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=80000000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=bfefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=acosh op1=7fe01ac0.7f03a83e result=40862e50.541778f1.8cc error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst
new file mode 100644
index 000000000000..606c615f9b74
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/acoshf.tst
@@ -0,0 +1,19 @@
+; acoshf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=acoshf op1=7fc00001 result=7fc00001 errno=0
+func=acoshf op1=ffc00001 result=7fc00001 errno=0
+func=acoshf op1=7f800001 result=7fc00001 errno=0 status=i
+func=acoshf op1=ff800001 result=7fc00001 errno=0 status=i
+func=acoshf op1=7f800000 result=7f800000 errno=0
+func=acoshf op1=3f800000 result=00000000 errno=0
+func=acoshf op1=3f7fffff result=7fc00001 errno=EDOM status=i
+func=acoshf op1=00000000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=80000000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=bf7fffff result=7fc00001 errno=EDOM status=i
+func=acoshf op1=bf800000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=bf800001 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=acoshf op1=7f767efe result=42b2c19d.83e error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst
new file mode 100644
index 000000000000..6180d7849d90
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asin.tst
@@ -0,0 +1,24 @@
+; asin.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asin op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=asin op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=asin op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=asin op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=asin op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=asin op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=asin op1=00000000.00000000 result=00000000.00000000 errno=0
+func=asin op1=80000000.00000000 result=80000000.00000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=asin op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=asin op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
+
+func=asin op1=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=asin op1=bff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=asin op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=asin op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst
new file mode 100644
index 000000000000..a85b2593768d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinf.tst
@@ -0,0 +1,24 @@
+; asinf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinf op1=7fc00001 result=7fc00001 errno=0
+func=asinf op1=ffc00001 result=7fc00001 errno=0
+func=asinf op1=7f800001 result=7fc00001 errno=0 status=i
+func=asinf op1=ff800001 result=7fc00001 errno=0 status=i
+func=asinf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=asinf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=asinf op1=00000000 result=00000000 errno=0
+func=asinf op1=80000000 result=80000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=asinf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=asinf op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=asinf op1=3f800000 result=3fc90fda.a22 errno=0
+func=asinf op1=bf800000 result=bfc90fda.a22 errno=0
+func=asinf op1=3f800001 result=7fc00001 errno=EDOM status=i
+func=asinf op1=bf800001 result=7fc00001 errno=EDOM status=i
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst
new file mode 100644
index 000000000000..1485dfeffecf
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinh.tst
@@ -0,0 +1,18 @@
+; asinh.tst
+;
+; Copyright (c) 2022-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=asinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=asinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=asinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=asinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=asinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
+func=asinh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=asinh op1=80000000.00000000 result=80000000.00000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=asinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=asinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst
new file mode 100644
index 000000000000..eb76a5892a70
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/asinhf.tst
@@ -0,0 +1,18 @@
+; asinhf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=asinhf op1=7fc00001 result=7fc00001 errno=0
+func=asinhf op1=ffc00001 result=7fc00001 errno=0
+func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=asinhf op1=7f800000 result=7f800000 errno=0
+func=asinhf op1=ff800000 result=ff800000 errno=0
+func=asinhf op1=00000000 result=00000000 errno=0
+func=asinhf op1=80000000 result=80000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst
new file mode 100644
index 000000000000..4c670553d58f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan.tst
@@ -0,0 +1,22 @@
+; atan.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan op1=00000000.00000000 result=00000000.00000000 errno=0
+func=atan op1=80000000.00000000 result=80000000.00000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
+
+func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst
new file mode 100644
index 000000000000..647b3764072c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2.tst
@@ -0,0 +1,110 @@
+; atan2.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff80000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff80000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff80000.00000001 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff80000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff80000.00000001 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff80000.00000001 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=7ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=fff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=00000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=80000000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=3ff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff80000.00000001 op2=bff00000.00000000 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=7ff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=7ff00000.00000000 op2=7ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=fff00000.00000000 result=4002d97c.7f3321d2.34f errno=0
+func=atan2 op1=7ff00000.00000000 op2=00000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=80000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=7ff00000.00000000 op2=bff00000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=fff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=fff00000.00000000 op2=7ff00000.00000000 result=bfe921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=fff00000.00000000 result=c002d97c.7f3321d2.34f errno=0
+func=atan2 op1=fff00000.00000000 op2=00000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=80000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=3ff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=fff00000.00000000 op2=bff00000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=00000000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=00000000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=00000000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=00000000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=00000000.00000000 op2=7ff00000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=00000000.00000000 op2=fff00000.00000000 result=400921fb.54442d18.469 errno=0
+func=atan2 op1=00000000.00000000 op2=00000000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=00000000.00000000 op2=80000000.00000000 result=400921fb.54442d18.469 errno=0
+func=atan2 op1=00000000.00000000 op2=3ff00000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=00000000.00000000 op2=bff00000.00000000 result=400921fb.54442d18.469 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan2 op1=00000000.00000001 op2=3ff00000.00000000 result=00000000.00000001 errno=0 maybestatus=ux
+func=atan2 op1=80000000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=80000000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=80000000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=80000000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=80000000.00000000 op2=7ff00000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=80000000.00000000 op2=fff00000.00000000 result=c00921fb.54442d18.469 errno=0
+func=atan2 op1=80000000.00000000 op2=00000000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=80000000.00000000 op2=80000000.00000000 result=c00921fb.54442d18.469 errno=0
+func=atan2 op1=80000000.00000000 op2=3ff00000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=80000000.00000000 op2=bff00000.00000000 result=c00921fb.54442d18.469 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan2 op1=80000000.00000001 op2=3ff00000.00000000 result=80000000.00000001 errno=0 maybestatus=ux
+func=atan2 op1=3ff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=3ff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=3ff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=3ff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=3ff00000.00000000 op2=7ff00000.00000000 result=00000000.00000000 errno=0
+func=atan2 op1=3ff00000.00000000 op2=fff00000.00000000 result=400921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=00000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=80000000.00000000 result=3ff921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
+func=atan2 op1=3ff00000.00000000 op2=bff00000.00000000 result=4002d97c.7f3321d2.34f errno=0
+func=atan2 op1=bff00000.00000000 op2=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=bff00000.00000000 op2=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atan2 op1=bff00000.00000000 op2=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=bff00000.00000000 op2=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atan2 op1=bff00000.00000000 op2=7ff00000.00000000 result=80000000.00000000 errno=0
+func=atan2 op1=bff00000.00000000 op2=fff00000.00000000 result=c00921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=00000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=80000000.00000000 result=bff921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=3ff00000.00000000 result=bfe921fb.54442d18.469 errno=0
+func=atan2 op1=bff00000.00000000 op2=bff00000.00000000 result=c002d97c.7f3321d2.34f errno=0
+func=atan2 op1=3ff00000.00000000 op2=3ff00000.00000000 result=3fe921fb.54442d18 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst
new file mode 100644
index 000000000000..85c5c5d47e10
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atan2f.tst
@@ -0,0 +1,121 @@
+; atan2f.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atan2f op1=7f800001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=7fc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=ffc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=7f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=ff800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=00000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=80000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=3f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800001 op2=bf800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=7fc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=ffc00001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=7f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=ff800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=00000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=80000000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=3f800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800001 op2=bf800000 result=7fc00001 errno=0 status=i
+func=atan2f op1=7fc00001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7fc00001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7fc00001 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=ffc00001 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=7f800000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=ff800000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=00000000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=80000000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=3f800000 result=7fc00001 errno=0
+func=atan2f op1=7fc00001 op2=bf800000 result=7fc00001 errno=0
+func=atan2f op1=ffc00001 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ffc00001 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ffc00001 op2=7fc00001 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=7f800000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=ff800000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=00000000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=80000000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=3f800000 result=ffc00001 errno=0
+func=atan2f op1=ffc00001 op2=bf800000 result=ffc00001 errno=0
+func=atan2f op1=7f800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=7f800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=7f800000 op2=ffc00001 result=7fc00001 errno=0
+func=atan2f op1=7f800000 op2=7f800000 result=3f490fda.a22 errno=0
+func=atan2f op1=7f800000 op2=ff800000 result=4016cbe3.f99 errno=0
+func=atan2f op1=7f800000 op2=00000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=7f800000 op2=80000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=7f800000 op2=3f800000 result=3fc90fda.a22 errno=0
+func=atan2f op1=7f800000 op2=bf800000 result=3fc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=ff800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=ff800000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=ff800000 op2=7f800000 result=bf490fda.a22 errno=0
+func=atan2f op1=ff800000 op2=ff800000 result=c016cbe3.f99 errno=0
+func=atan2f op1=ff800000 op2=00000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=80000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=3f800000 result=bfc90fda.a22 errno=0
+func=atan2f op1=ff800000 op2=bf800000 result=bfc90fda.a22 errno=0
+func=atan2f op1=00000000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=00000000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=00000000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=00000000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=00000000 op2=7f800000 result=00000000 errno=0
+func=atan2f op1=00000000 op2=ff800000 result=40490fda.a22 errno=0
+func=atan2f op1=00000000 op2=00000000 result=00000000 errno=0
+func=atan2f op1=00000000 op2=80000000 result=40490fda.a22 errno=0
+func=atan2f op1=00000000 op2=3f800000 result=00000000 errno=0
+func=atan2f op1=00000000 op2=bf800000 result=40490fda.a22 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan2f op1=00000001 op2=3f800000 result=00000001 errno=0 maybestatus=ux
+
+func=atan2f op1=80000000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=80000000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=80000000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=80000000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=80000000 op2=7f800000 result=80000000 errno=0
+func=atan2f op1=80000000 op2=ff800000 result=c0490fda.a22 errno=0
+func=atan2f op1=80000000 op2=00000000 result=80000000 errno=0
+func=atan2f op1=80000000 op2=80000000 result=c0490fda.a22 errno=0
+func=atan2f op1=80000000 op2=3f800000 result=80000000 errno=0
+func=atan2f op1=80000000 op2=bf800000 result=c0490fda.a22 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atan2f op1=80000001 op2=3f800000 result=80000001 errno=0 maybestatus=ux
+
+func=atan2f op1=3f800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=3f800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=3f800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=3f800000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=3f800000 op2=7f800000 result=00000000 errno=0
+func=atan2f op1=3f800000 op2=ff800000 result=40490fda.a22 errno=0
+func=atan2f op1=3f800000 op2=00000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=3f800000 op2=80000000 result=3fc90fda.a22 errno=0
+func=atan2f op1=3f800000 op2=3f800000 result=3f490fda.a22 errno=0
+func=atan2f op1=3f800000 op2=bf800000 result=4016cbe3.f99 errno=0
+func=atan2f op1=bf800000 op2=7f800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=bf800000 op2=ff800001 result=7fc00001 errno=0 status=i
+func=atan2f op1=bf800000 op2=7fc00001 result=7fc00001 errno=0
+func=atan2f op1=bf800000 op2=ffc00001 result=ffc00001 errno=0
+func=atan2f op1=bf800000 op2=7f800000 result=80000000 errno=0
+func=atan2f op1=bf800000 op2=ff800000 result=c0490fda.a22 errno=0
+func=atan2f op1=bf800000 op2=00000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=bf800000 op2=80000000 result=bfc90fda.a22 errno=0
+func=atan2f op1=bf800000 op2=3f800000 result=bf490fda.a22 errno=0
+func=atan2f op1=bf800000 op2=bf800000 result=c016cbe3.f99 errno=0
+func=atan2f op1=8005f16d op2=002bb601 result=be0a60a5.d88 error=0
+func=atan2f op1=80818ec8 op2=80ba5db9 result=c0222eda.f42 error=0
+
+func=atan2f op1=ff7fffff op2=ff7fffff result=c016cbe3.f99 errno=0
+func=atan2f op1=bfc00001 op2=7f7fffff result=80300000.700 errno=0 status=u
+func=atan2f op1=80800001 op2=40000000 result=80400000.800 errno=0 status=u
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst
new file mode 100644
index 000000000000..0a0bfc24c605
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanf.tst
@@ -0,0 +1,22 @@
+; atanf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atanf op1=7fc00001 result=7fc00001 errno=0
+func=atanf op1=ffc00001 result=7fc00001 errno=0
+func=atanf op1=7f800001 result=7fc00001 errno=0 status=i
+func=atanf op1=ff800001 result=7fc00001 errno=0 status=i
+func=atanf op1=7f800000 result=3fc90fda.a22 errno=0
+func=atanf op1=ff800000 result=bfc90fda.a22 errno=0
+func=atanf op1=00000000 result=00000000 errno=0
+func=atanf op1=80000000 result=80000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atanf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=atanf op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=atanf op1=3f800000 result=3f490fda.a22 errno=0
+func=atanf op1=bf800000 result=bf490fda.a22 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst
new file mode 100644
index 000000000000..d96ff327fcd9
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanh.tst
@@ -0,0 +1,22 @@
+; atanh.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=atanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=atanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=atanh op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=atanh op1=3ff00000.00000000 result=7ff00000.00000000 errno=ERANGE status=z
+func=atanh op1=bff00000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=atanh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=atanh op1=80000000.00000000 result=80000000.00000000 errno=0
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=atanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst
new file mode 100644
index 000000000000..21a68a661a11
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/atanhf.tst
@@ -0,0 +1,23 @@
+; atanhf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=atanhf op1=7fc00001 result=7fc00001 errno=0
+func=atanhf op1=ffc00001 result=7fc00001 errno=0
+func=atanhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=atanhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=atanhf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=3f800001 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=bf800001 result=7fc00001 errno=EDOM status=i
+func=atanhf op1=3f800000 result=7f800000 errno=ERANGE status=z
+func=atanhf op1=bf800000 result=ff800000 errno=ERANGE status=z
+func=atanhf op1=00000000 result=00000000 errno=0
+func=atanhf op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=atanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=atanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst
new file mode 100644
index 000000000000..0dd8d09f1d4f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cbrtf.tst
@@ -0,0 +1,29 @@
+; cbrtf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=cbrtf op1=7f800000 result=7f800000 errno=0
+func=cbrtf op1=ff800000 result=ff800000 errno=0
+func=cbrtf op1=7f800001 result=7fc00001 errno=0 status=i
+func=cbrtf op1=7fc00001 result=7fc00001 errno=0
+func=cbrtf op1=00000000 result=00000000 errno=0
+func=cbrtf op1=00000001 result=26a14517.cc7 errno=0
+func=cbrtf op1=00000002 result=26cb2ff5.29f errno=0
+func=cbrtf op1=00000003 result=26e89768.579 errno=0
+func=cbrtf op1=00000004 result=27000000.000 errno=0
+func=cbrtf op1=00400000 result=2a4b2ff5.29f errno=0
+func=cbrtf op1=00800000 result=2a800000.000 errno=0
+func=cbrtf op1=3f800000 result=3f800000.000 errno=0
+func=cbrtf op1=40000000 result=3fa14517.cc7 errno=0
+func=cbrtf op1=7f7fffff result=54cb2ff4.e63 errno=0
+func=cbrtf op1=80000000 result=80000000 errno=0
+func=cbrtf op1=80000001 result=a6a14517.cc7 errno=0
+func=cbrtf op1=80000002 result=a6cb2ff5.29f errno=0
+func=cbrtf op1=80000003 result=a6e89768.579 errno=0
+func=cbrtf op1=80000004 result=a7000000.000 errno=0
+func=cbrtf op1=80400000 result=aa4b2ff5.29f errno=0
+func=cbrtf op1=80800000 result=aa800000.000 errno=0
+func=cbrtf op1=bf800000 result=bf800000.000 errno=0
+func=cbrtf op1=c0000000 result=bfa14517.cc7 errno=0
+func=cbrtf op1=ff7fffff result=d4cb2ff4.e63 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst
new file mode 100644
index 000000000000..c4efacb7272d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/cosh.tst
@@ -0,0 +1,15 @@
+; cosh.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=cosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=cosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=cosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=cosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=cosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=cosh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=cosh op1=fff00000.00000000 result=7ff00000.00000000 errno=0
+func=cosh op1=ffefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=cosh op1=00000000.00000000 result=3ff00000.00000000 errno=0
+func=cosh op1=80000000.00000000 result=3ff00000.00000000 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst
new file mode 100644
index 000000000000..2b967e78f4b4
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/coshf.tst
@@ -0,0 +1,15 @@
+; coshf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=coshf op1=7fc00001 result=7fc00001 errno=0
+func=coshf op1=ffc00001 result=7fc00001 errno=0
+func=coshf op1=7f800001 result=7fc00001 errno=0 status=i
+func=coshf op1=ff800001 result=7fc00001 errno=0 status=i
+func=coshf op1=7f800000 result=7f800000 errno=0
+func=coshf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=coshf op1=ff800000 result=7f800000 errno=0
+func=coshf op1=ff7fffff result=7f800000 errno=ERANGE status=ox
+func=coshf op1=00000000 result=3f800000 errno=0
+func=coshf op1=80000000 result=3f800000 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst
new file mode 100644
index 000000000000..c03fc591da47
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfc.tst
@@ -0,0 +1,23 @@
+; erfc.tst - Directed test cases for erfc
+;
+; Copyright (c) 2022-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erfc op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=erfc op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=erfc op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=erfc op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=erfc op1=7ff00000.00000000 result=00000000.00000000 errno=0
+func=erfc op1=7fefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
+; We deliberately turned off errno setting in erf, as the standard simply
+; states that errno `may` be set to ERANGE in case of underflow.
+; As a result the following condition on errno cannot be satisfied.
+;
+; func=erfc op1=403b44af.48b01531 result=00000000.00000000 errno=ERANGE status=ux
+;
+func=erfc op1=c03b44af.48b01531 result=40000000.00000000 errno=0
+func=erfc op1=403bffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
+func=erfc op1=c03bffff.ffffffff result=40000000.00000000 errno=0
+func=erfc op1=fff00000.00000000 result=40000000.00000000 errno=0
+func=erfc op1=00000000.00000000 result=3ff00000.00000000 errno=0
+func=erfc op1=80000000.00000000 result=3ff00000.00000000 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst
new file mode 100644
index 000000000000..719baccb2e45
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erfcf.tst
@@ -0,0 +1,14 @@
+; erfcf.tst - Directed test cases for erfcf
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erfcf op1=7fc00001 result=7fc00001 errno=0
+func=erfcf op1=ffc00001 result=7fc00001 errno=0
+func=erfcf op1=7f800001 result=7fc00001 errno=0 status=i
+func=erfcf op1=ff800001 result=7fc00001 errno=0 status=i
+func=erfcf op1=7f800000 result=00000000 errno=0
+func=erfcf op1=7f7fffff result=00000000 errno=ERANGE status=ux
+func=erfcf op1=ff800000 result=40000000 errno=0
+func=erfcf op1=00000000 result=3f800000 errno=0
+func=erfcf op1=80000000 result=3f800000 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst
new file mode 100644
index 000000000000..9b1d3d5114ae
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/erff.tst
@@ -0,0 +1,17 @@
+; erff.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=erff op1=7fc00001 result=7fc00001 errno=0
+func=erff op1=ffc00001 result=7fc00001 errno=0
+func=erff op1=7f800001 result=7fc00001 errno=0 status=i
+func=erff op1=ff800001 result=7fc00001 errno=0 status=i
+func=erff op1=7f800000 result=3f800000 errno=0
+func=erff op1=ff800000 result=bf800000 errno=0
+func=erff op1=00000000 result=00000000 errno=ERANGE
+func=erff op1=80000000 result=80000000 errno=ERANGE
+func=erff op1=00000001 result=00000001 errno=0 status=ux
+func=erff op1=80000001 result=80000001 errno=0 status=ux
+func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
+func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst
new file mode 100644
index 000000000000..609d6f479721
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1.tst
@@ -0,0 +1,21 @@
+; expm1.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=expm1 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=expm1 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=expm1 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=expm1 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=expm1 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=expm1 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=expm1 op1=fff00000.00000000 result=bff00000.00000000 errno=0
+func=expm1 op1=ffefffff.ffffffff result=bff00000.00000000 errno=0
+func=expm1 op1=00000000.00000000 result=00000000.00000000 errno=0
+func=expm1 op1=80000000.00000000 result=80000000.00000000 errno=0
+; Inconsistent behavior was detected for the following 2 cases.
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+func=expm1 op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=expm1 op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst
new file mode 100644
index 000000000000..44c38420a617
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/expm1f.tst
@@ -0,0 +1,57 @@
+; expm1f.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=expm1f op1=7fc00001 result=7fc00001 errno=0
+func=expm1f op1=ffc00001 result=7fc00001 errno=0
+func=expm1f op1=7f800001 result=7fc00001 errno=0 status=i
+func=expm1f op1=ff800001 result=7fc00001 errno=0 status=i
+func=expm1f op1=7f800000 result=7f800000 errno=0
+func=expm1f op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=expm1f op1=ff800000 result=bf800000 errno=0
+func=expm1f op1=ff7fffff result=bf800000 errno=0
+func=expm1f op1=00000000 result=00000000 errno=0
+func=expm1f op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Functions
+; approximated by x near zero may not generate/implement flops and
+; thus may not raise exceptions.
+
+func=expm1f op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=expm1f op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=expm1f op1=42b145c0 result=7f6ac2dd.9b8 errno=0
+
+; Check both sides of the over/underflow thresholds in the code.
+func=expm1f op1=c2000000 result=bf7fffff.fff error=0
+func=expm1f op1=c2000001 result=bf7fffff.fff error=0
+func=expm1f op1=43000000 result=7f800000 error=overflow
+func=expm1f op1=43000001 result=7f800000 error=overflow
+func=expm1f op1=c2a80000 result=bf800000.000 error=0
+func=expm1f op1=c2a80001 result=bf800000.000 error=0
+
+; Check values for which exp goes denormal. expm1f should not report
+; spurious overflow.
+func=expm1f op1=c2b00f34 result=bf800000.000 error=0
+func=expm1f op1=c2ce8ed0 result=bf800000.000 error=0
+func=expm1f op1=c2dc6bba result=bf800000.000 error=0
+
+; Regression tests for significance loss when the two components of
+; the result have opposite sign but similar magnitude
+func=expm1f op1=be8516c1 result=be6a652b.0dc error=0
+func=expm1f op1=be851714 result=be6a65ab.0e5 error=0
+func=expm1f op1=be851cc7 result=be6a6e75.111 error=0
+func=expm1f op1=be851d1a result=be6a6ef5.102 error=0
+func=expm1f op1=be851d6d result=be6a6f75.0f2 error=0
+func=expm1f op1=be852065 result=be6a7409.0e4 error=0
+func=expm1f op1=be8520b8 result=be6a7489.0c7 error=0
+func=expm1f op1=be85210b result=be6a7509.0a8 error=0
+func=expm1f op1=be855401 result=be6ac39b.0d5 error=0
+func=expm1f op1=be933307 result=be7fdbf0.d8d error=0
+func=expm1f op1=be92ed6b result=be7f737a.d81 error=0
+func=expm1f op1=be933b90 result=be7fe8be.d76 error=0
+func=expm1f op1=3eb11364 result=3ed38deb.0c0 error=0
+func=expm1f op1=3f28e830 result=3f6f344b.0da error=0
+func=expm1f op1=3eb1578f result=3ed3ee47.13b error=0
+func=expm1f op1=3f50176a result=3fa08e36.fea error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst
new file mode 100644
index 000000000000..34831436234a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10.tst
@@ -0,0 +1,16 @@
+; log10.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log10 op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=log10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=log10 op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=log10 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log10 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log10 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log10 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst
new file mode 100644
index 000000000000..d5744a66f092
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log10f.tst
@@ -0,0 +1,69 @@
+; log10f.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log10f op1=7fc00001 result=7fc00001 errno=0
+func=log10f op1=ffc00001 result=7fc00001 errno=0
+func=log10f op1=7f800001 result=7fc00001 errno=0 status=i
+func=log10f op1=ff800001 result=7fc00001 errno=0 status=i
+func=log10f op1=ff810000 result=7fc00001 errno=0 status=i
+func=log10f op1=7f800000 result=7f800000 errno=0
+func=log10f op1=3f800000 result=00000000 errno=0
+func=log10f op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=log10f op1=00000000 result=ff800000 errno=ERANGE status=z
+func=log10f op1=80000000 result=ff800000 errno=ERANGE status=z
+func=log10f op1=80000001 result=7fc00001 errno=EDOM status=i
+
+; Directed tests for the special-case handling of log10 of things
+; very near 1
+func=log10f op1=3f81a618 result=3bb62472.b92 error=0
+func=log10f op1=3f876783 result=3cc811f4.26c error=0
+func=log10f op1=3f816af8 result=3b9cc4c7.057 error=0
+func=log10f op1=3f7bed7d result=bbe432cb.e23 error=0
+func=log10f op1=3f803ece result=3a59ff3a.a84 error=0
+func=log10f op1=3f80089f result=38ef9728.aa6 error=0
+func=log10f op1=3f86ab72 result=3cb4b711.457 error=0
+func=log10f op1=3f780854 result=bc60f953.904 error=0
+func=log10f op1=3f7c6d76 result=bbc7fd01.01c error=0
+func=log10f op1=3f85dff6 result=3c9fa76f.81f error=0
+func=log10f op1=3f7b87f4 result=bbfa9edc.be4 error=0
+func=log10f op1=3f81c710 result=3bc4457b.745 error=0
+func=log10f op1=3f80946d result=3b00a140.c06 error=0
+func=log10f op1=3f7e87ea result=bb23cd70.828 error=0
+func=log10f op1=3f811437 result=3b6ee960.b40 error=0
+func=log10f op1=3f858dcf result=3c971d9b.2ea error=0
+func=log10f op1=3f7f61a3 result=ba89b814.4e0 error=0
+func=log10f op1=3f82d642 result=3c1bfb8d.517 error=0
+func=log10f op1=3f80f3bc result=3b52ebe8.c75 error=0
+func=log10f op1=3f85eff9 result=3ca150d9.7e8 error=0
+func=log10f op1=3f843eb8 result=3c68263f.771 error=0
+func=log10f op1=3f78e691 result=bc481cf4.50a error=0
+func=log10f op1=3f87c56f result=3cd1b268.5e6 error=0
+func=log10f op1=3f83b711 result=3c4b94c5.918 error=0
+func=log10f op1=3f823b2b result=3bf5eb02.e2a error=0
+func=log10f op1=3f7f2c4e result=bab82c80.519 error=0
+func=log10f op1=3f83fc92 result=3c5a3ba1.543 error=0
+func=log10f op1=3f793956 result=bc3ee04e.03c error=0
+func=log10f op1=3f839ba5 result=3c45caca.92a error=0
+func=log10f op1=3f862f30 result=3ca7de76.16f error=0
+func=log10f op1=3f832a20 result=3c2dc6e9.afd error=0
+func=log10f op1=3f810296 result=3b5fb92a.429 error=0
+func=log10f op1=3f7e58c9 result=bb38655a.0a4 error=0
+func=log10f op1=3f8362e7 result=3c39cc65.d15 error=0
+func=log10f op1=3f7fdb85 result=b97d9016.40b error=0
+func=log10f op1=3f84484e result=3c6a29f2.f74 error=0
+func=log10f op1=3f861862 result=3ca5819e.f2d error=0
+func=log10f op1=3f7c027b result=bbdf912d.440 error=0
+func=log10f op1=3f867803 result=3caf6744.34d error=0
+func=log10f op1=3f789a89 result=bc509bce.458 error=0
+func=log10f op1=3f8361d9 result=3c399347.379 error=0
+func=log10f op1=3f7d3ac3 result=bb9ad93a.93d error=0
+func=log10f op1=3f7ee241 result=baf8bd12.a62 error=0
+func=log10f op1=3f83a1fd result=3c4721bd.0a4 error=0
+func=log10f op1=3f840da3 result=3c5dd375.675 error=0
+func=log10f op1=3f79c2fe result=bc2f8a60.8c5 error=0
+func=log10f op1=3f854a93 result=3c901cc9.add error=0
+func=log10f op1=3f87a50a result=3cce6125.cd6 error=0
+func=log10f op1=3f818bf5 result=3baaee68.a55 error=0
+func=log10f op1=3f830a44 result=3c2705c4.d87 error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst
new file mode 100644
index 000000000000..9ee8c62fc9c0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1p.tst
@@ -0,0 +1,22 @@
+; log1p.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
+func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+; Cases 6, 9, 10, 11, 12 fail with certain versions of glibc and not others.
+; The main reason seems to be the handling of errno and exceptions.
+
+func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0
+func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Implementations
+; that approximate these functions by x near zero may not perform any
+; floating-point operations and thus may not raise exceptions.
+func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst
new file mode 100644
index 000000000000..aaa01d67c2b3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log1pf.tst
@@ -0,0 +1,130 @@
+; log1pf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log1pf op1=7fc00001 result=7fc00001 errno=0
+func=log1pf op1=ffc00001 result=7fc00001 errno=0
+func=log1pf op1=7f800001 result=7fc00001 errno=0 status=i
+func=log1pf op1=ff800001 result=7fc00001 errno=0 status=i
+func=log1pf op1=ff810000 result=7fc00001 errno=0 status=i
+func=log1pf op1=7f800000 result=7f800000 errno=0
+
+; Cases 6, 9, 10, 11, 12 fail with certain versions of glibc and not others.
+; The main reason seems to be the handling of errno and exceptions.
+
+func=log1pf op1=00000000 result=00000000 errno=0
+func=log1pf op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Implementations
+; that approximate these functions by x near zero may not perform any
+; floating-point operations and thus may not raise exceptions.
+func=log1pf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=log1pf op1=80000001 result=80000001 errno=0 maybestatus=ux
+
+func=log1pf op1=3f1e91ee result=3ef6d127.fdb errno=0
+func=log1pf op1=3f201046 result=3ef8a881.fba errno=0
+func=log1pf op1=3f21b916 result=3efab23b.f9f errno=0
+func=log1pf op1=3f21bde6 result=3efab821.fee errno=0
+func=log1pf op1=3f22a5ee result=3efbd435.ff2 errno=0
+func=log1pf op1=3f231b56 result=3efc63b7.e26 errno=0
+func=log1pf op1=3f23ce96 result=3efd3e83.fc8 errno=0
+func=log1pf op1=3eee18c6 result=3ec38576.02e errno=0
+func=log1pf op1=3eee2f41 result=3ec394ce.057 errno=0
+func=log1pf op1=3eee770d result=3ec3c5cc.00c errno=0
+func=log1pf op1=3eee7fed result=3ec3cbda.065 errno=0
+func=log1pf op1=3eee8fb2 result=3ec3d69c.008 errno=0
+func=log1pf op1=3eeeb8eb result=3ec3f2ba.061 errno=0
+func=log1pf op1=3eeeccfd result=3ec4006a.01d errno=0
+func=log1pf op1=3eeef5f0 result=3ec41c56.020 errno=0
+func=log1pf op1=3eeeff12 result=3ec42290.00c errno=0
+func=log1pf op1=3eef05cf result=3ec42728.052 errno=0
+func=log1pf op1=3eef13d3 result=3ec430b6.00e errno=0
+func=log1pf op1=3eef2e70 result=3ec442da.04a errno=0
+func=log1pf op1=3eef3fbf result=3ec44ea6.055 errno=0
+func=log1pf op1=3eef3feb result=3ec44ec4.021 errno=0
+func=log1pf op1=3eef4399 result=3ec45146.011 errno=0
+func=log1pf op1=3eef452e result=3ec4525a.049 errno=0
+func=log1pf op1=3eef4ea9 result=3ec458d0.020 errno=0
+func=log1pf op1=3eef7365 result=3ec471d8.05e errno=0
+func=log1pf op1=3eefa38f result=3ec492a8.003 errno=0
+func=log1pf op1=3eefb1f1 result=3ec49c74.015 errno=0
+func=log1pf op1=3eefb334 result=3ec49d50.023 errno=0
+func=log1pf op1=3eefb3c1 result=3ec49db0.0bf errno=0
+func=log1pf op1=3eefb591 result=3ec49eec.15d errno=0
+func=log1pf op1=3eefd736 result=3ec4b5d6.02d errno=0
+func=log1pf op1=3eefd797 result=3ec4b618.114 errno=0
+func=log1pf op1=3eefee5d result=3ec4c59a.071 errno=0
+func=log1pf op1=3eeffff4 result=3ec4d194.0a7 errno=0
+func=log1pf op1=3ef00cd1 result=3ec4da56.025 errno=0
+func=log1pf op1=3ef0163a result=3ec4e0be.07a errno=0
+func=log1pf op1=3ef01e89 result=3ec4e666.007 errno=0
+func=log1pf op1=3ef02004 result=3ec4e768.00a errno=0
+func=log1pf op1=3ef02c40 result=3ec4efbc.017 errno=0
+func=log1pf op1=3ef05b50 result=3ec50fc4.031 errno=0
+func=log1pf op1=3ef05bb1 result=3ec51006.05f errno=0
+func=log1pf op1=3ef0651b result=3ec5166e.0d9 errno=0
+func=log1pf op1=3ef06609 result=3ec51710.02a errno=0
+func=log1pf op1=3ef0666a result=3ec51752.049 errno=0
+func=log1pf op1=3ef0791e result=3ec5240c.0a8 errno=0
+func=log1pf op1=3ef07d46 result=3ec526e0.00e errno=0
+func=log1pf op1=3ef091fd result=3ec534f8.03c errno=0
+func=log1pf op1=3ef09602 result=3ec537b4.128 errno=0
+func=log1pf op1=3ef09848 result=3ec53940.044 errno=0
+func=log1pf op1=3ef0a04f result=3ec53eb6.07d errno=0
+func=log1pf op1=3ef0ab6a result=3ec54644.062 errno=0
+func=log1pf op1=3ef0ae49 result=3ec54838.002 errno=0
+func=log1pf op1=3ef0c1b8 result=3ec55570.000 errno=0
+func=log1pf op1=3ef0ca06 result=3ec55b16.00d errno=0
+func=log1pf op1=3ef0cc29 result=3ec55c8a.095 errno=0
+func=log1pf op1=3ef0d228 result=3ec5609e.04f errno=0
+func=log1pf op1=3ef0d8c0 result=3ec5651a.05e errno=0
+func=log1pf op1=3ef0dc0c result=3ec56758.029 errno=0
+func=log1pf op1=3ef0e0e8 result=3ec56aa6.02e errno=0
+func=log1pf op1=3ef0e502 result=3ec56d70.102 errno=0
+func=log1pf op1=3ef0e754 result=3ec56f04.017 errno=0
+func=log1pf op1=3ef0efe9 result=3ec574da.01c errno=0
+func=log1pf op1=3ef0f309 result=3ec576fa.016 errno=0
+func=log1pf op1=3ef0f499 result=3ec5780a.005 errno=0
+func=log1pf op1=3ef0f6c2 result=3ec57982.083 errno=0
+func=log1pf op1=3ef0f852 result=3ec57a92.05d errno=0
+func=log1pf op1=3ef0f9e2 result=3ec57ba2.02e errno=0
+func=log1pf op1=3ef119ee result=3ec5916c.024 errno=0
+func=log1pf op1=3ef11edf result=3ec594c8.03d errno=0
+func=log1pf op1=3ef128c4 result=3ec59b82.001 errno=0
+func=log1pf op1=3ef12ac1 result=3ec59cdc.04b errno=0
+func=log1pf op1=3ef12fea result=3ec5a05e.045 errno=0
+func=log1pf op1=3ef131e7 result=3ec5a1b8.05a errno=0
+func=log1pf op1=3ef134e1 result=3ec5a3be.00e errno=0
+func=log1pf op1=3ef1397a result=3ec5a6de.127 errno=0
+func=log1pf op1=3ef13ade result=3ec5a7d0.0f6 errno=0
+func=log1pf op1=3ef13c0d result=3ec5a89e.054 errno=0
+func=log1pf op1=3ef13d71 result=3ec5a990.016 errno=0
+func=log1pf op1=3ef14074 result=3ec5ab9c.12c errno=0
+func=log1pf op1=3ef146a0 result=3ec5afce.035 errno=0
+func=log1pf op1=3ef14a39 result=3ec5b240.024 errno=0
+func=log1pf op1=3ef14d39 result=3ec5b44a.00c errno=0
+func=log1pf op1=3ef152a3 result=3ec5b7f8.04d errno=0
+func=log1pf op1=3ef170a1 result=3ec5cc5a.021 errno=0
+func=log1pf op1=3ef17855 result=3ec5d196.0dc errno=0
+func=log1pf op1=3ef17ece result=3ec5d5fc.010 errno=0
+func=log1pf op1=3ef1810c result=3ec5d782.08e errno=0
+func=log1pf op1=3ef18da9 result=3ec5e014.0ae errno=0
+func=log1pf op1=3ef19054 result=3ec5e1e4.1a2 errno=0
+func=log1pf op1=3ef190ea result=3ec5e24a.048 errno=0
+func=log1pf op1=3ef1a739 result=3ec5f172.0d8 errno=0
+func=log1pf op1=3ef1a83c result=3ec5f222.018 errno=0
+func=log1pf op1=3ef1bbcc result=3ec5ff6c.09d errno=0
+func=log1pf op1=3ef1bd3c result=3ec60066.03a errno=0
+func=log1pf op1=3ef1d6ee result=3ec611da.056 errno=0
+func=log1pf op1=3ef1de36 result=3ec616cc.01b errno=0
+func=log1pf op1=3ef1e623 result=3ec61c2e.008 errno=0
+func=log1pf op1=3ef1e9b1 result=3ec61e98.029 errno=0
+func=log1pf op1=3ef1ee19 result=3ec62196.0d8 errno=0
+func=log1pf op1=3ef1f13a result=3ec623b6.039 errno=0
+func=log1pf op1=3ef1f1a7 result=3ec62400.091 errno=0
+func=log1pf op1=3ef1f214 result=3ec6244a.0e8 errno=0
+func=log1pf op1=3ef206e1 result=3ec6326a.09b errno=0
+func=log1pf op1=3ef21245 result=3ec63a26.012 errno=0
+func=log1pf op1=3ef217fd result=3ec63e08.048 errno=0
+func=log1pf op1=3ef2186a result=3ec63e52.063 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst
new file mode 100644
index 000000000000..5d1eb9b877e8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2.tst
@@ -0,0 +1,21 @@
+; Directed test cases for log2
+;
+; Copyright (c) 2018-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
+func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
+func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
+func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
+func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
+func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
+func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst
new file mode 100644
index 000000000000..4e08110878d6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/log2f.tst
@@ -0,0 +1,27 @@
+; log2f.tst - Directed test cases for log2f
+;
+; Copyright (c) 2017-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=log2f op1=7fc00001 result=7fc00001 errno=0
+func=log2f op1=ffc00001 result=7fc00001 errno=0
+func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
+func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
+func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
+func=log2f op1=7f800000 result=7f800000 errno=0
+func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=log2f op1=3f800000 result=00000000 errno=0
+func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
+func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
+func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
+
+func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
+func=log2f op1=3f604189 result=be4394c8.395 error=0
+func=log2f op1=3f278034 result=bf1caa73.88e error=0
+func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
+func=log2f op1=3e61259a result=c00bdb95.650 error=0
+func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
+func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
+func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
+func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
+func=log2f op1=40070838 result=3f89e055.a0a error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst
new file mode 100644
index 000000000000..d6a3da896693
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinh.tst
@@ -0,0 +1,21 @@
+; sinh.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
+func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
+func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox
+func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0
+
+; No exception is raised with certain versions of glibc. Implementations
+; that approximate these functions by x near zero may not perform any
+; floating-point operations and thus may not raise exceptions.
+func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst
new file mode 100644
index 000000000000..5f7bd1b04137
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/sinhf.tst
@@ -0,0 +1,21 @@
+; sinhf.tst
+;
+; Copyright (c) 2009-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=sinhf op1=7fc00001 result=7fc00001 errno=0
+func=sinhf op1=ffc00001 result=7fc00001 errno=0
+func=sinhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=sinhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=sinhf op1=7f800000 result=7f800000 errno=0
+func=sinhf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
+func=sinhf op1=ff800000 result=ff800000 errno=0
+func=sinhf op1=ff7fffff result=ff800000 errno=ERANGE status=ox
+func=sinhf op1=00000000 result=00000000 errno=0
+func=sinhf op1=80000000 result=80000000 errno=0
+
+; No exception is raised with certain versions of glibc. Implementations
+; that approximate these functions by x near zero may not perform any
+; floating-point operations and thus may not raise exceptions.
+func=sinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=sinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst
new file mode 100644
index 000000000000..3161f70f4361
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanf.tst
@@ -0,0 +1,25 @@
+; tanf.tst
+;
+; Copyright (c) 2022-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=tanf op1=7fc00001 result=7fc00001 errno=0
+func=tanf op1=ffc00001 result=7fc00001 errno=0
+func=tanf op1=7f800001 result=7fc00001 errno=0 status=i
+func=tanf op1=ff800001 result=7fc00001 errno=0 status=i
+func=tanf op1=7f800000 result=7fc00001 errno=EDOM status=i
+func=tanf op1=ff800000 result=7fc00001 errno=EDOM status=i
+func=tanf op1=00000000 result=00000000 errno=0
+func=tanf op1=80000000 result=80000000 errno=0
+; SDCOMP-26094: check tanf in the cases for which the range reducer
+; returns values furthest beyond its nominal upper bound of pi/4.
+func=tanf op1=46427f1b result=3f80396d.599 error=0
+func=tanf op1=4647e568 result=3f8039a6.c9f error=0
+func=tanf op1=46428bac result=3f803a03.148 error=0
+func=tanf op1=4647f1f9 result=3f803a3c.852 error=0
+func=tanf op1=4647fe8a result=3f803ad2.410 error=0
+func=tanf op1=45d8d7f1 result=bf800669.901 error=0
+func=tanf op1=45d371a4 result=bf800686.3cd error=0
+func=tanf op1=45ce0b57 result=bf8006a2.e9a error=0
+func=tanf op1=45d35882 result=bf80071b.bc4 error=0
+func=tanf op1=45cdf235 result=bf800738.693 error=0
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst
new file mode 100644
index 000000000000..78776e6f3924
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanh.tst
@@ -0,0 +1,18 @@
+; tanh.tst
+;
+; Copyright (c) 1999-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=tanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
+func=tanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
+func=tanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=tanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
+func=tanh op1=7ff00000.00000000 result=3ff00000.00000000 errno=0
+func=tanh op1=fff00000.00000000 result=bff00000.00000000 errno=0
+func=tanh op1=00000000.00000000 result=00000000.00000000 errno=0
+func=tanh op1=80000000.00000000 result=80000000.00000000 errno=0
+; No exception is raised with certain versions of glibc. Implementations
+; that approximate these functions by x near zero may not perform any
+; floating-point operations and thus may not raise exceptions.
+func=tanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
+func=tanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst
new file mode 100644
index 000000000000..603e3107e44f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/directed/tanhf.tst
@@ -0,0 +1,20 @@
+; tanhf.tst
+;
+; Copyright (c) 2007-2023, Arm Limited.
+; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+func=tanhf op1=7fc00001 result=7fc00001 errno=0
+func=tanhf op1=ffc00001 result=7fc00001 errno=0
+func=tanhf op1=7f800001 result=7fc00001 errno=0 status=i
+func=tanhf op1=ff800001 result=7fc00001 errno=0 status=i
+func=tanhf op1=7f800000 result=3f800000 errno=0
+func=tanhf op1=ff800000 result=bf800000 errno=0
+func=tanhf op1=00000000 result=00000000 errno=0
+func=tanhf op1=80000000 result=80000000 errno=0
+; No exception is raised with certain versions of glibc. Implementations
+; that approximate these functions by x near zero may not perform any
+; floating-point operations and thus may not raise exceptions.
+; func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+; func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
+func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
+func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst
new file mode 100644
index 000000000000..d83283ef7864
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/random/double.tst
@@ -0,0 +1,6 @@
+!! double.tst - Random test case specification for DP functions
+!!
+!! Copyright (c) 1999-2023, Arm Limited.
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+test log10 10000
diff --git a/contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst b/contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst
new file mode 100644
index 000000000000..fa77efecfabb
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/testcases/random/float.tst
@@ -0,0 +1,8 @@
+!! float.tst - Random test case specification for SP functions
+!!
+!! Copyright (c) 2022-2023, Arm Limited.
+!! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+test erff 10000
+test log10f 10000
+test tanf 10000
diff --git a/contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h b/contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h
new file mode 100644
index 000000000000..4929b481ffe1
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/ulp_funcs.h
@@ -0,0 +1,70 @@
+/*
+ * Function entries for ulp.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#if defined(__vpcs) && __aarch64__
+
+#define _ZVF1(f) ZVF1 (f)
+#define _ZVD1(f) ZVD1 (f)
+#define _ZVF2(f) ZVF2 (f)
+#define _ZVD2(f) ZVD2 (f)
+
+#else
+
+#define _ZVF1(f)
+#define _ZVD1(f)
+#define _ZVF2(f)
+#define _ZVD2(f)
+
+#endif
+
+#if WANT_SVE_MATH
+
+#define _ZSVF1(f) ZSVF1 (f)
+#define _ZSVF2(f) ZSVF2 (f)
+#define _ZSVD1(f) ZSVD1 (f)
+#define _ZSVD2(f) ZSVD2 (f)
+
+#else
+
+#define _ZSVF1(f)
+#define _ZSVF2(f)
+#define _ZSVD1(f)
+#define _ZSVD2(f)
+
+#endif
+
+#define _ZSF1(f) F1 (f)
+#define _ZSF2(f) F2 (f)
+#define _ZSD1(f) D1 (f)
+#define _ZSD2(f) D2 (f)
+
+#include "ulp_funcs_gen.h"
+
+F (_ZGVnN4v_sincosf_sin, v_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
+F (_ZGVnN4v_sincosf_cos, v_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
+F (_ZGVnN4v_cexpif_sin, v_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
+F (_ZGVnN4v_cexpif_cos, v_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
+
+F (_ZGVnN2v_sincos_sin, v_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+F (_ZGVnN2v_sincos_cos, v_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+F (_ZGVnN2v_cexpi_sin, v_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+F (_ZGVnN2v_cexpi_cos, v_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+
+#if WANT_SVE_MATH
+F (_ZGVsMxvv_powk, Z_sv_powk, ref_powi, mpfr_powi, 2, 0, d2, 0)
+F (_ZGVsMxvv_powi, Z_sv_powi, ref_powif, mpfr_powi, 2, 1, f2, 0)
+
+F (_ZGVsMxv_sincosf_sin, sv_sincosf_sin, sin, mpfr_sin, 1, 1, f1, 0)
+F (_ZGVsMxv_sincosf_cos, sv_sincosf_cos, cos, mpfr_cos, 1, 1, f1, 0)
+F (_ZGVsMxv_cexpif_sin, sv_cexpif_sin, sin, mpfr_sin, 1, 1, f1, 0)
+F (_ZGVsMxv_cexpif_cos, sv_cexpif_cos, cos, mpfr_cos, 1, 1, f1, 0)
+
+F (_ZGVsMxv_sincos_sin, sv_sincos_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+F (_ZGVsMxv_sincos_cos, sv_sincos_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+F (_ZGVsMxv_cexpi_sin, sv_cexpi_sin, sinl, mpfr_sin, 1, 0, d1, 0)
+F (_ZGVsMxv_cexpi_cos, sv_cexpi_cos, cosl, mpfr_cos, 1, 0, d1, 0)
+#endif
diff --git a/contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h b/contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h
new file mode 100644
index 000000000000..0f7b68949c7b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/test/ulp_wrappers.h
@@ -0,0 +1,140 @@
+// clang-format off
+/*
+ * Function wrappers for ulp.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <arm_neon.h>
+
+#if USE_MPFR
+static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
+ mpfr_cos(y, x, r);
+ return mpfr_sin(y, x, r);
+}
+static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
+ mpfr_sin(y, x, r);
+ return mpfr_cos(y, x, r);
+}
+static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) {
+ mpfr_t y2;
+ mpfr_init(y2);
+ mpfr_trunc(y2, y);
+ return mpfr_pow(ret, x, y2, rnd);
+}
+#endif
+
+/* Our implementations of powi/powk are too imprecise to verify
+ against any established pow implementation. Instead we have the
+ following simple implementation, against which it is enough to
+ maintain bitwise reproducibility. Note the test framework expects
+ the reference impl to be of higher precision than the function
+ under test. For instance this means that the reference for
+ double-precision powi will be passed a long double, so to check
+ bitwise reproducibility we have to cast it back down to
+ double. This is fine since a round-trip to higher precision and
+ back down is correctly rounded. */
+#define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T) \
+ static DBL_T __attribute__((unused)) NAME (DBL_T in_val, DBL_T y) \
+ { \
+ INT_T n = (INT_T) round (y); \
+ FLT_T acc = 1.0; \
+ bool want_recip = n < 0; \
+ n = n < 0 ? -n : n; \
+ \
+ for (FLT_T c = in_val; n; c *= c, n >>= 1) \
+ { \
+ if (n & 0x1) \
+ { \
+ acc *= c; \
+ } \
+ } \
+ if (want_recip) \
+ { \
+ acc = 1.0 / acc; \
+ } \
+ return acc; \
+ }
+
+DECL_POW_INT_REF(ref_powif, double, float, int)
+DECL_POW_INT_REF(ref_powi, long double, double, int)
+
+#define ZVF1_WRAP(func) static float Z_##func##f(float x) { return _ZGVnN4v_##func##f(argf(x))[0]; }
+#define ZVF2_WRAP(func) static float Z_##func##f(float x, float y) { return _ZGVnN4vv_##func##f(argf(x), argf(y))[0]; }
+#define ZVD1_WRAP(func) static double Z_##func(double x) { return _ZGVnN2v_##func(argd(x))[0]; }
+#define ZVD2_WRAP(func) static double Z_##func(double x, double y) { return _ZGVnN2vv_##func(argd(x), argd(y))[0]; }
+
+#if defined(__vpcs) && __aarch64__
+
+#define ZVNF1_WRAP(func) ZVF1_WRAP(func)
+#define ZVNF2_WRAP(func) ZVF2_WRAP(func)
+#define ZVND1_WRAP(func) ZVD1_WRAP(func)
+#define ZVND2_WRAP(func) ZVD2_WRAP(func)
+
+#else
+
+#define ZVNF1_WRAP(func)
+#define ZVNF2_WRAP(func)
+#define ZVND1_WRAP(func)
+#define ZVND2_WRAP(func)
+
+#endif
+
+#define ZSVF1_WRAP(func) static float Z_sv_##func##f(float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), svptrue_b32())); }
+#define ZSVF2_WRAP(func) static float Z_sv_##func##f(float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), svptrue_b32())); }
+#define ZSVD1_WRAP(func) static double Z_sv_##func(double x) { return svretd(_ZGVsMxv_##func(svargd(x), svptrue_b64())); }
+#define ZSVD2_WRAP(func) static double Z_sv_##func(double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), svptrue_b64())); }
+
+#if WANT_SVE_MATH
+
+#define ZSVNF1_WRAP(func) ZSVF1_WRAP(func)
+#define ZSVNF2_WRAP(func) ZSVF2_WRAP(func)
+#define ZSVND1_WRAP(func) ZSVD1_WRAP(func)
+#define ZSVND2_WRAP(func) ZSVD2_WRAP(func)
+
+#else
+
+#define ZSVNF1_WRAP(func)
+#define ZSVNF2_WRAP(func)
+#define ZSVND1_WRAP(func)
+#define ZSVND2_WRAP(func)
+
+#endif
+
+/* No wrappers for scalar routines, but PL_SIG will emit them. */
+#define ZSNF1_WRAP(func)
+#define ZSNF2_WRAP(func)
+#define ZSND1_WRAP(func)
+#define ZSND2_WRAP(func)
+
+#include "ulp_wrappers_gen.h"
+
+float v_sincosf_sin(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return s[0]; }
+float v_sincosf_cos(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return c[0]; }
+float v_cexpif_sin(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[0][0]; }
+float v_cexpif_cos(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[1][0]; }
+
+double v_sincos_sin(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return s[0]; }
+double v_sincos_cos(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return c[0]; }
+double v_cexpi_sin(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[0][0]; }
+double v_cexpi_cos(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[1][0]; }
+
+#if WANT_SVE_MATH
+static float Z_sv_powi(float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_s32((int)round(y)), svptrue_b32())); }
+static double Z_sv_powk(double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_s64((long)round(y)), svptrue_b64())); }
+
+float sv_sincosf_sin(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return s[0]; }
+float sv_sincosf_cos(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return c[0]; }
+float sv_cexpif_sin(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 0)); }
+float sv_cexpif_cos(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 1)); }
+
+double sv_sincos_sin(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return s[0]; }
+double sv_sincos_cos(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return c[0]; }
+double sv_cexpi_sin(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 0)); }
+double sv_cexpi_cos(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 1)); }
+
+#endif
+// clang-format on
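
The comment above motivates DECL_POW_INT_REF: the reference for powi/powk only needs to be bitwise-reproducible, so a plain square-and-multiply loop suffices. The following standalone sketch spells out the same loop outside the macro and compares it with pow for a couple of exactly-representable cases; powi_sketch is our name and it is not part of the harness.

#include <math.h>
#include <stdbool.h>
#include <stdio.h>

/* Square-and-multiply, mirroring the loop DECL_POW_INT_REF expands to:
   scan the bits of n, square the running base, and multiply it into the
   accumulator whenever the current bit is set.  */
static double powi_sketch (double x, int n)
{
  bool want_recip = n < 0;
  unsigned m = n < 0 ? -(unsigned) n : (unsigned) n;
  double acc = 1.0;
  for (double c = x; m; c *= c, m >>= 1)
    if (m & 1)
      acc *= c;
  return want_recip ? 1.0 / acc : acc;
}

int main (void)
{
  /* 1.5^7 = 17.0859375 is exactly representable, so pow agrees bit-for-bit.  */
  printf ("%.10g %.10g\n", powi_sketch (1.5, 7), pow (1.5, 7.0));
  printf ("%.10g %.10g\n", powi_sketch (2.0, -3), pow (2.0, -3.0));
  return 0;
}
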
diff --git a/contrib/arm-optimized-routines/pl/math/tools/asin.sollya b/contrib/arm-optimized-routines/pl/math/tools/asin.sollya
new file mode 100644
index 000000000000..8ef861d0898b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/asin.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asin(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+f = asin(x);
+dtype = double;
+
+prec=256;
+
+a = 0x1p-106;
+b = 0.25;
+
+deg = 11;
+
+backward = proc(poly, d) {
+ return d + d ^ 3 * poly(d * d);
+};
+
+forward = proc(f, d) {
+ return (f(sqrt(d))-sqrt(d))/(d*sqrt(d));
+};
+
+poly = fpminimax(forward(f, x), [|0,...,deg|], [|dtype ...|], [a;b], relative, floating);
+
+display = hexadecimal!;
+print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b]));
+print("in [", a, b, "]");
+for i from 0 to deg do print(coeff(poly, i));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/asinf.sollya b/contrib/arm-optimized-routines/pl/math/tools/asinf.sollya
new file mode 100644
index 000000000000..5b627e546c73
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/asinf.sollya
@@ -0,0 +1,36 @@
+// polynomial for approximating asinf(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+f = asin(x);
+dtype = single;
+
+a = 0x1p-24;
+b = 0.25;
+
+deg = 4;
+
+backward = proc(poly, d) {
+ return d + d ^ 3 * poly(d * d);
+};
+
+forward = proc(f, d) {
+ return (f(sqrt(d))-sqrt(d))/(d*sqrt(d));
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16);
+};
+
+poly = 0;
+for i from 0 to deg do {
+ i;
+ p = roundcoefficients(approx(poly,i), [|dtype ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal!;
+print("rel error:", accurateinfnorm(1-backward(poly, x)/f(x), [a;b], 30));
+print("in [", a, b, "]");
+for i from 0 to deg do print(coeff(poly, i));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/asinh.sollya b/contrib/arm-optimized-routines/pl/math/tools/asinh.sollya
new file mode 100644
index 000000000000..663ee92f3f34
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/asinh.sollya
@@ -0,0 +1,28 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so
+// we use 2^-6 as the lower bound for coeff generation, which yields sufficiently
+// accurate results in [2^-26, 2^-6].
+a = 0x1p-6;
+b = 1.0;
+
+f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2);
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = 0;
+for i from 0 to deg do {
+ i;
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya b/contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya
new file mode 100644
index 000000000000..ab115b53b8dc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/asinhf.sollya
@@ -0,0 +1,29 @@
+// polynomial for approximating asinh(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9;
+
+a = 0x1.0p-12;
+b = 1.0;
+
+f = proc(y) {
+ return asinh(x);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/atan.sollya b/contrib/arm-optimized-routines/pl/math/tools/atan.sollya
new file mode 100644
index 000000000000..ad4f33b8516a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/atan.sollya
@@ -0,0 +1,23 @@
+// polynomial for approximating atan(x) and atan2(y, x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// atan is odd, so approximate with an odd polynomial:
+// x + ax^3 + bx^5 + cx^7 + ...
+// We generate a, b, c, ... such that we can approximate atan(x) by:
+// x + x^3 * (a + bx^2 + cx^4 + ...)
+
+// Assemble monomials
+deg = 20;
+mons = [|1,...,deg|];
+for i from 0 to deg-1 do mons[i] = mons[i] * 2 + 1;
+
+a = 0x1.0p-1022;
+b = 1;
+
+poly = fpminimax(atan(x)-x, mons, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg-1 do coeff(poly,mons[i]);
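
The generated coefficients are meant to be consumed as an odd polynomial evaluated in x^2, as the comment above describes. The sketch below shows that evaluation shape in C with Horner's rule; the coefficients are just the first four Taylor terms (-1/3, 1/5, -1/7, 1/9) standing in for the script's minimax output, so it is only a rough approximation of atan and not the library code.

#include <math.h>
#include <stdio.h>

/* Evaluate atan(x) in the form the script targets: x + x^3 * P(x^2).
   The stand-in coefficients below are Taylor terms, good to a few decimal
   places on [0, 0.5]; the fpminimax coefficients (many more of them)
   would replace them for the full interval.  */
static double atan_poly_sketch (double x)
{
  static const double c[] = { -0x1.5555555555555p-2,  /* -1/3 */
                               0x1.999999999999ap-3,  /*  1/5 */
                              -0x1.2492492492492p-3,  /* -1/7 */
                               0x1.c71c71c71c71cp-4 };/*  1/9 */
  double z = x * x;
  double p = c[3];
  for (int i = 2; i >= 0; i--)
    p = p * z + c[i];        /* Horner on x^2 */
  return x + x * z * p;      /* reconstruct x + x^3 * P(x^2) */
}

int main (void)
{
  for (double x = 0.1; x < 0.6; x += 0.2)
    printf ("x=%.1f  sketch=%.6f  atan=%.6f\n", x, atan_poly_sketch (x), atan (x));
  return 0;
}
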
diff --git a/contrib/arm-optimized-routines/pl/math/tools/atanf.sollya b/contrib/arm-optimized-routines/pl/math/tools/atanf.sollya
new file mode 100644
index 000000000000..ed88d0ba90f9
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/atanf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating atanf(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Generate list of monomials:
+// Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ...
+// So generate a, b, c, ... such that we can approximate atan(x) by:
+// x + x^3 * (a + bx^2 + cx^4 + ...)
+
+deg = 7;
+
+a = 1.1754943508222875e-38;
+b = 1;
+
+poly = fpminimax((atan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya b/contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya
new file mode 100644
index 000000000000..1d43dc73d8cd
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/cbrt.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating cbrt(x) in double precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 3;
+
+a = 0.5;
+b = 1;
+
+
+f = x^(1/3);
+
+poly = fpminimax(f, deg, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), D, RN);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya b/contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya
new file mode 100644
index 000000000000..4e0cc69b46a5
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/cbrtf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating cbrt(x) in single precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 3;
+
+a = 0.5;
+b = 1;
+
+
+f = x^(1/3);
+
+poly = fpminimax(f, deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do round(coeff(poly,i), SG, RN);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/erf.sollya b/contrib/arm-optimized-routines/pl/math/tools/erf.sollya
new file mode 100644
index 000000000000..b2fc559b511e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/erf.sollya
@@ -0,0 +1,25 @@
+// tables and constants for approximating erf(x).
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}");
+for i from 0 to 768 do {
+ r = i / 128;
+ t0 = double(erf(r));
+ t1 = double(2/sqrt(pi) * exp(-r * r));
+ print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+double(1/3);
+double(1/10);
+double(2/15);
+double(2/9);
+double(2/45);
+double(2/sqrt(pi));
+
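
The table pairs erf(r) with the scaled Gaussian 2/sqrt(pi)*exp(-r^2) because the latter is erf'(r): a Taylor expansion about the table point r (with d the offset from r) reconstructs erf, and the rational constants printed above (1/3, 1/10, 2/15, ...) are the kind of factors that appear in its higher-order terms. For reference, in LaTeX:

\operatorname{erf}(r+d) \;=\; \operatorname{erf}(r) \;+\; \frac{2}{\sqrt{\pi}}\,e^{-r^2}\left(d \;-\; r\,d^{2} \;+\; \frac{2r^{2}-1}{3}\,d^{3} \;+\; O(d^{4})\right)
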
diff --git a/contrib/arm-optimized-routines/pl/math/tools/erfc.sollya b/contrib/arm-optimized-routines/pl/math/tools/erfc.sollya
new file mode 100644
index 000000000000..1e2791291ebb
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/erfc.sollya
@@ -0,0 +1,51 @@
+// tables and constants for approximating erfc(x).
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }");
+for i from 0 to 3787 do {
+ r = 0.0 + i / 128;
+ t0 = double(erfc(r) * 2^128);
+ t1 = double(2/sqrt(pi) * exp(-r * r) * 2^128);
+ print("{ " @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+print("> 2/sqrt(pi)");
+double(2/sqrt(pi));
+
+print("> 1/3");
+double(1/3);
+
+print("> P5");
+double(2/15);
+double(1/10);
+double(2/9);
+double(2/45);
+
+print("> P6");
+double(1/42);
+double(1/7);
+double(2/21);
+double(4/315);
+
+print("> Q");
+double( 5.0 / 4.0);
+double( 6.0 / 5.0);
+double( 7.0 / 6.0);
+double( 8.0 / 7.0);
+double( 9.0 / 8.0);
+double(10.0 / 9.0);
+
+print("> R");
+double(-2.0 * 4.0 / (5.0 * 6.0));
+double(-2.0 * 5.0 / (6.0 * 7.0));
+double(-2.0 * 6.0 / (7.0 * 8.0));
+double(-2.0 * 7.0 / (8.0 * 9.0));
+double(-2.0 * 8.0 / (9.0 * 10.0));
+double(-2.0 * 9.0 / (10.0 * 11.0));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya b/contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya
new file mode 100644
index 000000000000..1d7fc264d99d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/erfcf.sollya
@@ -0,0 +1,22 @@
+// tables and constants for approximating erfcf(x).
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }");
+for i from 0 to 644 do {
+ r = 0.0 + i / 64;
+ t0 = single(erfc(r) * 2^47);
+ t1 = single(2/sqrt(pi) * exp(-r * r) * 2^47);
+ print("{ " @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+single(1/3);
+single(2/15);
+single(1/10);
+single(2/sqrt(pi));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/erff.sollya b/contrib/arm-optimized-routines/pl/math/tools/erff.sollya
new file mode 100644
index 000000000000..59b23ef021f0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/erff.sollya
@@ -0,0 +1,20 @@
+// tables and constants for approximating erff(x).
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+display = hexadecimal;
+prec=128;
+
+// Tables
+print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}");
+for i from 0 to 512 do {
+ r = i / 128;
+ t0 = single(erf(r));
+ t1 = single(2/sqrt(pi) * exp(-r * r));
+ print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },");
+};
+
+// Constants
+single(1/3);
+single(2/sqrt(pi));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/exp10.sollya b/contrib/arm-optimized-routines/pl/math/tools/exp10.sollya
new file mode 100644
index 000000000000..9f30b4018209
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/exp10.sollya
@@ -0,0 +1,55 @@
+// polynomial for approximating 10^x
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// exp10f parameters
+deg = 5; // poly degree
+N = 1; // Neon 1, SVE 64
+b = log(2)/(2 * N * log(10)); // interval
+a = -b;
+wp = single;
+
+// exp10 parameters
+//deg = 4; // poly degree - bump to 5 for ~1 ULP
+//N = 128; // table size
+//b = log(2)/(2 * N * log(10)); // interval
+//a = -b;
+//wp = D;
+
+
+// find polynomial with minimal relative error
+
+f = 10^x;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx_abs = proc(poly,d) {
+ return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal coeffs in the working precision (wp)
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|wp ...|]);
+// p = roundcoefficients(approx_abs(poly,i), [|wp ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30));
+print("abs error:", accurateinfnorm(10^x-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
+
+log10_2 = round(N * log(10) / log(2), wp, RN);
+log2_10 = log(2) / (N * log(10));
+log2_10_hi = round(log2_10, wp, RN);
+log2_10_lo = round(log2_10 - log2_10_hi, wp, RN);
+print(log10_2);
+print(log2_10_hi);
+print(log2_10_lo);
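
The constants printed at the end are the ingredients of the standard reduction 10^x = 2^k * 10^r, with k = round(x * N * log2(10)) and r = x - k * log10(2)/N; note that the script's variable log10_2 actually holds N*log2(10) and log2_10 holds log10(2)/N. The sketch below shows that shape for the N = 1 (Neon) case, with a truncated Taylor series of 10^r standing in for the minimax polynomial and no special-case, overflow or underflow handling; it illustrates the reduction only and is not the library routine.

#include <math.h>
#include <stdio.h>

/* 10^x = 2^k * 10^r with k = round(x * log2(10)) and r = x - k * log10(2),
   so |r| <= log10(2)/2.  The script splits log10(2) into hi and lo parts to
   keep this subtraction accurate; a single constant is enough for a sketch.  */
static float exp10f_sketch (float x)
{
  const float log2_10 = 0x1.a934fp+1f;  /* log2(10) - printed as "log10_2" above */
  const float log10_2 = 0x1.344136p-2f; /* log10(2) - the script's hi + lo       */

  float k = roundf (x * log2_10);
  float r = x - k * log10_2;

  /* Truncated Taylor series of 10^r = exp(r*ln(10)): coefficients ln(10)^i/i!.
     The script's degree-5 minimax polynomial is more accurate on this interval.  */
  float p = 1.0f + r * (2.3025851f + r * (2.6509491f + r * (2.0346786f
                  + r * (1.1712551f + r * 0.5393829f))));

  return ldexpf (p, (int) k);   /* scale by 2^k */
}

int main (void)
{
  printf ("%g %g\n", exp10f_sketch (0.5f), pow (10.0, 0.5));
  printf ("%g %g\n", exp10f_sketch (-3.0f), pow (10.0, -3.0));
  return 0;
}
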
diff --git a/contrib/arm-optimized-routines/pl/math/tools/expm1.sollya b/contrib/arm-optimized-routines/pl/math/tools/expm1.sollya
new file mode 100644
index 000000000000..7b6f324eb247
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/expm1.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating exp(x)-1 in double precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12;
+
+a = -log(2)/2;
+b = log(2)/2;
+
+f = proc(y) {
+ return exp(y)-1;
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), D, RN);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya b/contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya
new file mode 100644
index 000000000000..efdf1bd301e0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/expm1f.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating exp(x)-1 in single precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 5;
+
+a = -log(2)/2;
+b = log(2)/2;
+
+f = proc(y) {
+ return exp(y)-1;
+};
+
+poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), SG, RN);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/log10.sollya b/contrib/arm-optimized-routines/pl/math/tools/log10.sollya
new file mode 100644
index 000000000000..85d1d15c1698
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/log10.sollya
@@ -0,0 +1,44 @@
+// polynomial for approximating log10(1+x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 6; // poly degree
+// |log10(1+x)| > 0x1p-5 outside the interval
+a = -0x1.p-5;
+b = 0x1.p-5;
+
+ln10 = evaluate(log(10),0);
+invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits
+invln10lo = double(1/ln10 - invln10hi);
+
+// find log10(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log10(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln10;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln10hi + invln10lo;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+display = hexadecimal;
+print("invln10hi:", invln10hi);
+print("invln10lo:", invln10lo);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
+
+display = decimal;
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/pl/math/tools/log10f.sollya b/contrib/arm-optimized-routines/pl/math/tools/log10f.sollya
new file mode 100644
index 000000000000..94bf32f2c449
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/log10f.sollya
@@ -0,0 +1,37 @@
+// polynomial for approximating log10f(1+x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// Computation of log10f(1+x) will be carried out in double precision
+
+deg = 4; // poly degree
+// [OFF; 2*OFF] is divided into 2^4 intervals, with OFF ~ 0.7
+a = -0.04375;
+b = 0.04375;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do double(coeff(poly,i));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/log1p.sollya b/contrib/arm-optimized-routines/pl/math/tools/log1p.sollya
new file mode 100644
index 000000000000..598a36af0339
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/log1p.sollya
@@ -0,0 +1,30 @@
+// polynomial for approximating log(1+x) in double precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 20;
+
+a = sqrt(2)/2-1;
+b = sqrt(2)-1;
+
+f = proc(y) {
+ return log(1+y);
+};
+
+approx = proc(poly, d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+poly = x;
+for i from 2 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+
+print("coeffs:");
+display = hexadecimal;
+for i from 2 to deg do coeff(poly,i);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya b/contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya
new file mode 100644
index 000000000000..cc1db10e4c0c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/log1pf.sollya
@@ -0,0 +1,21 @@
+// polynomial for approximating log(1+x) in single precision
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 10;
+
+a = -0.25;
+b = 0.5;
+
+f = proc(y) {
+ return log(1+y);
+};
+
+poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 2 to deg do round(coeff(poly,i), SG, RN);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/sincos.sollya b/contrib/arm-optimized-routines/pl/math/tools/sincos.sollya
new file mode 100644
index 000000000000..7d36266b446b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/sincos.sollya
@@ -0,0 +1,33 @@
+// polynomial for approximating cos(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// This script only finds the coeffs for cos - see math/aarch64/v_sin.c for sin coeffs
+
+deg = 14; // polynomial degree
+a = -pi/4; // interval
+b = pi/4;
+
+// find even polynomial with minimal abs error compared to cos(x)
+
+f = cos(x);
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+ return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg/2 do {
+ p = roundcoefficients(approx(poly,2*i), [|double ...|]);
+ poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal;
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+//print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya b/contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya
new file mode 100644
index 000000000000..178ee83ac196
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/sincosf.sollya
@@ -0,0 +1,33 @@
+// polynomial for approximating cos(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+// This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs.
+
+deg = 8; // polynomial degree
+a = -pi/4; // interval
+b = pi/4;
+
+// find even polynomial with minimal abs error compared to cos(x)
+
+f = cos(x);
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+ return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal single prec coeffs
+poly = 1;
+for i from 1 to deg/2 do {
+ p = roundcoefficients(approx(poly,2*i), [|single ...|]);
+ poly = poly + x^(2*i)*coeff(p,0);
+};
+
+display = hexadecimal;
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+//print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya b/contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya
new file mode 100644
index 000000000000..62cc87e7697d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/sinpi.sollya
@@ -0,0 +1,33 @@
+// polynomial for approximating sinpi(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 19; // polynomial degree
+a = -1/2; // interval
+b = 1/2;
+
+// find even polynomial with minimal abs error compared to sinpi(x)
+
+// f = sin(pi* x);
+f = pi*x;
+c = 1;
+for i from 1 to 80 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*(pi*x)^(2*i+1)/c; };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|
+approx = proc(poly,d) {
+ return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
+};
+
+// first coeff is predefined, iteratively find optimal double prec coeffs
+poly = pi*x;
+for i from 0 to (deg-1)/2 do {
+ p = roundcoefficients(approx(poly,2*i+1), [|D ...|]);
+ poly = poly + x^(2*i+1)*coeff(p,0);
+};
+
+display = hexadecimal;
+print("abs error:", accurateinfnorm(sin(pi*x)-poly(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/tan.sollya b/contrib/arm-optimized-routines/pl/math/tools/tan.sollya
new file mode 100644
index 000000000000..bb0bb28270e3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/tan.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating double precision tan(x)
+//
+// Copyright (c) 2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 8;
+
+// interval bounds
+a = 0x1.0p-126;
+b = pi / 8;
+
+display = hexadecimal;
+
+f = (tan(sqrt(x))-sqrt(x))/x^(3/2);
+poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]);
+
+//print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/tanf.sollya b/contrib/arm-optimized-routines/pl/math/tools/tanf.sollya
new file mode 100644
index 000000000000..f4b49b40ae64
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/tanf.sollya
@@ -0,0 +1,78 @@
+// polynomial for approximating single precision tan(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+dtype = single;
+
+mthd = 0; // approximate tan
+deg = 5; // poly degree
+
+// // Uncomment for cotan
+// mthd = 1; // approximate cotan
+// deg = 3; // poly degree
+
+// interval bounds
+a = 0x1.0p-126;
+b = pi / 4;
+
+print("Print some useful constants");
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+
+print("pi/4");
+pi/4;
+
+// Setup precisions (display and computation)
+display = decimal!;
+prec=128!;
+save_prec=prec;
+
+//
+// Select function to approximate with Sollya
+//
+if(mthd==0) then {
+ s = "x + x^3 * P(x^2)";
+ g = tan(x);
+ F = proc(P) { return x + x^3 * P(x^2); };
+ f = (g(sqrt(x))-sqrt(x))/(x*sqrt(x));
+ init_poly = 0;
+ // Display info
+ print("Approximate g(x) =", g, "as F(x)=", s, ".");
+ poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]);
+}
+else if (mthd==1) then {
+ s = "1/x + x * P(x^2)";
+ g = 1 / tan(x);
+ F = proc(P) { return 1/x + x * P(x^2); };
+ f = (g(sqrt(x))-1/sqrt(x))/(sqrt(x));
+ init_poly = 0;
+ deg_init_poly = -1; // a value such that we actually start by building constant coefficient
+ // Display info
+ print("Approximate g(x) =", g, "as F(x)=", s, ".");
+ // Fpminimax used to minimise absolute error
+ approx_fpminimax = proc(func, poly, d) {
+ return fpminimax(func - poly / x^-(deg-d), 0, [|dtype|], [a;b], absolute, floating);
+ };
+ // Optimise all coefficients at once
+ poly = fpminimax(f, [|0,...,deg|], [|dtype ...|], [a;b], absolute, floating);
+};
+
+
+//
+// Display coefficients in Sollya
+//
+display = hexadecimal!;
+if (dtype==double) then { prec = 53!; }
+else if (dtype==single) then { prec = 23!; };
+print("_coeffs :_ hex");
+for i from 0 to deg do coeff(poly, i);
+
+// Compute errors
+display = hexadecimal!;
+d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]);
+d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]);
+print("dirty rel error:", d_rel_err);
+print("dirty abs error:", d_abs_err);
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya b/contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya
new file mode 100644
index 000000000000..394ba377df12
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/v_erf.sollya
@@ -0,0 +1,20 @@
+// polynomial for approximating erf(x).
+// To generate coefficients for interval i (0 to 47) do:
+// $ sollya v_erf.sollya $i
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+scale = 1/8;
+deg = 9;
+
+itv = parse(__argv[0]);
+if (itv == 0) then { a = 0x1p-1022; }
+else { a = itv * scale; };
+
+prec=256;
+
+poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]);
+
+display = hexadecimal;
+for i from 0 to deg do coeff(poly, i);
\ No newline at end of file
diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya b/contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya
new file mode 100644
index 000000000000..3b03ba07863d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/v_erfc.sollya
@@ -0,0 +1,46 @@
+// polynomial for approximating erfc(x)*exp(x*x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 12; // poly degree
+
+itv = parse(__argv[0]);
+
+bounds = [|3.725290298461914e-9,
+ 0.18920711500272103,
+ 0.41421356237309515,
+ 0.681792830507429,
+ 1,
+ 1.378414230005442,
+ 1.8284271247461903,
+ 2.363585661014858,
+ 3,
+ 3.756828460010884,
+ 4.656854249492381,
+ 5.727171322029716,
+ 7,
+ 8.513656920021768,
+ 10.313708498984761,
+ 12.454342644059432,
+ 15,
+ 18.027313840043536,
+ 21.627416997969522,
+ 25.908685288118864,
+ 31|];
+
+a = bounds[itv];
+b = bounds[itv + 1];
+
+f = proc(y) {
+ t = y + a;
+ return erfc(t) * exp(t*t);
+};
+
+poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]);
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly, i);
diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya b/contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya
new file mode 100644
index 000000000000..e2df4364ada0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/v_log10.sollya
@@ -0,0 +1,38 @@
+// polynomial used for __v_log10(x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 6; // poly degree
+a = -0x1.fc1p-9;
+b = 0x1.009p-8;
+
+// find log(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = 1;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|D ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+// scale coefficients by 1/ln(10)
+ln10 = evaluate(log(10),0);
+poly = poly/ln10;
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do double(coeff(poly,i));
diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya b/contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya
new file mode 100644
index 000000000000..396d5a92302b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/v_log10f.sollya
@@ -0,0 +1,45 @@
+// polynomial for approximating v_log10f(1+x)
+//
+// Copyright (c) 2019-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9; // poly degree
+// |log10(1+x)| > 0x1p-4 outside the interval
+a = -1/3;
+b = 1/3;
+
+display = hexadecimal;
+print("log10(2) = ", single(log10(2)));
+
+ln10 = evaluate(log(10),0);
+invln10 = single(1/ln10);
+
+// find log10(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log10(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f/ln10;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln10;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+display = hexadecimal;
+print("invln10:", invln10);
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do single(coeff(poly,i));
+
+display = decimal;
+print("in [",a,b,"]");
diff --git a/contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya b/contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya
new file mode 100644
index 000000000000..99e050c91b03
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/tools/v_log2f.sollya
@@ -0,0 +1,38 @@
+// polynomial used for __v_log2f(x)
+//
+// Copyright (c) 2022-2023, Arm Limited.
+// SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+
+deg = 9; // poly degree
+a = -1/3;
+b = 1/3;
+
+ln2 = evaluate(log(2),0);
+invln2 = single(1/ln2);
+
+// find log2(1+x)/x polynomial with minimal relative error
+// (minimal relative error polynomial for log2(1+x) is the same * x)
+deg = deg-1; // because of /x
+
+// f = log2(1+x)/x; using taylor series
+f = 0;
+for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
+f = f * invln2;
+
+// return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
+approx = proc(poly,d) {
+ return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
+};
+
+// first coeff is fixed, iteratively find optimal double prec coeffs
+poly = invln2;
+for i from 1 to deg do {
+ p = roundcoefficients(approx(poly,i), [|SG ...|]);
+ poly = poly + x^i*coeff(p,0);
+};
+
+display = hexadecimal;
+print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
+print("in [",a,b,"]");
+print("coeffs:");
+for i from 0 to deg do coeff(poly,i);
diff --git a/contrib/arm-optimized-routines/pl/math/trigpi_references.c b/contrib/arm-optimized-routines/pl/math/trigpi_references.c
new file mode 100644
index 000000000000..4b0514b6766a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/trigpi_references.c
@@ -0,0 +1,57 @@
+/*
+ * Extended precision scalar reference functions for trigpi.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define _GNU_SOURCE
+#include "math_config.h"
+#include "mathlib.h"
+
+long double
+sinpil (long double x)
+{
+ /* sinpi(inf) should return nan, as defined by C23. */
+ if (isinf (x))
+ return __math_invalid (x);
+
+ long double ax = fabsl (x);
+
+ /* Return 0 for all values above 2^64 to prevent
+ overflow when casting to uint64_t. */
+ if (ax >= 0x1p64)
+ return 0;
+
+ /* All integer cases should return 0. */
+ if (ax == (uint64_t) ax)
+ return 0;
+
+ return sinl (x * M_PIl);
+}
+
+long double
+cospil (long double x)
+{
+ /* cospi(inf) should return nan, as defined by C23. */
+ if (isinf (x))
+ return __math_invalid (x);
+
+ long double ax = fabsl (x);
+
+ if (ax >= 0x1p64)
+ return 1;
+
+ uint64_t m = (uint64_t) ax;
+
+ /* Integer values of cospi(x) should return +/-1.
+ The sign depends on if x is odd or even. */
+ if (m == ax)
+ return (m & 1) ? -1 : 1;
+
+ /* Half-integer values (integer + 0.5) should always return 0. */
+ if (ax - 0.5 == m || ax + 0.5 == m)
+ return 0;
+
+ return cosl (ax * M_PIl);
+} \ No newline at end of file
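
These reference functions back the trigpi tests elsewhere in pl/math. A small standalone check of the special cases documented above could look like this (the prototypes are restated by hand for the sketch):

    #include <assert.h>
    #include <math.h>

    /* Prototypes of the reference functions defined above.  */
    long double sinpil (long double x);
    long double cospil (long double x);

    int
    main (void)
    {
      assert (sinpil (2.0L) == 0.0L);      /* integers map to 0 */
      assert (cospil (3.0L) == -1.0L);     /* odd integers map to -1 */
      assert (cospil (2.5L) == 0.0L);      /* half-integers map to 0 */
      assert (isnan (sinpil (INFINITY)));  /* inf raises invalid, yields NaN */
      return 0;
    }
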
diff --git a/contrib/arm-optimized-routines/pl/math/v_acos_2u.c b/contrib/arm-optimized-routines/pl/math/v_acos_2u.c
new file mode 100644
index 000000000000..581f8506c0d6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_acos_2u.c
@@ -0,0 +1,122 @@
+/*
+ * Double-precision vector acos(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[12];
+ float64x2_t pi, pi_over_2;
+ uint64x2_t abs_mask;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+ on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */
+ .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
+ V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
+ V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
+ V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
+ V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
+ V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
+ .pi = V2 (0x1.921fb54442d18p+1),
+ .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+ .abs_mask = V2 (0x7fffffffffffffff),
+};
+
+#define AllMask v_u64 (0xffffffffffffffff)
+#define Oneu (0x3ff0000000000000)
+#define Small (0x3e50000000000000) /* 2^-26. */
+
+#if WANT_SIMD_EXCEPT
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (acos, x, y, special);
+}
+#endif
+
+/* Double-precision implementation of vector acos(x).
+
+ For |x| < Small, approximate acos(x) by pi/2 - x. Small = 2^-26 for correct
+ rounding.
+ If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the following
+ approximation.
+
+ For |x| in [Small, 0.5], use an order 11 polynomial P such that the final
+ approximation of asin is an odd polynomial:
+
+ acos(x) ~ pi/2 - (x + x^3 P(x^2)).
+
+ The largest observed error in this region is 1.18 ulps,
+ _ZGVnN2v_acos (0x1.fbab0a7c460f6p-2) got 0x1.0d54d1985c068p+0
+ want 0x1.0d54d1985c069p+0.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 1.52 ulps,
+ _ZGVnN2v_acos (0x1.23d362722f591p-1) got 0x1.edbbedf8a7d6ep-1
+ want 0x1.edbbedf8a7d6cp-1. */
+float64x2_t VPCS_ATTR V_NAME_D1 (acos) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+
+#if WANT_SIMD_EXCEPT
+ /* A single comparison for One, Small and QNaN. */
+ uint64x2_t special
+ = vcgtq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (ax), v_u64 (Small)),
+ v_u64 (Oneu - Small));
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, x, AllMask);
+#endif
+
+ uint64x2_t a_le_half = vcleq_f64 (ax, v_f64 (0.5));
+
+ /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ float64x2_t z2 = vbslq_f64 (a_le_half, vmulq_f64 (x, x),
+ vfmaq_f64 (v_f64 (0.5), v_f64 (-0.5), ax));
+ float64x2_t z = vbslq_f64 (a_le_half, ax, vsqrtq_f64 (z2));
+
+ /* Use a single polynomial approximation P for both intervals. */
+ float64x2_t z4 = vmulq_f64 (z2, z2);
+ float64x2_t z8 = vmulq_f64 (z4, z4);
+ float64x2_t z16 = vmulq_f64 (z8, z8);
+ float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
+
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
+
+ /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5
+ = 2 Q(|x|) , for 0.5 < x < 1.0
+ = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */
+ float64x2_t y = vbslq_f64 (d->abs_mask, p, x);
+
+ uint64x2_t is_neg = vcltzq_f64 (x);
+ float64x2_t off = vreinterpretq_f64_u64 (
+ vandq_u64 (is_neg, vreinterpretq_u64_f64 (d->pi)));
+ float64x2_t mul = vbslq_f64 (a_le_half, v_f64 (-1.0), v_f64 (2.0));
+ float64x2_t add = vbslq_f64 (a_le_half, d->pi_over_2, off);
+
+ return vfmaq_f64 (add, mul, y);
+}
+
+PL_SIG (V, D, 1, acos, -1.0, 1.0)
+PL_TEST_ULP (V_NAME_D1 (acos), 1.02)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (acos), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_D1 (acos), 0, Small, 5000)
+PL_TEST_INTERVAL (V_NAME_D1 (acos), Small, 0.5, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (acos), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (acos), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (acos), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (V_NAME_D1 (acos), -0, -inf, 20000)
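
The reduction described in the comments above can be restated in scalar form; the sketch below uses the libm asin in place of the degree-11 polynomial Q(z) = z + z^3 * P(z^2), purely to make the identities explicit:

    #include <math.h>

    /* Illustration of the identities only, not the vector implementation.  */
    static double
    acos_reduction_sketch (double x)
    {
      double ax = fabs (x);
      if (ax <= 0.5)
        return M_PI_2 - asin (x); /* acos(x) = pi/2 - (x + x^3 P(x^2)) */

      /* |x| > 0.5: z = sqrt((1 - |x|)/2), acos(|x|) = 2 (z + z^3 P(z^2)),
         and acos(-|x|) = pi - acos(|x|).  */
      double q = 2.0 * asin (sqrt ((1.0 - ax) / 2.0));
      return x < 0 ? M_PI - q : q;
    }
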
diff --git a/contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c b/contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c
new file mode 100644
index 000000000000..bb17b1df18f3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_acosf_1u4.c
@@ -0,0 +1,113 @@
+/*
+ * Single-precision vector acos(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t poly[5];
+ float32x4_t pi_over_2f, pif;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on
+ [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */
+ .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5),
+ V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) },
+ .pi_over_2f = V4 (0x1.921fb6p+0f),
+ .pif = V4 (0x1.921fb6p+1f),
+};
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define Small 0x32800000 /* 2^-26. */
+
+#if WANT_SIMD_EXCEPT
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (acosf, x, y, special);
+}
+#endif
+
+/* Single-precision implementation of vector acos(x).
+
+ For |x| < Small, approximate acos(x) by pi/2 - x. Small = 2^-26 for correct
+ rounding.
+ If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the following
+ approximation.
+
+ For |x| in [Small, 0.5], use order 4 polynomial P such that the final
+ approximation of asin is an odd polynomial:
+
+ acos(x) ~ pi/2 - (x + x^3 P(x^2)).
+
+ The largest observed error in this region is 1.26 ulps,
+ _ZGVnN4v_acosf (0x1.843bfcp-2) got 0x1.2e934cp+0 want 0x1.2e934ap+0.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ acos(x) = y + y * z * P(z), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 1.32 ulps,
+ _ZGVnN4v_acosf (0x1.15ba56p-1) got 0x1.feb33p-1
+ want 0x1.feb32ep-1. */
+float32x4_t VPCS_ATTR V_NAME_F1 (acos) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask));
+
+#if WANT_SIMD_EXCEPT
+ /* A single comparison for One, Small and QNaN. */
+ uint32x4_t special
+ = vcgtq_u32 (vsubq_u32 (ia, v_u32 (Small)), v_u32 (One - Small));
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, x, v_u32 (0xffffffff));
+#endif
+
+ float32x4_t ax = vreinterpretq_f32_u32 (ia);
+ uint32x4_t a_le_half = vcleq_u32 (ia, v_u32 (Half));
+
+ /* Evaluate polynomial Q(x) = z + z * z2 * P(z2) with
+ z2 = x ^ 2 and z = |x| , if |x| < 0.5
+ z2 = (1 - |x|) / 2 and z = sqrt(z2), if |x| >= 0.5. */
+ float32x4_t z2 = vbslq_f32 (a_le_half, vmulq_f32 (x, x),
+ vfmsq_n_f32 (v_f32 (0.5), ax, 0.5));
+ float32x4_t z = vbslq_f32 (a_le_half, ax, vsqrtq_f32 (z2));
+
+ /* Use a single polynomial approximation P for both intervals. */
+ float32x4_t p = v_horner_4_f32 (z2, d->poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = vfmaq_f32 (z, vmulq_f32 (z, z2), p);
+
+ /* acos(|x|) = pi/2 - sign(x) * Q(|x|), for |x| < 0.5
+ = 2 Q(|x|) , for 0.5 < x < 1.0
+ = pi - 2 Q(|x|) , for -1.0 < x < -0.5. */
+ float32x4_t y = vbslq_f32 (v_u32 (AbsMask), p, x);
+
+ uint32x4_t is_neg = vcltzq_f32 (x);
+ float32x4_t off = vreinterpretq_f32_u32 (
+ vandq_u32 (vreinterpretq_u32_f32 (d->pif), is_neg));
+ float32x4_t mul = vbslq_f32 (a_le_half, v_f32 (-1.0), v_f32 (2.0));
+ float32x4_t add = vbslq_f32 (a_le_half, d->pi_over_2f, off);
+
+ return vfmaq_f32 (add, mul, y);
+}
+
+PL_SIG (V, F, 1, acos, -1.0, 1.0)
+PL_TEST_ULP (V_NAME_F1 (acos), 0.82)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (acos), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_F1 (acos), 0, 0x1p-26, 5000)
+PL_TEST_INTERVAL (V_NAME_F1 (acos), 0x1p-26, 0.5, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (acos), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (acos), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (acos), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (V_NAME_F1 (acos), -0, -inf, 20000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c b/contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c
new file mode 100644
index 000000000000..42fa2616d562
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_acosh_3u5.c
@@ -0,0 +1,66 @@
+/*
+ * Double-precision vector acosh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define WANT_V_LOG1P_K0_SHORTCUT 1
+#include "v_log1p_inline.h"
+
+const static struct data
+{
+ struct v_log1p_data log1p_consts;
+ uint64x2_t one, thresh;
+} data = {
+ .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
+ .one = V2 (0x3ff0000000000000),
+ .thresh = V2 (0x1ff0000000000000) /* asuint64(0x1p511) - asuint64(1). */
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special,
+ const struct v_log1p_data *d)
+{
+ return v_call_f64 (acosh, x, log1p_inline (y, d), special);
+}
+
+/* Vector approximation for double-precision acosh, based on log1p.
+ The largest observed error is 3.02 ULP in the region where the
+ argument to log1p falls in the k=0 interval, i.e. x close to 1:
+ _ZGVnN2v_acosh(0x1.00798aaf80739p+0) got 0x1.f2d6d823bc9dfp-5
+ want 0x1.f2d6d823bc9e2p-5. */
+VPCS_ATTR float64x2_t V_NAME_D1 (acosh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t special
+ = vcgeq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (x), d->one), d->thresh);
+ float64x2_t special_arg = x;
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (special)))
+ x = vbslq_f64 (special, vreinterpretq_f64_u64 (d->one), x);
+#endif
+
+ float64x2_t xm1 = vsubq_f64 (x, v_f64 (1));
+ float64x2_t y;
+ y = vaddq_f64 (x, v_f64 (1));
+ y = vmulq_f64 (y, xm1);
+ y = vsqrtq_f64 (y);
+ y = vaddq_f64 (xm1, y);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (special_arg, y, special, &d->log1p_consts);
+ return log1p_inline (y, &d->log1p_consts);
+}
+
+PL_SIG (V, D, 1, acosh, 1.0, 10.0)
+PL_TEST_ULP (V_NAME_D1 (acosh), 2.53)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (acosh), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_D1 (acosh), 1, 0x1p511, 90000)
+PL_TEST_INTERVAL (V_NAME_D1 (acosh), 0x1p511, inf, 10000)
+PL_TEST_INTERVAL (V_NAME_D1 (acosh), 0, 1, 1000)
+PL_TEST_INTERVAL (V_NAME_D1 (acosh), -0, -inf, 10000)
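
The argument handed to log1p above follows from a standard rewriting of acosh; a scalar restatement of that identity (illustration only):

    #include <math.h>

    /* acosh(x) = log(x + sqrt(x^2 - 1))
                = log1p((x - 1) + sqrt((x - 1) * (x + 1))),
       which avoids cancellation for x close to 1.  Valid for 1 <= x < 0x1p511
       here; larger inputs go through the special case above.  */
    static double
    acosh_via_log1p (double x)
    {
      double xm1 = x - 1.0;
      return log1p (xm1 + sqrt (xm1 * (x + 1.0)));
    }
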
diff --git a/contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c b/contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c
new file mode 100644
index 000000000000..a2ff0f02635b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_acoshf_3u1.c
@@ -0,0 +1,78 @@
+/*
+ * Single-precision vector acosh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "v_log1pf_inline.h"
+
+const static struct data
+{
+ struct v_log1pf_data log1pf_consts;
+ uint32x4_t one;
+ uint16x4_t thresh;
+} data = {
+ .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+ .one = V4 (0x3f800000),
+ .thresh = V4 (0x2000) /* asuint(0x1p64) - asuint(1). */
+};
+
+#define SignMask 0x80000000
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint16x4_t special,
+ const struct v_log1pf_data d)
+{
+ return v_call_f32 (acoshf, x, log1pf_inline (y, d), vmovl_u16 (special));
+}
+
+/* Vector approximation for single-precision acosh, based on log1p. Maximum
+ error depends on WANT_SIMD_EXCEPT. With SIMD fp exceptions enabled, it
+ is 2.78 ULP:
+ __v_acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3
+ want 0x1.ef9ea2p-3.
+ With exceptions disabled, we can compute u with a shorter dependency chain,
+ which gives a maximum error of 3.07 ULP:
+ __v_acoshf(0x1.01f83ep+0) got 0x1.fbc7fap-4
+ want 0x1.fbc7f4p-4. */
+
+VPCS_ATTR float32x4_t V_NAME_F1 (acosh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (ix, d->one), d->thresh);
+
+#if WANT_SIMD_EXCEPT
+ /* Mask special lanes with 1 to side-step spurious invalid or overflow. Use
+ only xm1 to calculate u, as operating on x will trigger invalid for NaN.
+ Widening sign-extend special predicate in order to mask with it. */
+ uint32x4_t p
+ = vreinterpretq_u32_s32 (vmovl_s16 (vreinterpret_s16_u16 (special)));
+ float32x4_t xm1 = v_zerofy_f32 (vsubq_f32 (x, v_f32 (1)), p);
+ float32x4_t u = vfmaq_f32 (vaddq_f32 (xm1, xm1), xm1, xm1);
+#else
+ float32x4_t xm1 = vsubq_f32 (x, v_f32 (1));
+ float32x4_t u = vmulq_f32 (xm1, vaddq_f32 (x, v_f32 (1.0f)));
+#endif
+
+ float32x4_t y = vaddq_f32 (xm1, vsqrtq_f32 (u));
+
+ if (unlikely (v_any_u16h (special)))
+ return special_case (x, y, special, d->log1pf_consts);
+ return log1pf_inline (y, d->log1pf_consts);
+}
+
+PL_SIG (V, F, 1, acosh, 1.0, 10.0)
+#if WANT_SIMD_EXCEPT
+PL_TEST_ULP (V_NAME_F1 (acosh), 2.29)
+#else
+PL_TEST_ULP (V_NAME_F1 (acosh), 2.58)
+#endif
+PL_TEST_EXPECT_FENV (V_NAME_F1 (acosh), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_F1 (acosh), 0, 1, 500)
+PL_TEST_INTERVAL (V_NAME_F1 (acosh), 1, SquareLim, 100000)
+PL_TEST_INTERVAL (V_NAME_F1 (acosh), SquareLim, inf, 1000)
+PL_TEST_INTERVAL (V_NAME_F1 (acosh), -0, -inf, 1000)
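
The two computations of u above are the same quantity algebraically, since (x - 1)(x + 1) = 2(x - 1) + (x - 1)^2 = x^2 - 1; a quick scalar check of that equivalence (values chosen so both forms are exact):

    #include <assert.h>

    int
    main (void)
    {
      float x = 1.5f, xm1 = x - 1.0f;
      float u_a = xm1 * (x + 1.0f);        /* form used without SIMD exceptions */
      float u_b = (xm1 + xm1) + xm1 * xm1; /* fma form used with SIMD exceptions */
      assert (u_a == u_b);                 /* both are exactly 1.25f */
      return 0;
    }
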
diff --git a/contrib/arm-optimized-routines/pl/math/v_asin_3u.c b/contrib/arm-optimized-routines/pl/math/v_asin_3u.c
new file mode 100644
index 000000000000..756443c6b320
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_asin_3u.c
@@ -0,0 +1,113 @@
+/*
+ * Double-precision vector asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[12];
+ float64x2_t pi_over_2;
+ uint64x2_t abs_mask;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
+ on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. */
+ .poly = { V2 (0x1.555555555554ep-3), V2 (0x1.3333333337233p-4),
+ V2 (0x1.6db6db67f6d9fp-5), V2 (0x1.f1c71fbd29fbbp-6),
+ V2 (0x1.6e8b264d467d6p-6), V2 (0x1.1c5997c357e9dp-6),
+ V2 (0x1.c86a22cd9389dp-7), V2 (0x1.856073c22ebbep-7),
+ V2 (0x1.fd1151acb6bedp-8), V2 (0x1.087182f799c1dp-6),
+ V2 (-0x1.6602748120927p-7), V2 (0x1.cfa0dd1f9478p-6), },
+ .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+ .abs_mask = V2 (0x7fffffffffffffff),
+};
+
+#define AllMask v_u64 (0xffffffffffffffff)
+#define One (0x3ff0000000000000)
+#define Small (0x3e50000000000000) /* 2^-26. */
+
+#if WANT_SIMD_EXCEPT
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (asin, x, y, special);
+}
+#endif
+
+/* Double-precision implementation of vector asin(x).
+
+ For |x| < Small, approximate asin(x) by x. Small = 2^-26 for correct
+ rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the
+ following approximation.
+
+ For |x| in [Small, 0.5], use an order 11 polynomial P such that the final
+ approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+ The largest observed error in this region is 1.01 ulps,
+ _ZGVnN2v_asin (0x1.da9735b5a9277p-2) got 0x1.ed78525a927efp-2
+ want 0x1.ed78525a927eep-2.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 2.69 ulps,
+ _ZGVnN2v_asin (0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
+ want 0x1.110d7e85fdd53p-1. */
+float64x2_t VPCS_ATTR V_NAME_D1 (asin) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+
+#if WANT_SIMD_EXCEPT
+ /* Special values need to be computed with scalar fallbacks so
+ that appropriate exceptions are raised. */
+ uint64x2_t special
+ = vcgtq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (ax), v_u64 (Small)),
+ v_u64 (One - Small));
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, x, AllMask);
+#endif
+
+ uint64x2_t a_lt_half = vcltq_f64 (ax, v_f64 (0.5));
+
+ /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+ z = x ^ 2 and y = |x| , if |x| < 0.5
+ z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */
+ float64x2_t z2 = vbslq_f64 (a_lt_half, vmulq_f64 (x, x),
+ vfmsq_n_f64 (v_f64 (0.5), ax, 0.5));
+ float64x2_t z = vbslq_f64 (a_lt_half, ax, vsqrtq_f64 (z2));
+
+ /* Use a single polynomial approximation P for both intervals. */
+ float64x2_t z4 = vmulq_f64 (z2, z2);
+ float64x2_t z8 = vmulq_f64 (z4, z4);
+ float64x2_t z16 = vmulq_f64 (z8, z8);
+ float64x2_t p = v_estrin_11_f64 (z2, z4, z8, z16, d->poly);
+
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = vfmaq_f64 (z, vmulq_f64 (z, z2), p);
+
+ /* asin(|x|) = Q(|x|) , for |x| < 0.5
+ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */
+ float64x2_t y = vbslq_f64 (a_lt_half, p, vfmsq_n_f64 (d->pi_over_2, p, 2.0));
+
+ /* Copy sign. */
+ return vbslq_f64 (d->abs_mask, y, x);
+}
+
+PL_SIG (V, D, 1, asin, -1.0, 1.0)
+PL_TEST_ULP (V_NAME_D1 (asin), 2.19)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (asin), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_D1 (asin), 0, Small, 5000)
+PL_TEST_INTERVAL (V_NAME_D1 (asin), Small, 0.5, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (asin), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (asin), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (asin), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (V_NAME_D1 (asin), -0, -inf, 20000)
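
As with acos above, the asin reduction can be restated in scalar form, with the libm asin standing in for Q(z) = z + z^3 * P(z^2) (illustration of the identities only):

    #include <math.h>

    static double
    asin_reduction_sketch (double x)
    {
      double ax = fabs (x);
      double y;
      if (ax < 0.5)
        y = asin (ax);                                      /* Q(|x|) */
      else
        y = M_PI_2 - 2.0 * asin (sqrt ((1.0 - ax) / 2.0));  /* pi/2 - 2 Q(z) */
      return copysign (y, x); /* asin is odd */
    }
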
diff --git a/contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c b/contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c
new file mode 100644
index 000000000000..eb978cd956ab
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_asinf_2u5.c
@@ -0,0 +1,104 @@
+/*
+ * Single-precision vector asin(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t poly[5];
+ float32x4_t pi_over_2f;
+} data = {
+ /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on
+ [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 . */
+ .poly = { V4 (0x1.55555ep-3), V4 (0x1.33261ap-4), V4 (0x1.70d7dcp-5),
+ V4 (0x1.b059dp-6), V4 (0x1.3af7d8p-5) },
+ .pi_over_2f = V4 (0x1.921fb6p+0f),
+};
+
+#define AbsMask 0x7fffffff
+#define Half 0x3f000000
+#define One 0x3f800000
+#define Small 0x39800000 /* 2^-12. */
+
+#if WANT_SIMD_EXCEPT
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (asinf, x, y, special);
+}
+#endif
+
+/* Single-precision implementation of vector asin(x).
+
+ For |x| < Small, approximate asin(x) by x. Small = 2^-12 for correct
+ rounding. If WANT_SIMD_EXCEPT = 0, Small = 0 and we proceed with the
+ following approximation.
+
+ For |x| in [Small, 0.5], use order 4 polynomial P such that the final
+ approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
+
+ The largest observed error in this region is 0.83 ulps,
+ _ZGVnN4v_asinf (0x1.ea00f4p-2) got 0x1.fef15ep-2 want 0x1.fef15cp-2.
+
+ For |x| in [0.5, 1.0], use same approximation with a change of variable
+
+ asin(x) = pi/2 - (y + y * z * P(z)), with z = (1-x)/2 and y = sqrt(z).
+
+ The largest observed error in this region is 2.41 ulps,
+ _ZGVnN4v_asinf (0x1.00203ep-1) got 0x1.0c3a64p-1 want 0x1.0c3a6p-1. */
+float32x4_t VPCS_ATTR V_NAME_F1 (asin) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint32x4_t ia = vandq_u32 (ix, v_u32 (AbsMask));
+
+#if WANT_SIMD_EXCEPT
+ /* Special values need to be computed with scalar fallbacks so
+ that appropriate fp exceptions are raised. */
+ uint32x4_t special
+ = vcgtq_u32 (vsubq_u32 (ia, v_u32 (Small)), v_u32 (One - Small));
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, x, v_u32 (0xffffffff));
+#endif
+
+ float32x4_t ax = vreinterpretq_f32_u32 (ia);
+ uint32x4_t a_lt_half = vcltq_u32 (ia, v_u32 (Half));
+
+ /* Evaluate polynomial Q(x) = y + y * z * P(z) with
+ z = x ^ 2 and y = |x| , if |x| < 0.5
+ z = (1 - |x|) / 2 and y = sqrt(z), if |x| >= 0.5. */
+ float32x4_t z2 = vbslq_f32 (a_lt_half, vmulq_f32 (x, x),
+ vfmsq_n_f32 (v_f32 (0.5), ax, 0.5));
+ float32x4_t z = vbslq_f32 (a_lt_half, ax, vsqrtq_f32 (z2));
+
+ /* Use a single polynomial approximation P for both intervals. */
+ float32x4_t p = v_horner_4_f32 (z2, d->poly);
+ /* Finalize polynomial: z + z * z2 * P(z2). */
+ p = vfmaq_f32 (z, vmulq_f32 (z, z2), p);
+
+ /* asin(|x|) = Q(|x|) , for |x| < 0.5
+ = pi/2 - 2 Q(|x|), for |x| >= 0.5. */
+ float32x4_t y
+ = vbslq_f32 (a_lt_half, p, vfmsq_n_f32 (d->pi_over_2f, p, 2.0));
+
+ /* Copy sign. */
+ return vbslq_f32 (v_u32 (AbsMask), y, x);
+}
+
+PL_SIG (V, F, 1, asin, -1.0, 1.0)
+PL_TEST_ULP (V_NAME_F1 (asin), 1.91)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (asin), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_F1 (asin), 0, 0x1p-12, 5000)
+PL_TEST_INTERVAL (V_NAME_F1 (asin), 0x1p-12, 0.5, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (asin), 0.5, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (asin), 1.0, 0x1p11, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (asin), 0x1p11, inf, 20000)
+PL_TEST_INTERVAL (V_NAME_F1 (asin), -0, -inf, 20000)
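
Calling the vector entry point directly uses the AArch64 vector-ABI name quoted in the error reports above; a hedged usage sketch (the declaration is written out by hand here and would normally come from the library headers):

    #include <arm_neon.h>
    #include <stdio.h>

    /* Vector-PCS symbol provided by this routine; declared manually for the
       sketch.  */
    __attribute__ ((aarch64_vector_pcs)) float32x4_t
    _ZGVnN4v_asinf (float32x4_t);

    int
    main (void)
    {
      float in[4] = { -1.0f, -0.5f, 0.5f, 1.0f };
      float out[4];
      vst1q_f32 (out, _ZGVnN4v_asinf (vld1q_f32 (in)));
      for (int i = 0; i < 4; i++)
        printf ("asinf(% .2f) = %a\n", (double) in[i], (double) out[i]);
      return 0;
    }
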
diff --git a/contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c b/contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c
new file mode 100644
index 000000000000..4862bef94861
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_asinh_3u5.c
@@ -0,0 +1,175 @@
+/*
+ * Double-precision vector asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define A(i) v_f64 (__v_log_data.poly[i])
+#define N (1 << V_LOG_TABLE_BITS)
+
+const static struct data
+{
+ float64x2_t poly[18];
+ uint64x2_t off, huge_bound, abs_mask;
+ float64x2_t ln2, tiny_bound;
+} data = {
+ .off = V2 (0x3fe6900900000000),
+ .ln2 = V2 (0x1.62e42fefa39efp-1),
+ .huge_bound = V2 (0x5fe0000000000000),
+ .tiny_bound = V2 (0x1p-26),
+ .abs_mask = V2 (0x7fffffffffffffff),
+ /* Even terms of polynomial s.t. asinh(x) is approximated by
+ asinh(x) ~= x + x^3 * (C0 + C1 * x + C2 * x^2 + C3 * x^3 + ...).
+ Generated using Remez, f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2). */
+ .poly = { V2 (-0x1.55555555554a7p-3), V2 (0x1.3333333326c7p-4),
+ V2 (-0x1.6db6db68332e6p-5), V2 (0x1.f1c71b26fb40dp-6),
+ V2 (-0x1.6e8b8b654a621p-6), V2 (0x1.1c4daa9e67871p-6),
+ V2 (-0x1.c9871d10885afp-7), V2 (0x1.7a16e8d9d2ecfp-7),
+ V2 (-0x1.3ddca533e9f54p-7), V2 (0x1.0becef748dafcp-7),
+ V2 (-0x1.b90c7099dd397p-8), V2 (0x1.541f2bb1ffe51p-8),
+ V2 (-0x1.d217026a669ecp-9), V2 (0x1.0b5c7977aaf7p-9),
+ V2 (-0x1.e0f37daef9127p-11), V2 (0x1.388b5fe542a6p-12),
+ V2 (-0x1.021a48685e287p-14), V2 (0x1.93d4ba83d34dap-18) },
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (asinh, x, y, special);
+}
+
+struct entry
+{
+ float64x2_t invc;
+ float64x2_t logc;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+ float64x2_t e0 = vld1q_f64 (
+ &__v_log_data.table[(i[0] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
+ float64x2_t e1 = vld1q_f64 (
+ &__v_log_data.table[(i[1] >> (52 - V_LOG_TABLE_BITS)) & (N - 1)].invc);
+ return (struct entry){ vuzp1q_f64 (e0, e1), vuzp2q_f64 (e0, e1) };
+}
+
+static inline float64x2_t
+log_inline (float64x2_t x, const struct data *d)
+{
+ /* Double-precision vector log, copied from ordinary vector log with some
+ cosmetic modification and special-cases removed. */
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint64x2_t tmp = vsubq_u64 (ix, d->off);
+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
+ uint64x2_t iz
+ = vsubq_u64 (ix, vandq_u64 (tmp, vdupq_n_u64 (0xfffULL << 52)));
+ float64x2_t z = vreinterpretq_f64_u64 (iz);
+ struct entry e = lookup (tmp);
+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+ float64x2_t kd = vcvtq_f64_s64 (k);
+ float64x2_t hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t y = vfmaq_f64 (A (2), A (3), r);
+ float64x2_t p = vfmaq_f64 (A (0), A (1), r);
+ y = vfmaq_f64 (y, A (4), r2);
+ y = vfmaq_f64 (p, y, r2);
+ y = vfmaq_f64 (hi, y, r2);
+ return y;
+}
+
+/* Double-precision implementation of vector asinh(x).
+ asinh is very sensitive around 1, so it is impractical to devise a single
+ low-cost algorithm which is sufficiently accurate on a wide range of input.
+ Instead we use two different algorithms:
+ asinh(x) = sign(x) * log(|x| + sqrt(x^2 + 1) if |x| >= 1
+ = sign(x) * (|x| + |x|^3 * P(x^2)) otherwise
+ where log(x) is an optimized log approximation, and P(x) is a polynomial
+ shared with the scalar routine. The greatest observed error is 3.29 ULP, in
+ |x| >= 1:
+ __v_asinh(0x1.2cd9d717e2c9bp+0) got 0x1.ffffcfd0e234fp-1
+ want 0x1.ffffcfd0e2352p-1. */
+VPCS_ATTR float64x2_t V_NAME_D1 (asinh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t iax = vreinterpretq_u64_f64 (ax);
+
+ uint64x2_t gt1 = vcgeq_f64 (ax, v_f64 (1));
+ uint64x2_t special = vcgeq_u64 (iax, d->huge_bound);
+
+#if WANT_SIMD_EXCEPT
+ uint64x2_t tiny = vcltq_f64 (ax, d->tiny_bound);
+ special = vorrq_u64 (special, tiny);
+#endif
+
+ /* Option 1: |x| >= 1.
+ Compute asinh(x) according to asinh(x) = log(x + sqrt(x^2 + 1)).
+ If WANT_SIMD_EXCEPT is enabled, sidestep special values, which will
+ overflow, by setting special lanes to 1. These will be fixed later. */
+ float64x2_t option_1 = v_f64 (0);
+ if (likely (v_any_u64 (gt1)))
+ {
+#if WANT_SIMD_EXCEPT
+ float64x2_t xm = v_zerofy_f64 (ax, special);
+#else
+ float64x2_t xm = ax;
+#endif
+ option_1 = log_inline (
+ vaddq_f64 (xm, vsqrtq_f64 (vfmaq_f64 (v_f64 (1), xm, xm))), d);
+ }
+
+ /* Option 2: |x| < 1.
+ Compute asinh(x) using a polynomial.
+ If WANT_SIMD_EXCEPT is enabled, sidestep special lanes, which will
+ overflow, and tiny lanes, which will underflow, by setting them to 0. They
+ will be fixed later, either by selecting x or falling back to the scalar
+ special-case. The largest observed error in this region is 1.47 ULPs:
+ __v_asinh(0x1.fdfcd00cc1e6ap-1) got 0x1.c1d6bf874019bp-1
+ want 0x1.c1d6bf874019cp-1. */
+ float64x2_t option_2 = v_f64 (0);
+ if (likely (v_any_u64 (vceqzq_u64 (gt1))))
+ {
+#if WANT_SIMD_EXCEPT
+ ax = v_zerofy_f64 (ax, vorrq_u64 (tiny, gt1));
+#endif
+ float64x2_t x2 = vmulq_f64 (ax, ax), x3 = vmulq_f64 (ax, x2),
+ z2 = vmulq_f64 (x2, x2), z4 = vmulq_f64 (z2, z2),
+ z8 = vmulq_f64 (z4, z4), z16 = vmulq_f64 (z8, z8);
+ float64x2_t p = v_estrin_17_f64 (x2, z2, z4, z8, z16, d->poly);
+ option_2 = vfmaq_f64 (ax, p, x3);
+#if WANT_SIMD_EXCEPT
+ option_2 = vbslq_f64 (tiny, x, option_2);
+#endif
+ }
+
+ /* Choose the right option for each lane. */
+ float64x2_t y = vbslq_f64 (gt1, option_1, option_2);
+ /* Copy sign. */
+ y = vbslq_f64 (d->abs_mask, y, x);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, y, special);
+ return y;
+}
+
+PL_SIG (V, D, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_D1 (asinh), 2.80)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (asinh), WANT_SIMD_EXCEPT)
+/* Test vector asinh 3 times, with control lane < 1, > 1 and special.
+ Ensures the v_sel is choosing the right option in all cases. */
+#define V_ASINH_INTERVAL(lo, hi, n) \
+ PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 0.5) \
+ PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 2) \
+ PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (asinh), lo, hi, n, 0x1p600)
+V_ASINH_INTERVAL (0, 0x1p-26, 50000)
+V_ASINH_INTERVAL (0x1p-26, 1, 50000)
+V_ASINH_INTERVAL (1, 0x1p511, 50000)
+V_ASINH_INTERVAL (0x1p511, inf, 40000)
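
The two options described above correspond to a simple scalar branch; the sketch below uses libm calls in place of log_inline and the shared polynomial (illustration only):

    #include <math.h>

    static double
    asinh_branch_sketch (double x)
    {
      double ax = fabs (x);
      double y = ax >= 1.0
                     ? log (ax + sqrt (ax * ax + 1.0)) /* option 1 */
                     : asinh (ax);                     /* option 2: ax + ax^3 P(ax^2) */
      return copysign (y, x);
    }
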
diff --git a/contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c b/contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c
new file mode 100644
index 000000000000..1723ba90d2f3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_asinhf_2u7.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision vector asinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "v_log1pf_inline.h"
+
+#define SignMask v_u32 (0x80000000)
+
+const static struct data
+{
+ struct v_log1pf_data log1pf_consts;
+ uint32x4_t big_bound;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t tiny_bound;
+#endif
+} data = {
+ .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+ .big_bound = V4 (0x5f800000), /* asuint(0x1p64). */
+#if WANT_SIMD_EXCEPT
+ .tiny_bound = V4 (0x30800000) /* asuint(0x1p-30). */
+#endif
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (asinhf, x, y, special);
+}
+
+/* Single-precision implementation of vector asinh(x), using vector log1p.
+ Worst-case error is 2.66 ULP, at roughly +/-0.25:
+ __v_asinhf(0x1.01b04p-2) got 0x1.fe163ep-3 want 0x1.fe1638p-3. */
+VPCS_ATTR float32x4_t V_NAME_F1 (asinh) (float32x4_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+ uint32x4_t iax = vbicq_u32 (vreinterpretq_u32_f32 (x), SignMask);
+ float32x4_t ax = vreinterpretq_f32_u32 (iax);
+ uint32x4_t special = vcgeq_u32 (iax, dat->big_bound);
+ float32x4_t special_arg = x;
+
+#if WANT_SIMD_EXCEPT
+ /* Sidestep tiny and large values to avoid inadvertently triggering
+ under/overflow. */
+ special = vorrq_u32 (special, vcltq_u32 (iax, dat->tiny_bound));
+ if (unlikely (v_any_u32 (special)))
+ {
+ ax = v_zerofy_f32 (ax, special);
+ x = v_zerofy_f32 (x, special);
+ }
+#endif
+
+ /* asinh(x) = log(x + sqrt(x * x + 1)).
+ For positive x, asinh(x) = log1p(x + x * x / (1 + sqrt(x * x + 1))). */
+ float32x4_t d
+ = vaddq_f32 (v_f32 (1), vsqrtq_f32 (vfmaq_f32 (v_f32 (1), x, x)));
+ float32x4_t y = log1pf_inline (
+ vaddq_f32 (ax, vdivq_f32 (vmulq_f32 (ax, ax), d)), dat->log1pf_consts);
+
+ if (unlikely (v_any_u32 (special)))
+ return special_case (special_arg, vbslq_f32 (SignMask, x, y), special);
+ return vbslq_f32 (SignMask, x, y);
+}
+
+PL_SIG (V, F, 1, asinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (asinh), 2.17)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (asinh), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0, 0x1p-12, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p-12, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), 1.0, 0x1p11, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), 0x1p11, inf, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0, -0x1p-12, 20000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p-12, -1.0, 20000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), -1.0, -0x1p11, 20000)
+PL_TEST_INTERVAL (V_NAME_F1 (asinh), -0x1p11, -inf, 20000)
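
The expression handed to log1pf_inline comes from rewriting asinh so it stays accurate for small positive x; in scalar form (illustration only, assuming 0 <= x below the big bound):

    #include <math.h>

    /* asinh(x) = log(x + sqrt(x^2 + 1)) = log1p(x + x^2 / (1 + sqrt(x^2 + 1))).  */
    static float
    asinhf_via_log1pf (float x)
    {
      float d = 1.0f + sqrtf (x * x + 1.0f);
      return log1pf (x + x * x / d);
    }
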
diff --git a/contrib/arm-optimized-routines/pl/math/v_atan2_3u.c b/contrib/arm-optimized-routines/pl/math/v_atan2_3u.c
new file mode 100644
index 000000000000..f24667682dec
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_atan2_3u.c
@@ -0,0 +1,121 @@
+/*
+ * Double-precision vector atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f64.h"
+
+static const struct data
+{
+ float64x2_t pi_over_2;
+ float64x2_t poly[20];
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ the interval [2**-1022, 1.0]. */
+ .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3),
+ V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4),
+ V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4),
+ V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5),
+ V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5),
+ V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5),
+ V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6),
+ V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7),
+ V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10),
+ V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), },
+ .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+
+/* Special cases i.e. 0, infinity, NaN (fall back to scalar calls). */
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t y, float64x2_t x, float64x2_t ret, uint64x2_t cmp)
+{
+ return v_call2_f64 (atan2, y, x, ret, cmp);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan. */
+static inline uint64x2_t
+zeroinfnan (uint64x2_t i)
+{
+ /* (2 * i - 1) >= (2 * asuint64 (INFINITY) - 1). */
+ return vcgeq_u64 (vsubq_u64 (vaddq_u64 (i, i), v_u64 (1)),
+ v_u64 (2 * asuint64 (INFINITY) - 1));
+}
+
+/* Fast implementation of vector atan2.
+ Maximum observed error is 2.8 ulps:
+ _ZGVnN2vv_atan2 (0x1.9651a429a859ap+5, 0x1.953075f4ee26p+5)
+ got 0x1.92d628ab678ccp-1
+ want 0x1.92d628ab678cfp-1. */
+float64x2_t VPCS_ATTR V_NAME_D2 (atan2) (float64x2_t y, float64x2_t x)
+{
+ const struct data *data_ptr = ptr_barrier (&data);
+
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint64x2_t iy = vreinterpretq_u64_f64 (y);
+
+ uint64x2_t special_cases = vorrq_u64 (zeroinfnan (ix), zeroinfnan (iy));
+
+ uint64x2_t sign_x = vandq_u64 (ix, SignMask);
+ uint64x2_t sign_y = vandq_u64 (iy, SignMask);
+ uint64x2_t sign_xy = veorq_u64 (sign_x, sign_y);
+
+ float64x2_t ax = vabsq_f64 (x);
+ float64x2_t ay = vabsq_f64 (y);
+
+ uint64x2_t pred_xlt0 = vcltzq_f64 (x);
+ uint64x2_t pred_aygtax = vcgtq_f64 (ay, ax);
+
+ /* Set up z for call to atan. */
+ float64x2_t n = vbslq_f64 (pred_aygtax, vnegq_f64 (ax), ay);
+ float64x2_t d = vbslq_f64 (pred_aygtax, ay, ax);
+ float64x2_t z = vdivq_f64 (n, d);
+
+ /* Work out the correct shift. */
+ float64x2_t shift = vreinterpretq_f64_u64 (
+ vandq_u64 (pred_xlt0, vreinterpretq_u64_f64 (v_f64 (-2.0))));
+ shift = vbslq_f64 (pred_aygtax, vaddq_f64 (shift, v_f64 (1.0)), shift);
+ shift = vmulq_f64 (shift, data_ptr->pi_over_2);
+
+ /* Calculate the polynomial approximation.
+ Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
+ full scheme to avoid underflow in x^16.
+ The order 19 polynomial P approximates
+ (atan(sqrt(x))-sqrt(x))/x^(3/2). */
+ float64x2_t z2 = vmulq_f64 (z, z);
+ float64x2_t x2 = vmulq_f64 (z2, z2);
+ float64x2_t x4 = vmulq_f64 (x2, x2);
+ float64x2_t x8 = vmulq_f64 (x4, x4);
+ float64x2_t ret
+ = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, data_ptr->poly),
+ v_estrin_11_f64 (z2, x2, x4, x8, data_ptr->poly + 8), x8);
+
+ /* Finalize. y = shift + z + z^3 * P(z^2). */
+ ret = vfmaq_f64 (z, ret, vmulq_f64 (z2, z));
+ ret = vaddq_f64 (ret, shift);
+
+ /* Account for the sign of x and y. */
+ ret = vreinterpretq_f64_u64 (
+ veorq_u64 (vreinterpretq_u64_f64 (ret), sign_xy));
+
+ if (unlikely (v_any_u64 (special_cases)))
+ return special_case (y, x, ret, special_cases);
+
+ return ret;
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (V, D, 2, atan2)
+// TODO tighten this once __v_atan2 is fixed
+PL_TEST_ULP (V_NAME_D2 (atan2), 2.9)
+PL_TEST_INTERVAL (V_NAME_D2 (atan2), -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (V_NAME_D2 (atan2), -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME_D2 (atan2), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME_D2 (atan2), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (V_NAME_D2 (atan2), 1e6, 1e32, 40000)
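
The shift and sign bookkeeping above maps every quadrant onto a single atan evaluation; a scalar restatement with atan standing in for z + z^3 * P(z^2), and the 0/inf/nan special cases omitted:

    #include <math.h>

    static double
    atan2_reduction_sketch (double y, double x)
    {
      double ax = fabs (x), ay = fabs (y);
      int ay_gt_ax = ay > ax;
      double z = ay_gt_ax ? -ax / ay : ay / ax;
      /* shift is -2, -1, 0 or 1 multiples of pi/2 depending on quadrant.  */
      double shift = (x < 0.0 ? -2.0 : 0.0) + (ay_gt_ax ? 1.0 : 0.0);
      double ret = shift * M_PI_2 + atan (z);
      /* Apply the XOR of the signs of x and y.  */
      return signbit (x) != signbit (y) ? -ret : ret;
    }
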
diff --git a/contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c b/contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c
new file mode 100644
index 000000000000..bbfc3cb552f6
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_atan2f_3u.c
@@ -0,0 +1,115 @@
+/*
+ * Single-precision vector atan2(y, x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f32.h"
+
+static const struct data
+{
+ float32x4_t poly[8];
+ float32x4_t pi_over_2;
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-128, 1.0].
+ Generated using fpminimax between FLT_MIN and 1. */
+ .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f),
+ V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f),
+ V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) },
+ .pi_over_2 = V4 (0x1.921fb6p+0f),
+};
+
+#define SignMask v_u32 (0x80000000)
+
+/* Special cases i.e. 0, infinity and nan (fall back to scalar calls). */
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t y, float32x4_t x, float32x4_t ret, uint32x4_t cmp)
+{
+ return v_call2_f32 (atan2f, y, x, ret, cmp);
+}
+
+/* Returns 1 if input is the bit representation of 0, infinity or nan. */
+static inline uint32x4_t
+zeroinfnan (uint32x4_t i)
+{
+ /* 2 * i - 1 >= 2 * 0x7f800000lu - 1. */
+ return vcgeq_u32 (vsubq_u32 (vmulq_n_u32 (i, 2), v_u32 (1)),
+ v_u32 (2 * 0x7f800000lu - 1));
+}
+
+/* Fast implementation of vector atan2f. Maximum observed error is
+ 2.95 ULP in [0x1.9300d6p+6 0x1.93c0c6p+6] x [0x1.8c2dbp+6 0x1.8cea6p+6]:
+ _ZGVnN4vv_atan2f (0x1.93836cp+6, 0x1.8cae1p+6) got 0x1.967f06p-1
+ want 0x1.967f00p-1. */
+float32x4_t VPCS_ATTR V_NAME_F2 (atan2) (float32x4_t y, float32x4_t x)
+{
+ const struct data *data_ptr = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint32x4_t iy = vreinterpretq_u32_f32 (y);
+
+ uint32x4_t special_cases = vorrq_u32 (zeroinfnan (ix), zeroinfnan (iy));
+
+ uint32x4_t sign_x = vandq_u32 (ix, SignMask);
+ uint32x4_t sign_y = vandq_u32 (iy, SignMask);
+ uint32x4_t sign_xy = veorq_u32 (sign_x, sign_y);
+
+ float32x4_t ax = vabsq_f32 (x);
+ float32x4_t ay = vabsq_f32 (y);
+
+ uint32x4_t pred_xlt0 = vcltzq_f32 (x);
+ uint32x4_t pred_aygtax = vcgtq_f32 (ay, ax);
+
+ /* Set up z for call to atanf. */
+ float32x4_t n = vbslq_f32 (pred_aygtax, vnegq_f32 (ax), ay);
+ float32x4_t d = vbslq_f32 (pred_aygtax, ay, ax);
+ float32x4_t z = vdivq_f32 (n, d);
+
+ /* Work out the correct shift. */
+ float32x4_t shift = vreinterpretq_f32_u32 (
+ vandq_u32 (pred_xlt0, vreinterpretq_u32_f32 (v_f32 (-2.0f))));
+ shift = vbslq_f32 (pred_aygtax, vaddq_f32 (shift, v_f32 (1.0f)), shift);
+ shift = vmulq_f32 (shift, data_ptr->pi_over_2);
+
+ /* Calculate the polynomial approximation.
+ Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
+ a standard implementation using z8 creates spurious underflow
+ in the very last fma (when z^8 is small enough).
+ Therefore, we split the last fma into a mul and an fma.
+ Horner and single-level Estrin have higher errors that exceed
+ threshold. */
+ float32x4_t z2 = vmulq_f32 (z, z);
+ float32x4_t z4 = vmulq_f32 (z2, z2);
+
+ float32x4_t ret = vfmaq_f32 (
+ v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly), z4,
+ vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, data_ptr->poly + 4)));
+
+ /* y = shift + z * P(z^2). */
+ ret = vaddq_f32 (vfmaq_f32 (z, ret, vmulq_f32 (z2, z)), shift);
+
+ /* Account for the sign of x and y. */
+ ret = vreinterpretq_f32_u32 (
+ veorq_u32 (vreinterpretq_u32_f32 (ret), sign_xy));
+
+ if (unlikely (v_any_u32 (special_cases)))
+ {
+ return special_case (y, x, ret, special_cases);
+ }
+
+ return ret;
+}
+
+/* Arity of 2 means no mathbench entry emitted. See test/mathbench_funcs.h. */
+PL_SIG (V, F, 2, atan2)
+PL_TEST_ULP (V_NAME_F2 (atan2), 2.46)
+PL_TEST_INTERVAL (V_NAME_F2 (atan2), -10.0, 10.0, 50000)
+PL_TEST_INTERVAL (V_NAME_F2 (atan2), -1.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME_F2 (atan2), 0.0, 1.0, 40000)
+PL_TEST_INTERVAL (V_NAME_F2 (atan2), 1.0, 100.0, 40000)
+PL_TEST_INTERVAL (V_NAME_F2 (atan2), 1e6, 1e32, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_atan_2u5.c b/contrib/arm-optimized-routines/pl/math/v_atan_2u5.c
new file mode 100644
index 000000000000..ba68cc3cc720
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_atan_2u5.c
@@ -0,0 +1,104 @@
+/*
+ * Double-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f64.h"
+
+static const struct data
+{
+ float64x2_t pi_over_2;
+ float64x2_t poly[20];
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-1022, 1.0]. */
+ .poly = { V2 (-0x1.5555555555555p-2), V2 (0x1.99999999996c1p-3),
+ V2 (-0x1.2492492478f88p-3), V2 (0x1.c71c71bc3951cp-4),
+ V2 (-0x1.745d160a7e368p-4), V2 (0x1.3b139b6a88ba1p-4),
+ V2 (-0x1.11100ee084227p-4), V2 (0x1.e1d0f9696f63bp-5),
+ V2 (-0x1.aebfe7b418581p-5), V2 (0x1.842dbe9b0d916p-5),
+ V2 (-0x1.5d30140ae5e99p-5), V2 (0x1.338e31eb2fbbcp-5),
+ V2 (-0x1.00e6eece7de8p-5), V2 (0x1.860897b29e5efp-6),
+ V2 (-0x1.0051381722a59p-6), V2 (0x1.14e9dc19a4a4ep-7),
+ V2 (-0x1.d0062b42fe3bfp-9), V2 (0x1.17739e210171ap-10),
+ V2 (-0x1.ab24da7be7402p-13), V2 (0x1.358851160a528p-16), },
+ .pi_over_2 = V2 (0x1.921fb54442d18p+0),
+};
+
+#define SignMask v_u64 (0x8000000000000000)
+#define TinyBound 0x3e10000000000000 /* asuint64(0x1p-30). */
+#define BigBound 0x4340000000000000 /* asuint64(0x1p53). */
+
+/* Fast implementation of vector atan.
+ Based on atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1] using
+ z=-1/x and shift = pi/2. Maximum observed error is 2.27 ulps:
+ _ZGVnN2v_atan (0x1.0005af27c23e9p+0) got 0x1.9225645bdd7c1p-1
+ want 0x1.9225645bdd7c3p-1. */
+float64x2_t VPCS_ATTR V_NAME_D1 (atan) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Small cases, infs and nans are supported by our approximation technique,
+ but do not set fenv flags correctly. Only trigger special case if we need
+ fenv. */
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint64x2_t sign = vandq_u64 (ix, SignMask);
+
+#if WANT_SIMD_EXCEPT
+ uint64x2_t ia12 = vandq_u64 (ix, v_u64 (0x7ff0000000000000));
+ uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia12, v_u64 (TinyBound)),
+ v_u64 (BigBound - TinyBound));
+ /* If any lane is special, fall back to the scalar routine for all lanes. */
+ if (unlikely (v_any_u64 (special)))
+ return v_call_f64 (atan, x, v_f64 (0), v_u64 (-1));
+#endif
+
+ /* Argument reduction:
+ y := arctan(x) for x < 1
+ y := pi/2 + arctan(-1/x) for x > 1
+ Hence, use z=-1/a if x>=1, otherwise z=a. */
+ uint64x2_t red = vcagtq_f64 (x, v_f64 (1.0));
+ /* Avoid dependency in abs(x) in division (and comparison). */
+ float64x2_t z = vbslq_f64 (red, vdivq_f64 (v_f64 (1.0), x), x);
+ float64x2_t shift = vreinterpretq_f64_u64 (
+ vandq_u64 (red, vreinterpretq_u64_f64 (d->pi_over_2)));
+ /* Use absolute value only when needed (odd powers of z). */
+ float64x2_t az = vbslq_f64 (
+ SignMask, vreinterpretq_f64_u64 (vandq_u64 (SignMask, red)), z);
+
+ /* Calculate the polynomial approximation.
+ Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
+ full scheme to avoid underflow in x^16.
+ The order 19 polynomial P approximates
+ (atan(sqrt(x))-sqrt(x))/x^(3/2). */
+ float64x2_t z2 = vmulq_f64 (z, z);
+ float64x2_t x2 = vmulq_f64 (z2, z2);
+ float64x2_t x4 = vmulq_f64 (x2, x2);
+ float64x2_t x8 = vmulq_f64 (x4, x4);
+ float64x2_t y
+ = vfmaq_f64 (v_estrin_7_f64 (z2, x2, x4, d->poly),
+ v_estrin_11_f64 (z2, x2, x4, x8, d->poly + 8), x8);
+
+ /* Finalize. y = shift + z + z^3 * P(z^2). */
+ y = vfmaq_f64 (az, y, vmulq_f64 (z2, az));
+ y = vaddq_f64 (y, shift);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), sign));
+ return y;
+}
+
+PL_SIG (V, D, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_D1 (atan), 1.78)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (atan), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL (V_NAME_D1 (atan), 0, 0x1p-30, 10000)
+PL_TEST_INTERVAL (V_NAME_D1 (atan), -0, -0x1p-30, 1000)
+PL_TEST_INTERVAL (V_NAME_D1 (atan), 0x1p-30, 0x1p53, 900000)
+PL_TEST_INTERVAL (V_NAME_D1 (atan), -0x1p-30, -0x1p53, 90000)
+PL_TEST_INTERVAL (V_NAME_D1 (atan), 0x1p53, inf, 10000)
+PL_TEST_INTERVAL (V_NAME_D1 (atan), -0x1p53, -inf, 1000)
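
The argument reduction above amounts to one division and a pi/2 shift; a scalar restatement with atan standing in for z + z^3 * P(z^2) (illustration only):

    #include <math.h>

    /* atan(|x|) = pi/2 + atan(-1/|x|) for |x| > 1; copy the sign back at the
       end since atan is odd.  */
    static double
    atan_reduction_sketch (double x)
    {
      double ax = fabs (x);
      double y = ax > 1.0 ? M_PI_2 + atan (-1.0 / ax) : atan (ax);
      return copysign (y, x);
    }
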
diff --git a/contrib/arm-optimized-routines/pl/math/v_atanf_3u.c b/contrib/arm-optimized-routines/pl/math/v_atanf_3u.c
new file mode 100644
index 000000000000..f522d957c1cc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_atanf_3u.c
@@ -0,0 +1,107 @@
+/*
+ * Single-precision vector atan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f32.h"
+
+static const struct data
+{
+ float32x4_t poly[8];
+ float32x4_t pi_over_2;
+} data = {
+ /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on
+ [2**-128, 1.0].
+ Generated using fpminimax between FLT_MIN and 1. */
+ .poly = { V4 (-0x1.55555p-2f), V4 (0x1.99935ep-3f), V4 (-0x1.24051ep-3f),
+ V4 (0x1.bd7368p-4f), V4 (-0x1.491f0ep-4f), V4 (0x1.93a2c0p-5f),
+ V4 (-0x1.4c3c60p-6f), V4 (0x1.01fd88p-8f) },
+ .pi_over_2 = V4 (0x1.921fb6p+0f),
+};
+
+#define SignMask v_u32 (0x80000000)
+
+#define P(i) d->poly[i]
+
+#define TinyBound 0x30800000 /* asuint(0x1p-30). */
+#define BigBound 0x4e800000 /* asuint(0x1p30). */
+
+#if WANT_SIMD_EXCEPT
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (atanf, x, y, special);
+}
+#endif
+
+/* Fast implementation of vector atanf based on
+ atan(x) ~ shift + z + z^3 * P(z^2) with reduction to [0,1]
+ using z=-1/x and shift = pi/2. Maximum observed error is 2.9 ulps:
+ _ZGVnN4v_atanf (0x1.0468f6p+0) got 0x1.967f06p-1 want 0x1.967fp-1. */
+float32x4_t VPCS_ATTR V_NAME_F1 (atan) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Small cases, infs and nans are supported by our approximation technique,
+ but do not set fenv flags correctly. Only trigger special case if we need
+ fenv. */
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint32x4_t sign = vandq_u32 (ix, SignMask);
+
+#if WANT_SIMD_EXCEPT
+ uint32x4_t ia = vandq_u32 (ix, v_u32 (0x7ff00000));
+ uint32x4_t special = vcgtq_u32 (vsubq_u32 (ia, v_u32 (TinyBound)),
+ v_u32 (BigBound - TinyBound));
+ /* If any lane is special, fall back to the scalar routine for all lanes. */
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, x, v_u32 (-1));
+#endif
+
+ /* Argument reduction:
+ y := arctan(x) for x < 1
+ y := pi/2 + arctan(-1/x) for x > 1
+ Hence, use z=-1/a if x>=1, otherwise z=a. */
+ uint32x4_t red = vcagtq_f32 (x, v_f32 (1.0));
+ /* Avoid dependency in abs(x) in division (and comparison). */
+ float32x4_t z = vbslq_f32 (red, vdivq_f32 (v_f32 (1.0f), x), x);
+ float32x4_t shift = vreinterpretq_f32_u32 (
+ vandq_u32 (red, vreinterpretq_u32_f32 (d->pi_over_2)));
+ /* Use absolute value only when needed (odd powers of z). */
+ float32x4_t az = vbslq_f32 (
+ SignMask, vreinterpretq_f32_u32 (vandq_u32 (SignMask, red)), z);
+
+ /* Calculate the polynomial approximation.
+ Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
+ a standard implementation using z8 creates spurious underflow
+ in the very last fma (when z^8 is small enough).
+ Therefore, we split the last fma into a mul and an fma.
+ Horner and single-level Estrin have higher errors that exceed
+ threshold. */
+ float32x4_t z2 = vmulq_f32 (z, z);
+ float32x4_t z4 = vmulq_f32 (z2, z2);
+
+ float32x4_t y = vfmaq_f32 (
+ v_pairwise_poly_3_f32 (z2, z4, d->poly), z4,
+ vmulq_f32 (z4, v_pairwise_poly_3_f32 (z2, z4, d->poly + 4)));
+
+ /* y = shift + z * P(z^2). */
+ y = vaddq_f32 (vfmaq_f32 (az, y, vmulq_f32 (z2, az)), shift);
+
+ /* y = atan(x) if x>0, -atan(-x) otherwise. */
+ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), sign));
+
+ return y;
+}
+
+PL_SIG (V, F, 1, atan, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (atan), 2.5)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (atan), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0, 0x1p-30, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p-30, 1, 40000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 1, 0x1p30, 40000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (atan), 0x1p30, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c b/contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c
new file mode 100644
index 000000000000..f282826a3f32
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_atanh_3u5.c
@@ -0,0 +1,66 @@
+/*
+ * Double-precision vector atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define WANT_V_LOG1P_K0_SHORTCUT 0
+#include "v_log1p_inline.h"
+
+const static struct data
+{
+ struct v_log1p_data log1p_consts;
+ uint64x2_t one, half;
+} data = { .log1p_consts = V_LOG1P_CONSTANTS_TABLE,
+ .one = V2 (0x3ff0000000000000),
+ .half = V2 (0x3fe0000000000000) };
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (atanh, x, y, special);
+}
+
+/* Approximation for vector double-precision atanh(x) using modified log1p.
+ The greatest observed error is 3.31 ULP:
+ _ZGVnN2v_atanh(0x1.ffae6288b601p-6) got 0x1.ffd8ff31b5019p-6
+ want 0x1.ffd8ff31b501cp-6. */
+VPCS_ATTR
+float64x2_t V_NAME_D1 (atanh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t ia = vreinterpretq_u64_f64 (ax);
+ uint64x2_t sign = veorq_u64 (vreinterpretq_u64_f64 (x), ia);
+ uint64x2_t special = vcgeq_u64 (ia, d->one);
+ float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->half));
+
+#if WANT_SIMD_EXCEPT
+ ax = v_zerofy_f64 (ax, special);
+#endif
+
+ float64x2_t y;
+ y = vaddq_f64 (ax, ax);
+ y = vdivq_f64 (y, vsubq_f64 (v_f64 (1), ax));
+ y = log1p_inline (y, &d->log1p_consts);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, vmulq_f64 (y, halfsign), special);
+ return vmulq_f64 (y, halfsign);
+}
+
+PL_SIG (V, D, 1, atanh, -1.0, 1.0)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (atanh), WANT_SIMD_EXCEPT)
+PL_TEST_ULP (V_NAME_D1 (atanh), 3.32)
+/* atanh is asymptotic at 1, which is the default control value - have to set
+ -c 0 specially to ensure fp exceptions are triggered correctly (choice of
+ control lane is irrelevant if fp exceptions are disabled). */
+PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 0, 0x1p-23, 10000, 0)
+PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 0x1p-23, 1, 90000, 0)
+PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (atanh), 1, inf, 100, 0)
diff --git a/contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c b/contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c
new file mode 100644
index 000000000000..f6a5f25eca9a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_atanhf_3u1.c
@@ -0,0 +1,77 @@
+/*
+ * Single-precision vector atanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "v_log1pf_inline.h"
+
+const static struct data
+{
+ struct v_log1pf_data log1pf_consts;
+ uint32x4_t one;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t tiny_bound;
+#endif
+} data = {
+ .log1pf_consts = V_LOG1PF_CONSTANTS_TABLE,
+ .one = V4 (0x3f800000),
+#if WANT_SIMD_EXCEPT
+ /* 0x1p-12, below which atanhf(x) rounds to x. */
+ .tiny_bound = V4 (0x39800000),
+#endif
+};
+
+#define AbsMask v_u32 (0x7fffffff)
+#define Half v_u32 (0x3f000000)
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (atanhf, x, y, special);
+}
+
+/* Approximation for vector single-precision atanh(x) using modified log1p.
+ The maximum error is 3.08 ULP:
+ __v_atanhf(0x1.ff215p-5) got 0x1.ffcb7cp-5
+ want 0x1.ffcb82p-5. */
+VPCS_ATTR float32x4_t V_NAME_F1 (atanh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t halfsign = vbslq_f32 (AbsMask, v_f32 (0.5), x);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+
+#if WANT_SIMD_EXCEPT
+ uint32x4_t special
+ = vorrq_u32 (vcgeq_u32 (iax, d->one), vcltq_u32 (iax, d->tiny_bound));
+ /* Side-step special cases by setting those lanes to 0, which will trigger no
+ exceptions. These will be fixed up later. */
+ if (unlikely (v_any_u32 (special)))
+ ax = v_zerofy_f32 (ax, special);
+#else
+ uint32x4_t special = vcgeq_u32 (iax, d->one);
+#endif
+
+ float32x4_t y = vdivq_f32 (vaddq_f32 (ax, ax), vsubq_f32 (v_f32 (1), ax));
+ y = log1pf_inline (y, d->log1pf_consts);
+
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, vmulq_f32 (halfsign, y), special);
+ return vmulq_f32 (halfsign, y);
+}
+
+PL_SIG (V, F, 1, atanh, -1.0, 1.0)
+PL_TEST_ULP (V_NAME_F1 (atanh), 2.59)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (atanh), WANT_SIMD_EXCEPT)
+/* atanh is asymptotic at 1, which is the default control value - have to set
+ -c 0 specially to ensure fp exceptions are triggered correctly (choice of
+ control lane is irrelevant if fp exceptions are disabled). */
+PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 0, 0x1p-12, 500, 0)
+PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 0x1p-12, 1, 200000, 0)
+PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (atanh), 1, inf, 1000, 0)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c b/contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c
new file mode 100644
index 000000000000..cc7cff15dc0f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cbrt_2u.c
@@ -0,0 +1,116 @@
+/*
+ * Double-precision vector cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f64.h"
+
+const static struct data
+{
+ float64x2_t poly[4], one_third, shift;
+ int64x2_t exp_bias;
+ uint64x2_t abs_mask, tiny_bound;
+ uint32x4_t thresh;
+ double table[5];
+} data = {
+ .shift = V2 (0x1.8p52),
+ .poly = { /* Generated with fpminimax in [0.5, 1]. */
+ V2 (0x1.c14e8ee44767p-2), V2 (0x1.dd2d3f99e4c0ep-1),
+ V2 (-0x1.08e83026b7e74p-1), V2 (0x1.2c74eaa3ba428p-3) },
+ .exp_bias = V2 (1022),
+ .abs_mask = V2(0x7fffffffffffffff),
+ .tiny_bound = V2(0x0010000000000000), /* Smallest normal. */
+ .thresh = V4(0x7fe00000), /* asuint64 (infinity) - tiny_bound. */
+ .one_third = V2(0x1.5555555555555p-2),
+ .table = { /* table[i] = 2^((i - 2) / 3). */
+ 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
+ 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 }
+};
+
+#define MantissaMask v_u64 (0x000fffffffffffff)
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint32x2_t special)
+{
+ return v_call_f64 (cbrt, x, y, vmovl_u32 (special));
+}
+
+/* Approximation for double-precision vector cbrt(x), using low-order polynomial
+ and two Newton iterations. Greatest observed error is 1.79 ULP. Errors repeat
+ according to the exponent, for instance an error observed for double value
+ m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
+ integer.
+ __v_cbrt(0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
+ want 0x1.965fe72821e99p+0. */
+VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
+
+ /* Subnormal, +/-0 and special values. */
+ uint32x2_t special
+ = vcge_u32 (vsubhn_u64 (iax, d->tiny_bound), vget_low_u32 (d->thresh));
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexp, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ float64x2_t m = vbslq_f64 (MantissaMask, x, v_f64 (0.5));
+ int64x2_t exp_bias = d->exp_bias;
+ uint64x2_t ia12 = vshrq_n_u64 (iax, 52);
+ int64x2_t e = vsubq_s64 (vreinterpretq_s64_u64 (ia12), exp_bias);
+
+ /* Calculate a rough approximation for cbrt(m) in [0.5, 1.0], the starting
+ point for Newton iterations. */
+ float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly);
+ float64x2_t one_third = d->one_third;
+ /* Two iterations of Newton's method for iteratively approximating cbrt. */
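+ /* Each step for f(a) = a^3 - m is a' = 2/3 * a + (m/3) / a^2. */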
+ float64x2_t m_by_3 = vmulq_f64 (m, one_third);
+ float64x2_t two_thirds = vaddq_f64 (one_third, one_third);
+ float64x2_t a
+ = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (p, p)), two_thirds, p);
+ a = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (a, a)), two_thirds, a);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which is
+ an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
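+ /* For example, e = 7 gives round(e / 3) = 2 and remainder i = 1, so
+ cbrt(m * 2^7) = cbrt(m) * table[1 + 2] * 2^2, with table[3] = 2^(1/3). */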
+
+ float64x2_t ef = vcvtq_f64_s64 (e);
+ float64x2_t eb3f = vrndnq_f64 (vmulq_f64 (ef, one_third));
+ int64x2_t em3 = vcvtq_s64_f64 (vfmsq_f64 (ef, eb3f, v_f64 (3)));
+ int64x2_t ey = vcvtq_s64_f64 (eb3f);
+
+ float64x2_t my = (float64x2_t){ d->table[em3[0] + 2], d->table[em3[1] + 2] };
+ my = vmulq_f64 (my, a);
+
+ /* Vector version of ldexp. */
+ float64x2_t y = vreinterpretq_f64_s64 (
+ vshlq_n_s64 (vaddq_s64 (ey, vaddq_s64 (exp_bias, v_s64 (1))), 52));
+ y = vmulq_f64 (y, my);
+
+ if (unlikely (v_any_u32h (special)))
+ return special_case (x, vbslq_f64 (d->abs_mask, y, x), special);
+
+ /* Copy sign. */
+ return vbslq_f64 (d->abs_mask, y, x);
+}
+
+PL_TEST_ULP (V_NAME_D1 (cbrt), 1.30)
+PL_SIG (V, D, 1, cbrt, -10.0, 10.0)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (cbrt))
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cbrt), 0, inf, 1000000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c b/contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c
new file mode 100644
index 000000000000..74918765209f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cbrtf_1u7.c
@@ -0,0 +1,116 @@
+/*
+ * Single-precision vector cbrt(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f32.h"
+
+const static struct data
+{
+ float32x4_t poly[4], one_third;
+ float table[5];
+} data = {
+ .poly = { /* Very rough approximation of cbrt(x) in [0.5, 1], generated with
+ FPMinimax. */
+ V4 (0x1.c14e96p-2), V4 (0x1.dd2d3p-1), V4 (-0x1.08e81ap-1),
+ V4 (0x1.2c74c2p-3) },
+ .table = { /* table[i] = 2^((i - 2) / 3). */
+ 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0 },
+ .one_third = V4 (0x1.555556p-2f),
+};
+
+#define SignMask v_u32 (0x80000000)
+#define SmallestNormal v_u32 (0x00800000)
+#define Thresh vdup_n_u16 (0x7f00) /* asuint(INFINITY) - SmallestNormal. */
+#define MantissaMask v_u32 (0x007fffff)
+#define HalfExp v_u32 (0x3f000000)
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint16x4_t special)
+{
+ return v_call_f32 (cbrtf, x, y, vmovl_u16 (special));
+}
+
+static inline float32x4_t
+shifted_lookup (const float *table, int32x4_t i)
+{
+ return (float32x4_t){ table[i[0] + 2], table[i[1] + 2], table[i[2] + 2],
+ table[i[3] + 2] };
+}
+
+/* Approximation for vector single-precision cbrt(x) using Newton iteration
+ with initial guess obtained by a low-order polynomial. Greatest error
+ is 1.64 ULP. This is observed for every value where the mantissa is
+ 0x1.85a2aa and the exponent is a multiple of 3, for example:
+ _ZGVnN4v_cbrtf(0x1.85a2aap+3) got 0x1.267936p+1
+ want 0x1.267932p+1. */
+VPCS_ATTR float32x4_t V_NAME_F1 (cbrt) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x));
+
+ /* Subnormal, +/-0 and special values. */
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (iax, SmallestNormal), Thresh);
+
+ /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
+ version of frexpf, which gets subnormal values wrong - these have to be
+ special-cased as a result. */
+ float32x4_t m = vbslq_f32 (MantissaMask, x, v_f32 (0.5));
+ int32x4_t e
+ = vsubq_s32 (vreinterpretq_s32_u32 (vshrq_n_u32 (iax, 23)), v_s32 (126));
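+ /* For example, x = 8.0f has biased exponent 130, so e = 4 and m = 0.5, and
+ indeed cbrt(0.5) * 2^(4/3) = 2.0. */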
+
+ /* p is a rough approximation for cbrt(m) in [0.5, 1.0]. The better this is,
+ the less accurate the next stage of the algorithm needs to be. An order-4
+ polynomial is enough for one Newton iteration. */
+ float32x4_t p = v_pairwise_poly_3_f32 (m, vmulq_f32 (m, m), d->poly);
+
+ float32x4_t one_third = d->one_third;
+ float32x4_t two_thirds = vaddq_f32 (one_third, one_third);
+
+ /* One iteration of Newton's method for iteratively approximating cbrt. */
+ float32x4_t m_by_3 = vmulq_f32 (m, one_third);
+ float32x4_t a
+ = vfmaq_f32 (vdivq_f32 (m_by_3, vmulq_f32 (p, p)), two_thirds, p);
+
+ /* Assemble the result by the following:
+
+ cbrt(x) = cbrt(m) * 2 ^ (e / 3).
+
+ We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
+ not necessarily a multiple of 3 we lose some information.
+
+ Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
+
+ Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
+ is an integer in [-2, 2], and can be looked up in the table T. Hence the
+ result is assembled as:
+
+ cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign. */
+ float32x4_t ef = vmulq_f32 (vcvtq_f32_s32 (e), one_third);
+ int32x4_t ey = vcvtq_s32_f32 (ef);
+ int32x4_t em3 = vsubq_s32 (e, vmulq_s32 (ey, v_s32 (3)));
+
+ float32x4_t my = shifted_lookup (d->table, em3);
+ my = vmulq_f32 (my, a);
+
+ /* Vector version of ldexpf. */
+ float32x4_t y
+ = vreinterpretq_f32_s32 (vshlq_n_s32 (vaddq_s32 (ey, v_s32 (127)), 23));
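+ /* This builds 2^ey directly by writing the biased exponent ey + 127 above a
+ zero mantissa. */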
+ y = vmulq_f32 (y, my);
+
+ if (unlikely (v_any_u16h (special)))
+ return special_case (x, vbslq_f32 (SignMask, x, y), special);
+
+ /* Copy sign. */
+ return vbslq_f32 (SignMask, x, y);
+}
+
+PL_SIG (V, F, 1, cbrt, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (cbrt), 1.15)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (cbrt))
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cbrt), 0, inf, 1000000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c b/contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c
new file mode 100644
index 000000000000..5163b15926b8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cexpi_3u5.c
@@ -0,0 +1,45 @@
+/*
+ * Double-precision vector sincos function - return-by-value interface.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_sincos_common.h"
+#include "v_math.h"
+#include "pl_test.h"
+
+static float64x2x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, uint64x2_t special, float64x2x2_t y)
+{
+ return (float64x2x2_t){ v_call_f64 (sin, x, y.val[0], special),
+ v_call_f64 (cos, x, y.val[1], special) };
+}
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate polynomials.
+ Largest observed error is for sin, 3.22 ULP:
+ v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
+ want -0x1.ffe9537d5dbb4p-3. */
+VPCS_ATTR float64x2x2_t
+_ZGVnN2v_cexpi (float64x2_t x)
+{
+ const struct v_sincos_data *d = ptr_barrier (&v_sincos_data);
+ uint64x2_t special = check_ge_rangeval (x, d);
+
+ float64x2x2_t sc = v_sincos_inline (x, d);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, special, sc);
+ return sc;
+}
+
+PL_TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73)
+PL_TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73)
+#define V_CEXPI_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n)
+V_CEXPI_INTERVAL (0, 0x1p23, 500000)
+V_CEXPI_INTERVAL (-0, -0x1p23, 500000)
+V_CEXPI_INTERVAL (0x1p23, inf, 10000)
+V_CEXPI_INTERVAL (-0x1p23, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c b/contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c
new file mode 100644
index 000000000000..4897018d3090
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cexpif_1u8.c
@@ -0,0 +1,47 @@
+/*
+ * Single-precision vector cexpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_sincosf_common.h"
+#include "v_math.h"
+#include "pl_test.h"
+
+static float32x4x2_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, uint32x4_t special, float32x4x2_t y)
+{
+ return (float32x4x2_t){ v_call_f32 (sinf, x, y.val[0], special),
+ v_call_f32 (cosf, x, y.val[1], special) };
+}
+
+/* Single-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate low-order
+ polynomials.
+ Worst-case error for sin is 1.67 ULP:
+ v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
+ Worst-case error for cos is 1.81 ULP:
+ v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */
+VPCS_ATTR float32x4x2_t
+_ZGVnN4v_cexpif (float32x4_t x)
+{
+ const struct v_sincosf_data *d = ptr_barrier (&v_sincosf_data);
+ uint32x4_t special = check_ge_rangeval (x, d);
+
+ float32x4x2_t sc = v_sincosf_inline (x, d);
+
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, special, sc);
+ return sc;
+}
+
+PL_TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17)
+PL_TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31)
+#define V_CEXPIF_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n)
+V_CEXPIF_INTERVAL (0, 0x1p20, 500000)
+V_CEXPIF_INTERVAL (-0, -0x1p20, 500000)
+V_CEXPIF_INTERVAL (0x1p20, inf, 10000)
+V_CEXPIF_INTERVAL (-0x1p20, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cosh_2u.c b/contrib/arm-optimized-routines/pl/math/v_cosh_2u.c
new file mode 100644
index 000000000000..649c390f4622
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cosh_2u.c
@@ -0,0 +1,104 @@
+/*
+ * Double-precision vector cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[3];
+ float64x2_t inv_ln2, ln2, shift, thres;
+ uint64x2_t index_mask, special_bound;
+} data = {
+ .poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3),
+ V2 (0x1.5555576a59599p-5), },
+
+ .inv_ln2 = V2 (0x1.71547652b82fep8), /* N/ln2. */
+ /* -ln2/N. */
+ .ln2 = {-0x1.62e42fefa39efp-9, -0x1.abc9e3b39803f3p-64},
+ .shift = V2 (0x1.8p+52),
+ .thres = V2 (704.0),
+
+ .index_mask = V2 (0xff),
+ /* 0x1.6p9, above which exp overflows. */
+ .special_bound = V2 (0x4086000000000000),
+};
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (cosh, x, y, special);
+}
+
+/* Helper for approximating exp(x). Copied from v_exp_tail, with no
+ special-case handling or tail. */
+static inline float64x2_t
+exp_inline (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* n = round(x/(ln2/N)). */
+ float64x2_t z = vfmaq_f64 (d->shift, x, d->inv_ln2);
+ uint64x2_t u = vreinterpretq_u64_f64 (z);
+ float64x2_t n = vsubq_f64 (z, d->shift);
+
+ /* r = x - n*ln2/N. */
+ float64x2_t r = vfmaq_laneq_f64 (x, n, d->ln2, 0);
+ r = vfmaq_laneq_f64 (r, n, d->ln2, 1);
+
+ uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS);
+ uint64x2_t i = vandq_u64 (u, d->index_mask);
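+ /* With shift = 1.5 * 2^52, the low bits of u hold n = round(x * N / ln2):
+ i = n % N selects the table entry and e carries the remaining quotient up
+ into the exponent field when added to that entry below. */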
+
+ /* y = tail + exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4. */
+ float64x2_t y = vfmaq_f64 (d->poly[1], d->poly[2], r);
+ y = vfmaq_f64 (d->poly[0], y, r);
+ y = vmulq_f64 (vfmaq_f64 (v_f64 (1), y, r), r);
+
+ /* s = 2^(n/N). */
+ u = v_lookup_u64 (__v_exp_tail_data, i);
+ float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
+
+ return vfmaq_f64 (s, y, s);
+}
+
+/* Approximation for vector double-precision cosh(x) using exp_inline.
+ cosh(x) = (exp(x) + exp(-x)) / 2.
+ The greatest observed error is in the scalar fall-back region, so is the
+ same as the scalar routine, 1.93 ULP:
+ _ZGVnN2v_cosh (0x1.628af341989dap+9) got 0x1.fdf28623ef921p+1021
+ want 0x1.fdf28623ef923p+1021.
+
+ The greatest observed error in the non-special region is 1.54 ULP:
+ _ZGVnN2v_cosh (0x1.8e205b6ecacf7p+2) got 0x1.f711dcb0c77afp+7
+ want 0x1.f711dcb0c77b1p+7. */
+float64x2_t VPCS_ATTR V_NAME_D1 (cosh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t special
+ = vcgtq_u64 (vreinterpretq_u64_f64 (ax), d->special_bound);
+
+ /* Up to the point that exp overflows, we can use it to calculate cosh by
+ exp(|x|) / 2 + 1 / (2 * exp(|x|)). */
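+ /* Since cosh is even and exp(-|x|) = 1 / exp(|x|), one call to exp_inline
+ is enough: cosh(x) = 0.5 * t + 0.5 / t, with t = exp(|x|). */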
+ float64x2_t t = exp_inline (ax);
+ float64x2_t half_t = vmulq_n_f64 (t, 0.5);
+ float64x2_t half_over_t = vdivq_f64 (v_f64 (0.5), t);
+
+ /* Fall back to scalar for any special cases. */
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, vaddq_f64 (half_t, half_over_t), special);
+
+ return vaddq_f64 (half_t, half_over_t);
+}
+
+PL_SIG (V, D, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_D1 (cosh), 1.43)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (cosh))
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0, 0x1.6p9, 100000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cosh), 0x1.6p9, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c b/contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c
new file mode 100644
index 000000000000..c622b0b183f1
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_coshf_2u4.c
@@ -0,0 +1,80 @@
+/*
+ * Single-precision vector cosh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_expf_inline.h"
+#include "v_math.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ struct v_expf_data expf_consts;
+ uint32x4_t tiny_bound, special_bound;
+} data = {
+ .expf_consts = V_EXPF_DATA,
+ .tiny_bound = V4 (0x20000000), /* 0x1p-63: Round to 1 below this. */
+ /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case. */
+ .special_bound = V4 (0x42ad496c),
+};
+
+#if !WANT_SIMD_EXCEPT
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (coshf, x, y, special);
+}
+#endif
+
+/* Single-precision vector cosh, using vector expf.
+ Maximum error is 2.38 ULP:
+ _ZGVnN4v_coshf (0x1.e8001ep+1) got 0x1.6a491ep+4
+ want 0x1.6a4922p+4. */
+float32x4_t VPCS_ATTR V_NAME_F1 (cosh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t special = vcgeq_u32 (iax, d->special_bound);
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to the scalar
+ variant for all inputs if any input is a special value or above the bound
+ at which expf overflows. */
+ if (unlikely (v_any_u32 (special)))
+ return v_call_f32 (coshf, x, x, v_u32 (-1));
+
+ uint32x4_t tiny = vcleq_u32 (iax, d->tiny_bound);
+ /* If any input is tiny, avoid underflow exception by fixing tiny lanes of
+ input to 0, which will generate no exceptions. */
+ if (unlikely (v_any_u32 (tiny)))
+ ax = v_zerofy_f32 (ax, tiny);
+#endif
+
+ /* Calculate cosh by exp(x) / 2 + exp(-x) / 2. */
+ float32x4_t t = v_expf_inline (ax, &d->expf_consts);
+ float32x4_t half_t = vmulq_n_f32 (t, 0.5);
+ float32x4_t half_over_t = vdivq_f32 (v_f32 (0.5), t);
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (tiny)))
+ return vbslq_f32 (tiny, v_f32 (1), vaddq_f32 (half_t, half_over_t));
+#else
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, vaddq_f32 (half_t, half_over_t), special);
+#endif
+
+ return vaddq_f32 (half_t, half_over_t);
+}
+
+PL_SIG (V, F, 1, cosh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (cosh), 1.89)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (cosh), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1p-63, 100)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0, 0x1.5a92d8p+6, 80000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cosh), 0x1.5a92d8p+6, inf, 2000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c b/contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c
new file mode 100644
index 000000000000..3c2ee0b74c8e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cospi_3u1.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision vector cospi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[10];
+ float64x2_t range_val;
+} data = {
+ /* Polynomial coefficients generated using Remez algorithm,
+ see sinpi.sollya for details. */
+ .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2),
+ V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1),
+ V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8),
+ V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16),
+ V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) },
+ .range_val = V2 (0x1p63),
+};
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
+{
+ /* Fall back to scalar code. */
+ y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+ return v_call_f64 (cospi, x, y, cmp);
+}
+
+/* Approximation for vector double-precision cospi(x).
+ Maximum Error 3.06 ULP:
+ _ZGVnN2v_cospi(0x1.7dd4c0b03cc66p-5) got 0x1.fa854babfb6bep-1
+ want 0x1.fa854babfb6c1p-1. */
+float64x2_t VPCS_ATTR V_NAME_D1 (cospi) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ float64x2_t r = vabsq_f64 (x);
+ uint64x2_t cmp = vcaleq_f64 (v_f64 (0x1p64), x);
+
+ /* When WANT_SIMD_EXCEPT = 1, special lanes should be zero'd
+ to avoid them overflowing and throwing exceptions. */
+ r = v_zerofy_f64 (r, cmp);
+ uint64x2_t odd = vshlq_n_u64 (vcvtnq_u64_f64 (r), 63);
+
+#else
+ float64x2_t r = x;
+ uint64x2_t cmp = vcageq_f64 (r, d->range_val);
+ uint64x2_t odd
+ = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63);
+
+#endif
+
+ r = vsubq_f64 (r, vrndaq_f64 (r));
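+ /* cospi(x + n) = (-1)^n * cospi(x) for integer n, so lanes whose input
+ rounds to an odd integer need their sign flipped; odd holds that low bit
+ moved up to the sign position. */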
+
+ /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */
+ r = vsubq_f64 (v_f64 (0.5), vabsq_f64 (r));
+
+ /* y = sin(r). */
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t r4 = vmulq_f64 (r2, r2);
+ float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r);
+
+ /* Fallback to scalar. */
+ if (unlikely (v_any_u64 (cmp)))
+ return special_case (x, y, odd, cmp);
+
+ /* Reintroduce the sign bit for inputs which round to odd. */
+ return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+}
+
+PL_SIG (V, D, 1, cospi, -0.9, 0.9)
+PL_TEST_ULP (V_NAME_D1 (cospi), 2.56)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (cospi), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0, 0x1p-63, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p-63, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0.5, 0x1p51, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (cospi), 0x1p51, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c b/contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c
new file mode 100644
index 000000000000..d88aa828439d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_cospif_3u2.c
@@ -0,0 +1,83 @@
+/*
+ * Single-precision vector cospi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t poly[6];
+ float32x4_t range_val;
+} data = {
+ /* Taylor series coefficients for sin(pi * x). */
+ .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f),
+ V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) },
+ .range_val = V4 (0x1p31f),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
+{
+ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+ return v_call_f32 (cospif, x, y, cmp);
+}
+
+/* Approximation for vector single-precision cospi(x)
+ Maximum Error: 3.17 ULP:
+ _ZGVnN4v_cospif(0x1.d341a8p-5) got 0x1.f7cd56p-1
+ want 0x1.f7cd5p-1. */
+float32x4_t VPCS_ATTR V_NAME_F1 (cospi) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ float32x4_t r = vabsq_f32 (x);
+ uint32x4_t cmp = vcaleq_f32 (v_f32 (0x1p32f), x);
+
+ /* When WANT_SIMD_EXCEPT = 1, special lanes should be zero'd
+ to avoid them overflowing and throwing exceptions. */
+ r = v_zerofy_f32 (r, cmp);
+ uint32x4_t odd = vshlq_n_u32 (vcvtnq_u32_f32 (r), 31);
+
+#else
+ float32x4_t r = x;
+ uint32x4_t cmp = vcageq_f32 (r, d->range_val);
+
+ uint32x4_t odd
+ = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31);
+
+#endif
+
+ /* r = x - rint(x). */
+ r = vsubq_f32 (r, vrndaq_f32 (r));
+
+ /* cospi(x) = sinpi(0.5 - abs(x)) for values -1/2 .. 1/2. */
+ r = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (r));
+
+ /* Pairwise Horner approximation for y = sin(r * pi). */
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t r4 = vmulq_f32 (r2, r2);
+ float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r);
+
+ /* Fallback to scalar. */
+ if (unlikely (v_any_u32 (cmp)))
+ return special_case (x, y, odd, cmp);
+
+ /* Reintroduce the sign bit for inputs which round to odd. */
+ return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+}
+
+PL_SIG (V, F, 1, cospi, -0.9, 0.9)
+PL_TEST_ULP (V_NAME_F1 (cospi), 2.67)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (cospi), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p-31, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0.5, 0x1p32f, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (cospi), 0x1p32f, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_erf_2u5.c b/contrib/arm-optimized-routines/pl/math/v_erf_2u5.c
new file mode 100644
index 000000000000..e581ec5bb8a7
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_erf_2u5.c
@@ -0,0 +1,158 @@
+/*
+ * Double-precision vector erf(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t third;
+ float64x2_t tenth, two_over_five, two_over_fifteen;
+ float64x2_t two_over_nine, two_over_fortyfive;
+ float64x2_t max, shift;
+#if WANT_SIMD_EXCEPT
+ float64x2_t tiny_bound, huge_bound, scale_minus_one;
+#endif
+} data = {
+ .third = V2 (0x1.5555555555556p-2), /* used to compute 2/3 and 1/6 too. */
+ .two_over_fifteen = V2 (0x1.1111111111111p-3),
+ .tenth = V2 (-0x1.999999999999ap-4),
+ .two_over_five = V2 (-0x1.999999999999ap-2),
+ .two_over_nine = V2 (-0x1.c71c71c71c71cp-3),
+ .two_over_fortyfive = V2 (0x1.6c16c16c16c17p-5),
+ .max = V2 (5.9921875), /* 6 - 1/128. */
+ .shift = V2 (0x1p45),
+#if WANT_SIMD_EXCEPT
+ .huge_bound = V2 (0x1p205),
+ .tiny_bound = V2 (0x1p-226),
+ .scale_minus_one = V2 (0x1.06eba8214db69p-3), /* 2/sqrt(pi) - 1.0. */
+#endif
+};
+
+#define AbsMask 0x7fffffffffffffff
+
+struct entry
+{
+ float64x2_t erf;
+ float64x2_t scale;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+ struct entry e;
+ float64x2_t e1 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[0])),
+ e2 = vld1q_f64 ((float64_t *) (__erf_data.tab + i[1]));
+ e.erf = vuzp1q_f64 (e1, e2);
+ e.scale = vuzp2q_f64 (e1, e2);
+ return e;
+}
+
+/* Double-precision implementation of vector erf(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erf(x) ~ erf(r) + scale * d * [
+ + 1
+ - r d
+ + 1/3 (2 r^2 - 1) d^2
+ - 1/6 (r (2 r^2 - 3)) d^3
+ + 1/30 (4 r^4 - 12 r^2 + 3) d^4
+ - 1/90 (4 r^4 - 20 r^2 + 15) d^5
+ ]
+
+ Maximum measured error: 2.29 ULP
+ V_NAME_D1 (erf)(-0x1.00003c924e5d1p-8) got -0x1.20dd59132ebadp-8
+ want -0x1.20dd59132ebafp-8. */
+float64x2_t VPCS_ATTR V_NAME_D1 (erf) (float64x2_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+ float64x2_t a = vabsq_f64 (x);
+ /* Reciprocal conditions that do not catch NaNs so they can be used in BSLs
+ to return expected results. */
+ uint64x2_t a_le_max = vcleq_f64 (a, dat->max);
+ uint64x2_t a_gt_max = vcgtq_f64 (a, dat->max);
+
+#if WANT_SIMD_EXCEPT
+ /* |x| huge or tiny. */
+ uint64x2_t cmp1 = vcgtq_f64 (a, dat->huge_bound);
+ uint64x2_t cmp2 = vcltq_f64 (a, dat->tiny_bound);
+ uint64x2_t cmp = vorrq_u64 (cmp1, cmp2);
+ /* If any lanes are special, mask them with 1 for small x or 8 for large
+ values and retain a copy of a to allow special case handler to fix special
+ lanes later. This is only necessary if fenv exceptions are to be triggered
+ correctly. */
+ if (unlikely (v_any_u64 (cmp)))
+ {
+ a = vbslq_f64 (cmp1, v_f64 (8.0), a);
+ a = vbslq_f64 (cmp2, v_f64 (1.0), a);
+ }
+#endif
+
+ /* Set r to multiple of 1/128 nearest to |x|. */
+ float64x2_t shift = dat->shift;
+ float64x2_t z = vaddq_f64 (a, shift);
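+ /* Adding 2^45 gives z a ULP of 1/128, so r = z - shift below is |x| rounded
+ to the nearest 1/128 and the low bits of z's representation hold
+ round(|x| * 128). */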
+
+ /* Lookup erf(r) and scale(r) in table, without shortcut for small values,
+ but with saturated indices for large values and NaNs in order to avoid
+ segfault. */
+ uint64x2_t i
+ = vsubq_u64 (vreinterpretq_u64_f64 (z), vreinterpretq_u64_f64 (shift));
+ i = vbslq_u64 (a_le_max, i, v_u64 (768));
+ struct entry e = lookup (i);
+
+ float64x2_t r = vsubq_f64 (z, shift);
+
+ /* erf(x) ~ erf(r) + scale * d * poly (r, d). */
+ float64x2_t d = vsubq_f64 (a, r);
+ float64x2_t d2 = vmulq_f64 (d, d);
+ float64x2_t r2 = vmulq_f64 (r, r);
+
+ /* poly (d, r) = 1 + p1(r) * d + p2(r) * d^2 + ... + p5(r) * d^5. */
+ float64x2_t p1 = r;
+ float64x2_t p2
+ = vfmsq_f64 (dat->third, r2, vaddq_f64 (dat->third, dat->third));
+ float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->third));
+ float64x2_t p4 = vfmaq_f64 (dat->two_over_five, r2, dat->two_over_fifteen);
+ p4 = vfmsq_f64 (dat->tenth, r2, p4);
+ float64x2_t p5 = vfmaq_f64 (dat->two_over_nine, r2, dat->two_over_fortyfive);
+ p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->third), r2, p5));
+
+ float64x2_t p34 = vfmaq_f64 (p3, d, p4);
+ float64x2_t p12 = vfmaq_f64 (p1, d, p2);
+ float64x2_t y = vfmaq_f64 (p34, d2, p5);
+ y = vfmaq_f64 (p12, d2, y);
+
+ y = vfmaq_f64 (e.erf, e.scale, vfmsq_f64 (d, d2, y));
+
+ /* Solves the |x| = inf and NaN cases. */
+ y = vbslq_f64 (a_gt_max, v_f64 (1.0), y);
+
+ /* Copy sign. */
+ y = vbslq_f64 (v_u64 (AbsMask), y, x);
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (cmp2)))
+ {
+ /* Neutralise huge values of x before fixing small values. */
+ x = vbslq_f64 (cmp1, v_f64 (1.0), x);
+ /* Fix tiny values that trigger spurious underflow. */
+ return vbslq_f64 (cmp2, vfmaq_f64 (x, dat->scale_minus_one, x), y);
+ }
+#endif
+ return y;
+}
+
+PL_SIG (V, D, 1, erf, -6.0, 6.0)
+PL_TEST_ULP (V_NAME_D1 (erf), 1.79)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (erf), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, 5.9921875, 40000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 5.9921875, inf, 40000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (erf), 0, inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c b/contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c
new file mode 100644
index 000000000000..10ef7e6a3c34
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_erfc_1u8.c
@@ -0,0 +1,198 @@
+/*
+ * Double-precision vector erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint64x2_t offset, table_scale;
+ float64x2_t max, shift;
+ float64x2_t p20, p40, p41, p42;
+ float64x2_t p51, p52;
+ float64x2_t qr5, qr6, qr7, qr8, qr9;
+#if WANT_SIMD_EXCEPT
+ float64x2_t uflow_bound;
+#endif
+} data = {
+ /* Set an offset so the range of the index used for lookup is 3487, and it
+ can be clamped using a saturated add on an offset index.
+ Index offset is 0xffffffffffffffff - asuint64(shift) - 3487. */
+ .offset = V2 (0xbd3ffffffffff260),
+ .table_scale = V2 (0x37f0000000000000 << 1), /* asuint64 (2^-128) << 1. */
+ .max = V2 (0x1.b3ep+4), /* 3487/128. */
+ .shift = V2 (0x1p45),
+ .p20 = V2 (0x1.5555555555555p-2), /* 1/3, used to compute 2/3 and 1/6. */
+ .p40 = V2 (-0x1.999999999999ap-4), /* 1/10. */
+ .p41 = V2 (-0x1.999999999999ap-2), /* 2/5. */
+ .p42 = V2 (0x1.1111111111111p-3), /* 2/15. */
+ .p51 = V2 (-0x1.c71c71c71c71cp-3), /* 2/9. */
+ .p52 = V2 (0x1.6c16c16c16c17p-5), /* 2/45. */
+ /* Qi = (i+1) / i, Ri = -2 * i / ((i+1)*(i+2)), for i = 5, ..., 9. */
+ .qr5 = { 0x1.3333333333333p0, -0x1.e79e79e79e79ep-3 },
+ .qr6 = { 0x1.2aaaaaaaaaaabp0, -0x1.b6db6db6db6dbp-3 },
+ .qr7 = { 0x1.2492492492492p0, -0x1.8e38e38e38e39p-3 },
+ .qr8 = { 0x1.2p0, -0x1.6c16c16c16c17p-3 },
+ .qr9 = { 0x1.1c71c71c71c72p0, -0x1.4f2094f2094f2p-3 },
+#if WANT_SIMD_EXCEPT
+ .uflow_bound = V2 (0x1.a8b12fc6e4892p+4),
+#endif
+};
+
+#define TinyBound 0x4000000000000000 /* 0x1p-511 << 1. */
+#define Off 0xfffffffffffff260 /* 0xffffffffffffffff - 3487. */
+
+struct entry
+{
+ float64x2_t erfc;
+ float64x2_t scale;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+ struct entry e;
+ float64x2_t e1 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[0])),
+ e2 = vld1q_f64 ((float64_t *) (__erfc_data.tab - Off + i[1]));
+ e.erfc = vuzp1q_f64 (e1, e2);
+ e.scale = vuzp2q_f64 (e1, e2);
+ return e;
+}
+
+#if WANT_SIMD_EXCEPT
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+{
+ return v_call_f64 (erfc, x, y, cmp);
+}
+#endif
+
+/* Optimized double-precision vector erfc(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
+
+ poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
+ + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
+ - r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5
+ + p6(r) d^6 + ... + p10(r) d^10
+
+ Polynomials p6(r) to p10(r) are computed using recurrence relation
+
+ 2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0,
+ with p0 = 1, and p1(r) = -r.
+
+ Values of erfc(r) and scale are read from lookup tables. Stored values
+ are scaled to avoid hitting the subnormal range.
+
+ Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
+
+ Maximum measured error: 1.71 ULP
+ V_NAME_D1 (erfc)(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608
+ want 0x1.e15fcbea3e7adp-608. */
+VPCS_ATTR
+float64x2_t V_NAME_D1 (erfc) (float64x2_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ /* |x| < 2^-511. Avoid fabs by left-shifting by 1. */
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint64x2_t cmp = vcltq_u64 (vaddq_u64 (ix, ix), v_u64 (TinyBound));
+ /* x >= ~26.54 (into subnormal case and uflow case). Comparison is done in
+ integer domain to avoid raising exceptions in the presence of NaNs. */
+ uint64x2_t uflow = vcgeq_s64 (vreinterpretq_s64_f64 (x),
+ vreinterpretq_s64_f64 (dat->uflow_bound));
+ cmp = vorrq_u64 (cmp, uflow);
+ float64x2_t xm = x;
+ /* If any lanes are special, mask them with 0 and retain a copy of x to allow
+ special case handler to fix special lanes later. This is only necessary if
+ fenv exceptions are to be triggered correctly. */
+ if (unlikely (v_any_u64 (cmp)))
+ x = v_zerofy_f64 (x, cmp);
+#endif
+
+ float64x2_t a = vabsq_f64 (x);
+ a = vminq_f64 (a, dat->max);
+
+ /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 0 and scale to
+ 2/sqrt(pi), when x reduced to r = 0. */
+ float64x2_t shift = dat->shift;
+ float64x2_t z = vaddq_f64 (a, shift);
+
+ /* Clamp index to a range of 3487. A naive approach would use a subtract and
+ min. Instead we offset the table address and the index, then use a
+ saturating add. */
+ uint64x2_t i = vqaddq_u64 (vreinterpretq_u64_f64 (z), dat->offset);
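+ /* For in-range lanes this amounts to i - Off = round(128 * min(|x|, max));
+ the saturation also pins NaN lanes, which propagate through the min above,
+ to the last valid entry. */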
+
+ struct entry e = lookup (i);
+
+ /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */
+ float64x2_t r = vsubq_f64 (z, shift);
+ float64x2_t d = vsubq_f64 (a, r);
+ float64x2_t d2 = vmulq_f64 (d, d);
+ float64x2_t r2 = vmulq_f64 (r, r);
+
+ float64x2_t p1 = r;
+ float64x2_t p2 = vfmsq_f64 (dat->p20, r2, vaddq_f64 (dat->p20, dat->p20));
+ float64x2_t p3 = vmulq_f64 (r, vfmaq_f64 (v_f64 (-0.5), r2, dat->p20));
+ float64x2_t p4 = vfmaq_f64 (dat->p41, r2, dat->p42);
+ p4 = vfmsq_f64 (dat->p40, r2, p4);
+ float64x2_t p5 = vfmaq_f64 (dat->p51, r2, dat->p52);
+ p5 = vmulq_f64 (r, vfmaq_f64 (vmulq_f64 (v_f64 (0.5), dat->p20), r2, p5));
+ /* Compute p_i using recurrence relation:
+ p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */
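+ /* E.g. i = 4 in the recurrence above gives 10 p4 + 12 r p5 + 42 p6 = 0,
+ i.e. p6 = (p4 + 6/5 * r * p5) * (-5/21), which is the Q5/R5 pair in qr5. */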
+ float64x2_t p6 = vfmaq_f64 (p4, p5, vmulq_laneq_f64 (r, dat->qr5, 0));
+ p6 = vmulq_laneq_f64 (p6, dat->qr5, 1);
+ float64x2_t p7 = vfmaq_f64 (p5, p6, vmulq_laneq_f64 (r, dat->qr6, 0));
+ p7 = vmulq_laneq_f64 (p7, dat->qr6, 1);
+ float64x2_t p8 = vfmaq_f64 (p6, p7, vmulq_laneq_f64 (r, dat->qr7, 0));
+ p8 = vmulq_laneq_f64 (p8, dat->qr7, 1);
+ float64x2_t p9 = vfmaq_f64 (p7, p8, vmulq_laneq_f64 (r, dat->qr8, 0));
+ p9 = vmulq_laneq_f64 (p9, dat->qr8, 1);
+ float64x2_t p10 = vfmaq_f64 (p8, p9, vmulq_laneq_f64 (r, dat->qr9, 0));
+ p10 = vmulq_laneq_f64 (p10, dat->qr9, 1);
+ /* Compute polynomial in d using pairwise Horner scheme. */
+ float64x2_t p90 = vfmaq_f64 (p9, d, p10);
+ float64x2_t p78 = vfmaq_f64 (p7, d, p8);
+ float64x2_t p56 = vfmaq_f64 (p5, d, p6);
+ float64x2_t p34 = vfmaq_f64 (p3, d, p4);
+ float64x2_t p12 = vfmaq_f64 (p1, d, p2);
+ float64x2_t y = vfmaq_f64 (p78, d2, p90);
+ y = vfmaq_f64 (p56, d2, y);
+ y = vfmaq_f64 (p34, d2, y);
+ y = vfmaq_f64 (p12, d2, y);
+
+ y = vfmsq_f64 (e.erfc, e.scale, vfmsq_f64 (d, d2, y));
+
+ /* Offset equals 2.0 if sign, else 0.0. */
+ uint64x2_t sign = vshrq_n_u64 (vreinterpretq_u64_f64 (x), 63);
+ float64x2_t off = vreinterpretq_f64_u64 (vshlq_n_u64 (sign, 62));
+ /* Copy sign and scale back in a single fma. Since the bit patterns do not
+ overlap, logical OR and addition are equivalent here. */
+ float64x2_t fac = vreinterpretq_f64_u64 (
+ vsraq_n_u64 (vshlq_n_u64 (sign, 63), dat->table_scale, 1));
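+ /* fac is +/- 2^-128: it undoes the scaling of the stored table values and
+ applies the sign of x in one constant, so the fma below yields erfc(|x|)
+ for x >= 0 and 2 - erfc(|x|) for x < 0. */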
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (cmp)))
+ return special_case (xm, vfmaq_f64 (off, fac, y), cmp);
+#endif
+
+ return vfmaq_f64 (off, fac, y);
+}
+
+PL_SIG (V, D, 1, erfc, -6.0, 28.0)
+PL_TEST_ULP (V_NAME_D1 (erfc), 1.21)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (erfc), 0, 0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME_D1 (erfc), 0x1p-26, 28.0, 40000)
+PL_TEST_INTERVAL (V_NAME_D1 (erfc), -0x1p-26, -6.0, 40000)
+PL_TEST_INTERVAL (V_NAME_D1 (erfc), 28.0, inf, 40000)
+PL_TEST_INTERVAL (V_NAME_D1 (erfc), -6.0, -inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c b/contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c
new file mode 100644
index 000000000000..c361d0704438
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_erfcf_1u7.c
@@ -0,0 +1,166 @@
+/*
+ * Single-precision vector erfc(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint32x4_t offset, table_scale;
+ float32x4_t max, shift;
+ float32x4_t coeffs, third, two_over_five, tenth;
+#if WANT_SIMD_EXCEPT
+ float32x4_t uflow_bound;
+#endif
+
+} data = {
+ /* Set an offset so the range of the index used for lookup is 644, and it can
+ be clamped using a saturated add. */
+ .offset = V4 (0xb7fffd7b), /* 0xffffffff - asuint(shift) - 644. */
+ .table_scale = V4 (0x28000000 << 1), /* asuint (2^-47) << 1. */
+ .max = V4 (10.0625f), /* 10 + 1/16 = 644/64. */
+ .shift = V4 (0x1p17f),
+ /* Store 1/3, 2/3 and 2/15 in a single register for use with indexed muls and
+ fmas. */
+ .coeffs = (float32x4_t){ 0x1.555556p-2f, 0x1.555556p-1f, 0x1.111112p-3f, 0 },
+ .third = V4 (0x1.555556p-2f),
+ .two_over_five = V4 (-0x1.99999ap-2f),
+ .tenth = V4 (-0x1.99999ap-4f),
+#if WANT_SIMD_EXCEPT
+ .uflow_bound = V4 (0x1.2639cp+3f),
+#endif
+};
+
+#define TinyBound 0x41000000 /* 0x1p-62f << 1. */
+#define Thres 0xbe000000 /* asuint(infinity) << 1 - TinyBound. */
+#define Off 0xfffffd7b /* 0xffffffff - 644. */
+
+struct entry
+{
+ float32x4_t erfc;
+ float32x4_t scale;
+};
+
+static inline struct entry
+lookup (uint32x4_t i)
+{
+ struct entry e;
+ float64_t t0 = *((float64_t *) (__erfcf_data.tab - Off + i[0]));
+ float64_t t1 = *((float64_t *) (__erfcf_data.tab - Off + i[1]));
+ float64_t t2 = *((float64_t *) (__erfcf_data.tab - Off + i[2]));
+ float64_t t3 = *((float64_t *) (__erfcf_data.tab - Off + i[3]));
+ float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 });
+ float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 });
+ e.erfc = vuzp1q_f32 (e1, e2);
+ e.scale = vuzp2q_f32 (e1, e2);
+ return e;
+}
+
+#if WANT_SIMD_EXCEPT
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
+{
+ return v_call_f32 (erfcf, x, y, cmp);
+}
+#endif
+
+/* Optimized single-precision vector erfcf(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/64.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
+
+ poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
+ + (2/15 r^4 - 2/5 r^2 + 1/10) d^4
+
+ Values of erfc(r) and scale are read from lookup tables. Stored values
+ are scaled to avoid hitting the subnormal range.
+
+ Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
+ Maximum error: 1.63 ULP (~1.0 ULP for x < 0.0).
+ _ZGVnN4v_erfcf(0x1.1dbf7ap+3) got 0x1.f51212p-120
+ want 0x1.f51216p-120. */
+VPCS_ATTR
+float32x4_t V_NAME_F1 (erfc) (float32x4_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ /* |x| < 2^-62. Avoid fabs by left-shifting by 1. */
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint32x4_t cmp = vcltq_u32 (vaddq_u32 (ix, ix), v_u32 (TinyBound));
+ /* x >= ~9.19 (into subnormal case and uflow case). Comparison is done in
+ integer domain to avoid raising exceptions in the presence of NaNs. */
+ uint32x4_t uflow = vcgeq_s32 (vreinterpretq_s32_f32 (x),
+ vreinterpretq_s32_f32 (dat->uflow_bound));
+ cmp = vorrq_u32 (cmp, uflow);
+ float32x4_t xm = x;
+ /* If any lanes are special, mask them with 0 and retain a copy of x to allow
+ special case handler to fix special lanes later. This is only necessary if
+ fenv exceptions are to be triggered correctly. */
+ if (unlikely (v_any_u32 (cmp)))
+ x = v_zerofy_f32 (x, cmp);
+#endif
+
+ float32x4_t a = vabsq_f32 (x);
+ a = vminq_f32 (a, dat->max);
+
+ /* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 0 and scale to
+ 2/sqrt(pi), when x reduced to r = 0. */
+ float32x4_t shift = dat->shift;
+ float32x4_t z = vaddq_f32 (a, shift);
+
+ /* Clamp index to a range of 644. A naive approach would use a subtract and
+ min. Instead we offset the table address and the index, then use a
+ saturating add. */
+ uint32x4_t i = vqaddq_u32 (vreinterpretq_u32_f32 (z), dat->offset);
+
+ struct entry e = lookup (i);
+
+ /* erfc(x) ~ erfc(r) - scale * d * poly(r, d). */
+ float32x4_t r = vsubq_f32 (z, shift);
+ float32x4_t d = vsubq_f32 (a, r);
+ float32x4_t d2 = vmulq_f32 (d, d);
+ float32x4_t r2 = vmulq_f32 (r, r);
+
+ float32x4_t p1 = r;
+ float32x4_t p2 = vfmsq_laneq_f32 (dat->third, r2, dat->coeffs, 1);
+ float32x4_t p3
+ = vmulq_f32 (r, vfmaq_laneq_f32 (v_f32 (-0.5), r2, dat->coeffs, 0));
+ float32x4_t p4 = vfmaq_laneq_f32 (dat->two_over_five, r2, dat->coeffs, 2);
+ p4 = vfmsq_f32 (dat->tenth, r2, p4);
+
+ float32x4_t y = vfmaq_f32 (p3, d, p4);
+ y = vfmaq_f32 (p2, d, y);
+ y = vfmaq_f32 (p1, d, y);
+ y = vfmsq_f32 (e.erfc, e.scale, vfmsq_f32 (d, d2, y));
+
+ /* Offset equals 2.0f if sign, else 0.0f. */
+ uint32x4_t sign = vshrq_n_u32 (vreinterpretq_u32_f32 (x), 31);
+ float32x4_t off = vreinterpretq_f32_u32 (vshlq_n_u32 (sign, 30));
+ /* Copy sign and scale back in a single fma. Since the bit patterns do not
+ overlap, logical OR and addition are equivalent here. */
+ float32x4_t fac = vreinterpretq_f32_u32 (
+ vsraq_n_u32 (vshlq_n_u32 (sign, 31), dat->table_scale, 1));
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (cmp)))
+ return special_case (xm, vfmaq_f32 (off, fac, y), cmp);
+#endif
+
+ return vfmaq_f32 (off, fac, y);
+}
+
+PL_SIG (V, F, 1, erfc, -4.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (erfc), 1.14)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (erfc), 0, 0x1p-26, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (erfc), 0x1p-26, 10.0625, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (erfc), -0x1p-26, -4.0, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (erfc), 10.0625, inf, 40000)
+PL_TEST_INTERVAL (V_NAME_F1 (erfc), -4.0, -inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_erff_2u.c b/contrib/arm-optimized-routines/pl/math/v_erff_2u.c
new file mode 100644
index 000000000000..502526407df2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_erff_2u.c
@@ -0,0 +1,118 @@
+/*
+ * Single-precision vector erf(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t max, shift, third;
+#if WANT_SIMD_EXCEPT
+ float32x4_t tiny_bound, scale_minus_one;
+#endif
+} data = {
+ .max = V4 (3.9375), /* 4 - 8/128. */
+ .shift = V4 (0x1p16f),
+ .third = V4 (0x1.555556p-2f), /* 1/3. */
+#if WANT_SIMD_EXCEPT
+ .tiny_bound = V4 (0x1p-62f),
+ .scale_minus_one = V4 (0x1.06eba8p-3f), /* scale - 1.0. */
+#endif
+};
+
+#define AbsMask 0x7fffffff
+
+struct entry
+{
+ float32x4_t erf;
+ float32x4_t scale;
+};
+
+static inline struct entry
+lookup (uint32x4_t i)
+{
+ struct entry e;
+ float64_t t0 = *((float64_t *) (__erff_data.tab + i[0]));
+ float64_t t1 = *((float64_t *) (__erff_data.tab + i[1]));
+ float64_t t2 = *((float64_t *) (__erff_data.tab + i[2]));
+ float64_t t3 = *((float64_t *) (__erff_data.tab + i[3]));
+ float32x4_t e1 = vreinterpretq_f32_f64 ((float64x2_t){ t0, t1 });
+ float32x4_t e2 = vreinterpretq_f32_f64 ((float64x2_t){ t2, t3 });
+ e.erf = vuzp1q_f32 (e1, e2);
+ e.scale = vuzp2q_f32 (e1, e2);
+ return e;
+}
+
+/* Single-precision implementation of vector erf(x).
+ Approximation based on series expansion near x rounded to
+ nearest multiple of 1/128.
+ Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
+
+ erf(x) ~ erf(r) + scale * d * [1 - r * d - 1/3 * d^2]
+
+ Values of erf(r) and scale are read from lookup tables.
+ For |x| > 3.9375, erf(|x|) rounds to 1.0f.
+
+ Maximum error: 1.93 ULP
+ _ZGVnN4v_erff(0x1.c373e6p-9) got 0x1.fd686cp-9
+ want 0x1.fd6868p-9. */
+float32x4_t VPCS_ATTR V_NAME_F1 (erf) (float32x4_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ /* |x| < 2^-62. */
+ uint32x4_t cmp = vcaltq_f32 (x, dat->tiny_bound);
+ float32x4_t xm = x;
+ /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+ special case handler to fix special lanes later. This is only necessary if
+ fenv exceptions are to be triggered correctly. */
+ if (unlikely (v_any_u32 (cmp)))
+ x = vbslq_f32 (cmp, v_f32 (1), x);
+#endif
+
+ float32x4_t a = vabsq_f32 (x);
+ uint32x4_t a_gt_max = vcgtq_f32 (a, dat->max);
+
+ /* Lookup erf(r) and scale(r) in tables, e.g. set erf(r) to 0 and scale to
+ 2/sqrt(pi), when x reduced to r = 0. */
+ float32x4_t shift = dat->shift;
+ float32x4_t z = vaddq_f32 (a, shift);
+
+ uint32x4_t i
+ = vsubq_u32 (vreinterpretq_u32_f32 (z), vreinterpretq_u32_f32 (shift));
+ i = vminq_u32 (i, v_u32 (512));
+ struct entry e = lookup (i);
+
+ float32x4_t r = vsubq_f32 (z, shift);
+
+ /* erf(x) ~ erf(r) + scale * d * (1 - r * d - 1/3 * d^2). */
+ float32x4_t d = vsubq_f32 (a, r);
+ float32x4_t d2 = vmulq_f32 (d, d);
+ float32x4_t y = vfmaq_f32 (r, dat->third, d);
+ y = vfmaq_f32 (e.erf, e.scale, vfmsq_f32 (d, d2, y));
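+ /* That is erf(r) + scale * (d - d^2 * (r + d / 3))
+ = erf(r) + scale * d * (1 - r * d - d^2 / 3), matching the expansion
+ above. */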
+
+ /* Solves the |x| = inf case. */
+ y = vbslq_f32 (a_gt_max, v_f32 (1.0f), y);
+
+ /* Copy sign. */
+ y = vbslq_f32 (v_u32 (AbsMask), y, x);
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (cmp)))
+ return vbslq_f32 (cmp, vfmaq_f32 (xm, dat->scale_minus_one, xm), y);
+#endif
+ return y;
+}
+
+PL_SIG (V, F, 1, erf, -4.0, 4.0)
+PL_TEST_ULP (V_NAME_F1 (erf), 1.43)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (erf), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, 3.9375, 40000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 3.9375, inf, 40000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (erf), 0, inf, 40000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c b/contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c
new file mode 100644
index 000000000000..654a7336e85b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_erfinv_25u.c
@@ -0,0 +1,161 @@
+/*
+ * Double-precision inverse error function (AdvSIMD variant).
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "v_math.h"
+#include "pl_test.h"
+#include "mathlib.h"
+#include "math_config.h"
+#include "pl_sig.h"
+#include "poly_advsimd_f64.h"
+#define V_LOG_INLINE_POLY_ORDER 4
+#include "v_log_inline.h"
+
+const static struct data
+{
+ /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
+ coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
+ of the denominator. P is interleaved P_17 and P_37, similar for Q. P17
+ and Q17 are provided as homogeneous vectors as well for when the shortcut
+ can be taken. */
+ double P[8][2], Q[7][2];
+ float64x2_t tailshift;
+ uint8x16_t idx;
+ struct v_log_inline_data log_tbl;
+ float64x2_t P_57[9], Q_57[10], P_17[7], Q_17[6];
+} data = { .P = { { 0x1.007ce8f01b2e8p+4, -0x1.f3596123109edp-7 },
+ { -0x1.6b23cc5c6c6d7p+6, 0x1.60b8fe375999ep-2 },
+ { 0x1.74e5f6ceb3548p+7, -0x1.779bb9bef7c0fp+1 },
+ { -0x1.5200bb15cc6bbp+7, 0x1.786ea384470a2p+3 },
+ { 0x1.05d193233a849p+6, -0x1.6a7c1453c85d3p+4 },
+ { -0x1.148c5474ee5e1p+3, 0x1.31f0fc5613142p+4 },
+ { 0x1.689181bbafd0cp-3, -0x1.5ea6c007d4dbbp+2 },
+ { 0, 0x1.e66f265ce9e5p-3 } },
+ .Q = { { 0x1.d8fb0f913bd7bp+3, -0x1.636b2dcf4edbep-7 },
+ { -0x1.6d7f25a3f1c24p+6, 0x1.0b5411e2acf29p-2 },
+ { 0x1.a450d8e7f4cbbp+7, -0x1.3413109467a0bp+1 },
+ { -0x1.bc3480485857p+7, 0x1.563e8136c554ap+3 },
+ { 0x1.ae6b0c504ee02p+6, -0x1.7b77aab1dcafbp+4 },
+ { -0x1.499dfec1a7f5fp+4, 0x1.8a3e174e05ddcp+4 },
+ { 0x1p+0, -0x1.4075c56404eecp+3 } },
+ .P_57 = { V2 (0x1.b874f9516f7f1p-14), V2 (0x1.5921f2916c1c4p-7),
+ V2 (0x1.145ae7d5b8fa4p-2), V2 (0x1.29d6dcc3b2fb7p+1),
+ V2 (0x1.cabe2209a7985p+2), V2 (0x1.11859f0745c4p+3),
+ V2 (0x1.b7ec7bc6a2ce5p+2), V2 (0x1.d0419e0bb42aep+1),
+ V2 (0x1.c5aa03eef7258p-1) },
+ .Q_57 = { V2 (0x1.b8747e12691f1p-14), V2 (0x1.59240d8ed1e0ap-7),
+ V2 (0x1.14aef2b181e2p-2), V2 (0x1.2cd181bcea52p+1),
+ V2 (0x1.e6e63e0b7aa4cp+2), V2 (0x1.65cf8da94aa3ap+3),
+ V2 (0x1.7e5c787b10a36p+3), V2 (0x1.0626d68b6cea3p+3),
+ V2 (0x1.065c5f193abf6p+2), V2 (0x1p+0) },
+ .P_17 = { V2 (0x1.007ce8f01b2e8p+4), V2 (-0x1.6b23cc5c6c6d7p+6),
+ V2 (0x1.74e5f6ceb3548p+7), V2 (-0x1.5200bb15cc6bbp+7),
+ V2 (0x1.05d193233a849p+6), V2 (-0x1.148c5474ee5e1p+3),
+ V2 (0x1.689181bbafd0cp-3) },
+ .Q_17 = { V2 (0x1.d8fb0f913bd7bp+3), V2 (-0x1.6d7f25a3f1c24p+6),
+ V2 (0x1.a450d8e7f4cbbp+7), V2 (-0x1.bc3480485857p+7),
+ V2 (0x1.ae6b0c504ee02p+6), V2 (-0x1.499dfec1a7f5fp+4) },
+ .tailshift = V2 (-0.87890625),
+ .idx = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ .log_tbl = V_LOG_CONSTANTS };
+
+static inline float64x2_t
+special (float64x2_t x, const struct data *d)
+{
+ /* Note erfinv(inf) should return NaN, and erfinv(1) should return Inf.
+ By using log here, instead of log1p, we return finite values for both
+ these inputs, and values outside [-1, 1]. This is non-compliant, but is an
+ acceptable optimisation at Ofast. To get correct behaviour for all finite
+ values use the log1p_inline helper on -abs(x) - note that erfinv(inf)
+ will still be finite. */
+ float64x2_t t = vnegq_f64 (
+ v_log_inline (vsubq_f64 (v_f64 (1), vabsq_f64 (x)), &d->log_tbl));
+ t = vdivq_f64 (v_f64 (1), vsqrtq_f64 (t));
+ float64x2_t ts = vbslq_f64 (v_u64 (0x7fffffffffffffff), t, x);
+ return vdivq_f64 (v_horner_8_f64 (t, d->P_57),
+ vmulq_f64 (ts, v_horner_9_f64 (t, d->Q_57)));
+}
+
+static inline float64x2_t
+lookup (const double *c, uint8x16_t idx)
+{
+ float64x2_t x = vld1q_f64 (c);
+ return vreinterpretq_f64_u8 (vqtbl1q_u8 (vreinterpretq_u8_f64 (x), idx));
+}
+
+static inline float64x2_t VPCS_ATTR
+notails (float64x2_t x, const struct data *d)
+{
+ /* Shortcut when no input is in a tail region - no need to gather shift or
+ coefficients. */
+ float64x2_t t = vfmaq_f64 (v_f64 (-0.5625), x, x);
+ float64x2_t p = vmulq_f64 (v_horner_6_f64 (t, d->P_17), x);
+ float64x2_t q = vaddq_f64 (d->Q_17[5], t);
+ for (int i = 4; i >= 0; i--)
+ q = vfmaq_f64 (d->Q_17[i], q, t);
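+ /* q = Q_17[0] + Q_17[1] t + ... + Q_17[5] t^5 + t^6: seeding with
+ Q_17[5] + t supplies the implicit leading coefficient of 1. */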
+ return vdivq_f64 (p, q);
+}
+
+/* Vector implementation of Blair et al's rational approximation to inverse
+ error function in double precision. Largest observed error is 24.75 ULP:
+ _ZGVnN2v_erfinv(0x1.fc861d81c2ba8p-1) got 0x1.ea05472686625p+0
+ want 0x1.ea0547268660cp+0. */
+float64x2_t VPCS_ATTR V_NAME_D1 (erfinv) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ /* Calculate inverse error using algorithm described in
+ J. M. Blair, C. A. Edwards, and J. H. Johnson,
+ "Rational Chebyshev approximations for the inverse of the error function",
+ Math. Comp. 30, pp. 827--830 (1976).
+ https://doi.org/10.1090/S0025-5718-1976-0421040-7.
+
+ Algorithm has 3 intervals:
+ - 'Normal' region [-0.75, 0.75]
+ - Tail region [0.75, 0.9375] U [-0.9375, -0.75]
+ - Extreme tail [-1, -0.9375] U [0.9375, 1]
+ Normal and tail are both rational approximation of similar order on
+ shifted input - these are typically performed in parallel using gather
+ loads to obtain correct coefficients depending on interval. */
+ uint64x2_t is_tail = vcagtq_f64 (x, v_f64 (0.75));
+
+ if (unlikely (!v_any_u64 (is_tail)))
+ /* If the input is uniformly distributed in [-1, 1] then the likelihood of
+ this is 0.75^2 ~= 0.56. */
+ return notails (x, d);
+
+ uint64x2_t extreme_tail = vcagtq_f64 (x, v_f64 (0.9375));
+
+ uint8x16_t off = vandq_u8 (vreinterpretq_u8_u64 (is_tail), vdupq_n_u8 (8));
+ uint8x16_t idx = vaddq_u8 (d->idx, off);
+
+ float64x2_t t = vbslq_f64 (is_tail, d->tailshift, v_f64 (-0.5625));
+ t = vfmaq_f64 (t, x, x);
+
+ float64x2_t p = lookup (&d->P[7][0], idx);
+ /* Last coeff of q is either 0 or 1 - use mask instead of load. */
+ float64x2_t q = vreinterpretq_f64_u64 (
+ vandq_u64 (is_tail, vreinterpretq_u64_f64 (v_f64 (1))));
+ for (int i = 6; i >= 0; i--)
+ {
+ p = vfmaq_f64 (lookup (&d->P[i][0], idx), p, t);
+ q = vfmaq_f64 (lookup (&d->Q[i][0], idx), q, t);
+ }
+ p = vmulq_f64 (p, x);
+
+ if (unlikely (v_any_u64 (extreme_tail)))
+ return vbslq_f64 (extreme_tail, special (x, d), vdivq_f64 (p, q));
+
+ return vdivq_f64 (p, q);
+}
+
+PL_SIG (V, D, 1, erfinv, -0.99, 0.99)
+PL_TEST_ULP (V_NAME_D1 (erfinv), 24.8)
+/* Test with control lane in each interval. */
+PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000,
+ 0.5)
+PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000,
+ 0.8)
+PL_TEST_SYM_INTERVAL_C (V_NAME_D1 (erfinv), 0, 0x1.fffffffffffffp-1, 100000,
+ 0.95)
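
Editor's note: the coefficient gather above relies on vqtbl1q_u8. Each P[i]/Q[i] pair is loaded as one 128-bit vector holding the normal-interval coefficient in the low double and the tail coefficient in the high double, and adding 8 to a lane's byte indices makes that lane read the high double. A minimal standalone sketch of the trick follows (placeholder values, not Blair coefficients; assumes an AdvSIMD target):

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* A [normal, tail] coefficient pair - placeholder values.  */
  double pair[2] = { 1.5, -2.5 };
  float64x2_t v = vld1q_f64 (pair);
  /* Pretend lane 0 is in the tail region and lane 1 is not.  */
  uint64x2_t is_tail = { ~0ULL, 0ULL };
  /* Base indices copy each double in place; adding 8 to the bytes of a tail
     lane makes that lane pick up the high double instead.  */
  uint8x16_t base = { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 };
  uint8x16_t off = vandq_u8 (vreinterpretq_u8_u64 (is_tail), vdupq_n_u8 (8));
  uint8x16_t idx = vaddq_u8 (base, off);
  float64x2_t c
      = vreinterpretq_f64_u8 (vqtbl1q_u8 (vreinterpretq_u8_f64 (v), idx));
  /* Prints "-2.5 1.5": lane 0 picked the tail coefficient.  */
  printf ("%g %g\n", vgetq_lane_f64 (c, 0), vgetq_lane_f64 (c, 1));
  return 0;
}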
diff --git a/contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c b/contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c
new file mode 100644
index 000000000000..5a6800b86ae9
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_erfinvf_5u.c
@@ -0,0 +1,163 @@
+/*
+ * Single-precision inverse error function (AdvSIMD variant).
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f32.h"
+#include "v_logf_inline.h"
+
+const static struct data
+{
+ /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
+ coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
+ of the denominator. Coefficients are stored in various interleaved
+ formats to allow for table-based (vector-to-vector) lookup.
+
+     Plo holds the first two coefficients of P_10 and P_29 interleaved.
+     PQ holds the third coefficients of P_10 and P_29, then the first
+     coefficients of Q_10 and Q_29, interleaved.
+     Qhi holds the second and third coefficients of Q_10 and Q_29 interleaved.
+     P29_3 is a homogeneous vector with the fourth coefficient of P_29.
+
+     P_10 and Q_10 are also stored in homogeneous vectors to allow better
+     memory access when no lanes are in a tail region. */
+ float32x4_t Plo, PQ, Qhi, P29_3, tailshift;
+ float32x4_t P_50[6], Q_50[2];
+ float32x4_t P_10[3], Q_10[3];
+ uint8x16_t idxhi, idxlo;
+ struct v_logf_data logf_tbl;
+} data = {
+  .idxlo = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 },
+  .idxhi = { 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11 },
+ .P29_3 = V4 (0x1.b13626p-2),
+ .tailshift = V4 (-0.87890625),
+ .Plo = { -0x1.a31268p+3, -0x1.fc0252p-4, 0x1.ac9048p+4, 0x1.119d44p+0 },
+ .PQ = { -0x1.293ff6p+3, -0x1.f59ee2p+0, -0x1.8265eep+3, -0x1.69952p-4 },
+ .Qhi = { 0x1.ef5eaep+4, 0x1.c7b7d2p-1, -0x1.12665p+4, -0x1.167d7p+1 },
+ .P_50 = { V4 (0x1.3d8948p-3), V4 (0x1.61f9eap+0), V4 (0x1.61c6bcp-1),
+ V4 (-0x1.20c9f2p+0), V4 (0x1.5c704cp-1), V4 (-0x1.50c6bep-3) },
+ .Q_50 = { V4 (0x1.3d7dacp-3), V4 (0x1.629e5p+0) },
+ .P_10 = { V4 (-0x1.a31268p+3), V4 (0x1.ac9048p+4), V4 (-0x1.293ff6p+3) },
+ .Q_10 = { V4 (-0x1.8265eep+3), V4 (0x1.ef5eaep+4), V4 (-0x1.12665p+4) },
+ .logf_tbl = V_LOGF_CONSTANTS
+};
+
+static inline float32x4_t
+special (float32x4_t x, const struct data *d)
+{
+ /* Note erfinvf(inf) should return NaN, and erfinvf(1) should return Inf.
+ By using log here, instead of log1p, we return finite values for both
+ these inputs, and values outside [-1, 1]. This is non-compliant, but is an
+ acceptable optimisation at Ofast. To get correct behaviour for all finite
+ values use the log1pf_inline helper on -abs(x) - note that erfinvf(inf)
+ will still be finite. */
+ float32x4_t t = vdivq_f32 (
+ v_f32 (1), vsqrtq_f32 (vnegq_f32 (v_logf_inline (
+ vsubq_f32 (v_f32 (1), vabsq_f32 (x)), &d->logf_tbl))));
+ float32x4_t ts = vbslq_f32 (v_u32 (0x7fffffff), t, x);
+ float32x4_t q = vfmaq_f32 (d->Q_50[0], vaddq_f32 (t, d->Q_50[1]), t);
+ return vdivq_f32 (v_horner_5_f32 (t, d->P_50), vmulq_f32 (ts, q));
+}
+
+static inline float32x4_t
+notails (float32x4_t x, const struct data *d)
+{
+ /* Shortcut when no input is in a tail region - no need to gather shift or
+ coefficients. */
+ float32x4_t t = vfmaq_f32 (v_f32 (-0.5625), x, x);
+ float32x4_t q = vaddq_f32 (t, d->Q_10[2]);
+ q = vfmaq_f32 (d->Q_10[1], t, q);
+ q = vfmaq_f32 (d->Q_10[0], t, q);
+
+ return vdivq_f32 (vmulq_f32 (x, v_horner_2_f32 (t, d->P_10)), q);
+}
+
+static inline float32x4_t
+lookup (float32x4_t tbl, uint8x16_t idx)
+{
+ return vreinterpretq_f32_u8 (vqtbl1q_u8 (vreinterpretq_u8_f32 (tbl), idx));
+}
+
+/* Vector implementation of Blair et al's rational approximation to inverse
+ error function in single-precision. Worst-case error is 4.98 ULP, in the
+ tail region:
+ _ZGVnN4v_erfinvf(0x1.f7dbeep-1) got 0x1.b4793p+0
+ want 0x1.b4793ap+0 . */
+float32x4_t VPCS_ATTR V_NAME_F1 (erfinv) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Calculate inverse error using algorithm described in
+ J. M. Blair, C. A. Edwards, and J. H. Johnson,
+ "Rational Chebyshev approximations for the inverse of the error
+ function", Math. Comp. 30, pp. 827--830 (1976).
+ https://doi.org/10.1090/S0025-5718-1976-0421040-7.
+
+ Algorithm has 3 intervals:
+ - 'Normal' region [-0.75, 0.75]
+ - Tail region [0.75, 0.9375] U [-0.9375, -0.75]
+ - Extreme tail [-1, -0.9375] U [0.9375, 1]
+     Normal and tail are both rational approximations of similar order on
+ shifted input - these are typically performed in parallel using gather
+ loads to obtain correct coefficients depending on interval. */
+ uint32x4_t is_tail = vcageq_f32 (x, v_f32 (0.75));
+ uint32x4_t extreme_tail = vcageq_f32 (x, v_f32 (0.9375));
+
+ if (unlikely (!v_any_u32 (is_tail)))
+ /* Shortcut for if all lanes are in [-0.75, 0.75] - can avoid having to
+ gather coefficients. If input is uniform in [-1, 1] then likelihood of
+ this is 0.75^4 ~= 0.31. */
+ return notails (x, d);
+
+ /* Select requisite shift depending on interval: polynomial is evaluated on
+ x * x - shift.
+ Normal shift = 0.5625
+ Tail shift = 0.87890625. */
+ float32x4_t t
+ = vfmaq_f32 (vbslq_f32 (is_tail, d->tailshift, v_f32 (-0.5625)), x, x);
+
+ /* Calculate indexes for tbl: tbl is byte-wise, so:
+ [0, 1, 2, 3, 4, 5, 6, ....] copies the vector
+ Add 4 * i to a group of 4 lanes to copy 32-bit lane i. Each vector stores
+ two pairs of coeffs, so we need two idx vectors - one for each pair. */
+ uint8x16_t off = vandq_u8 (vreinterpretq_u8_u32 (is_tail), vdupq_n_u8 (4));
+ uint8x16_t idx_lo = vaddq_u8 (d->idxlo, off);
+ uint8x16_t idx_hi = vaddq_u8 (d->idxhi, off);
+
+ /* Load the tables. */
+ float32x4_t p_lo = d->Plo;
+ float32x4_t pq = d->PQ;
+ float32x4_t qhi = d->Qhi;
+
+ /* Do the lookup (and calculate p3 by masking non-tail lanes). */
+ float32x4_t p3 = vreinterpretq_f32_u32 (
+ vandq_u32 (is_tail, vreinterpretq_u32_f32 (d->P29_3)));
+ float32x4_t p0 = lookup (p_lo, idx_lo), p1 = lookup (p_lo, idx_hi),
+ p2 = lookup (pq, idx_lo), q0 = lookup (pq, idx_hi),
+ q1 = lookup (qhi, idx_lo), q2 = lookup (qhi, idx_hi);
+
+ float32x4_t p = vfmaq_f32 (p2, p3, t);
+ p = vfmaq_f32 (p1, p, t);
+ p = vfmaq_f32 (p0, p, t);
+ p = vmulq_f32 (x, p);
+
+ float32x4_t q = vfmaq_f32 (q1, vaddq_f32 (q2, t), t);
+ q = vfmaq_f32 (q0, q, t);
+
+ if (unlikely (v_any_u32 (extreme_tail)))
+ /* At least one lane is in the extreme tail - if input is uniform in
+ [-1, 1] the likelihood of this is ~0.23. */
+ return vbslq_f32 (extreme_tail, special (x, d), vdivq_f32 (p, q));
+
+ return vdivq_f32 (p, q);
+}
+
+PL_SIG (V, F, 1, erfinv, -0.99, 0.99)
+PL_TEST_ULP (V_NAME_F1 (erfinv), 4.49)
+/* Test with control lane in each interval. */
+PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.5)
+PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.8)
+PL_TEST_SYM_INTERVAL_C (V_NAME_F1 (erfinv), 0, 0x1.fffffep-1, 40000, 0.95)
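
Editor's note: per lane, the extreme-tail branch of the single-precision routine reduces to the scalar computation sketched below. This is an unofficial reading of special () that reuses the P_50/Q_50 coefficients from the data struct above; it calls libm logf/sqrtf instead of the inlined vector log, so the last few bits may differ from the vector result.

#include <math.h>
#include <stdio.h>

/* Scalar sketch of the extreme-tail path (|x| in [0.9375, 1)).  */
static float
erfinvf_extreme_tail (float x)
{
  /* t = 1/sqrt(-log(1 - |x|)), as computed by special ().  */
  float t = 1.0f / sqrtf (-logf (1.0f - fabsf (x)));
  /* Degree-5 numerator with the P_50 coefficients.  */
  float p = 0x1.3d8948p-3f
	    + t * (0x1.61f9eap+0f
		   + t * (0x1.61c6bcp-1f
			  + t * (-0x1.20c9f2p+0f
				 + t * (0x1.5c704cp-1f
					+ t * -0x1.50c6bep-3f))));
  /* Monic quadratic denominator: t^2 + Q_50[1]*t + Q_50[0].  */
  float q = (t + 0x1.629e5p+0f) * t + 0x1.3d7dacp-3f;
  /* The sign of x is reattached, mirroring the copysign-style bit-select.  */
  return p / (copysignf (t, x) * q);
}

int
main (void)
{
  printf ("%a\n", erfinvf_extreme_tail (0.97f));
  return 0;
}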
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp10_2u.c b/contrib/arm-optimized-routines/pl/math/v_exp10_2u.c
new file mode 100644
index 000000000000..29072a60fb3a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp10_2u.c
@@ -0,0 +1,144 @@
+/*
+ * Double-precision vector 10^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Value of |x| above which scale overflows without special treatment. */
+#define SpecialBound 306.0 /* floor (log10 (2^1023)) - 1. */
+/* Value of n above which scale overflows even with special treatment. */
+#define ScaleBound 163840.0 /* 1280.0 * N. */
+
+const static struct data
+{
+ float64x2_t poly[4];
+ float64x2_t log10_2, log2_10_hi, log2_10_lo, shift;
+#if !WANT_SIMD_EXCEPT
+ float64x2_t special_bound, scale_thresh;
+#endif
+} data = {
+ /* Coefficients generated using Remez algorithm.
+ rel error: 0x1.5ddf8f28p-54
+ abs error: 0x1.5ed266c8p-54 in [ -log10(2)/256, log10(2)/256 ]
+ maxerr: 1.14432 +0.5 ulp. */
+ .poly = { V2 (0x1.26bb1bbb5524p1), V2 (0x1.53524c73cecdap1),
+ V2 (0x1.047060efb781cp1), V2 (0x1.2bd76040f0d16p0) },
+  .log10_2 = V2 (0x1.a934f0979a371p8), /* N/log10(2). */
+  .log2_10_hi = V2 (0x1.34413509f79ffp-9), /* log10(2)/N. */
+ .log2_10_lo = V2 (-0x1.9dc1da994fd21p-66),
+ .shift = V2 (0x1.8p+52),
+#if !WANT_SIMD_EXCEPT
+ .scale_thresh = V2 (ScaleBound),
+ .special_bound = V2 (SpecialBound),
+#endif
+};
+
+#define N (1 << V_EXP_TABLE_BITS)
+#define IndexMask v_u64 (N - 1)
+
+#if WANT_SIMD_EXCEPT
+
+# define TinyBound v_u64 (0x2000000000000000) /* asuint64 (0x1p-511). */
+# define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */
+# define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. */
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
+{
+ /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+ routine for special lanes. */
+ return v_call_f64 (exp10, x, y, cmp);
+}
+
+#else
+
+# define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0). */
+# define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */
+# define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */
+
+static inline float64x2_t VPCS_ATTR
+special_case (float64x2_t s, float64x2_t y, float64x2_t n,
+ const struct data *d)
+{
+ /* 2^(n/N) may overflow, break it up into s1*s2. */
+ uint64x2_t b = vandq_u64 (vcltzq_f64 (n), SpecialOffset);
+ float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b));
+ float64x2_t s2 = vreinterpretq_f64_u64 (
+ vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b));
+ uint64x2_t cmp = vcagtq_f64 (n, d->scale_thresh);
+ float64x2_t r1 = vmulq_f64 (s1, s1);
+ float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1);
+ return vbslq_f64 (cmp, r1, r0);
+}
+
+#endif
+
+/* Fast vector implementation of exp10.
+ Maximum measured error is 1.64 ulp.
+ _ZGVnN2v_exp10(0x1.ccd1c9d82cc8cp+0) got 0x1.f8dab6d7fed0cp+5
+ want 0x1.f8dab6d7fed0ap+5. */
+float64x2_t VPCS_ATTR V_NAME_D1 (exp10) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t cmp;
+#if WANT_SIMD_EXCEPT
+ /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+ special_case to fix special lanes later. This is only necessary if fenv
+ exceptions are to be triggered correctly. */
+ float64x2_t xm = x;
+ uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
+ cmp = vcgeq_u64 (vsubq_u64 (iax, TinyBound), Thres);
+ if (unlikely (v_any_u64 (cmp)))
+ x = vbslq_f64 (cmp, v_f64 (1), x);
+#else
+ cmp = vcageq_f64 (x, d->special_bound);
+#endif
+
+ /* n = round(x/(log10(2)/N)). */
+ float64x2_t z = vfmaq_f64 (d->shift, x, d->log10_2);
+ uint64x2_t u = vreinterpretq_u64_f64 (z);
+ float64x2_t n = vsubq_f64 (z, d->shift);
+
+ /* r = x - n*log10(2)/N. */
+ float64x2_t r = x;
+ r = vfmsq_f64 (r, d->log2_10_hi, n);
+ r = vfmsq_f64 (r, d->log2_10_lo, n);
+
+ uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS);
+ uint64x2_t i = vandq_u64 (u, IndexMask);
+
+ /* y = exp10(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t p = vfmaq_f64 (d->poly[0], r, d->poly[1]);
+ float64x2_t y = vfmaq_f64 (d->poly[2], r, d->poly[3]);
+ p = vfmaq_f64 (p, y, r2);
+ y = vmulq_f64 (r, p);
+
+ /* s = 2^(n/N). */
+ u = v_lookup_u64 (__v_exp_data, i);
+ float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
+
+ if (unlikely (v_any_u64 (cmp)))
+#if WANT_SIMD_EXCEPT
+ return special_case (xm, vfmaq_f64 (s, y, s), cmp);
+#else
+ return special_case (s, y, n, d);
+#endif
+
+ return vfmaq_f64 (s, y, s);
+}
+
+PL_SIG (S, D, 1, exp10, -9.9, 9.9)
+PL_SIG (V, D, 1, exp10, -9.9, 9.9)
+PL_TEST_ULP (V_NAME_D1 (exp10), 1.15)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (exp10), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), 0, SpecialBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), SpecialBound, ScaleBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp10), ScaleBound, inf, 10000)
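
Editor's note: the reduction above uses the usual shift trick: adding 0x1.8p52 to x*N/log10(2) leaves the rounded quotient in the low significand bits of z, so the table index can be masked straight out of the bit pattern while n is recovered by subtracting the shift again. A scalar illustration, assuming V_EXP_TABLE_BITS == 7 (the 128-entry __v_exp_data table added later in this import):

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define N 128 /* 1 << V_EXP_TABLE_BITS, assumed here.  */

int
main (void)
{
  double x = 3.25;
  double n_over_log10_2 = 0x1.a934f0979a371p8; /* N/log10(2).  */
  double shift = 0x1.8p52;

  /* Adding 0x1.8p52 rounds x*N/log10(2) to the nearest integer in the low
     significand bits of z; subtracting the shift recovers it as a double.  */
  double z = shift + x * n_over_log10_2;
  double n = z - shift;

  uint64_t u;
  memcpy (&u, &z, sizeof u);

  printf ("n         = %.1f\n", n); /* round (x*N/log10(2)).  */
  printf ("nearbyint = %.1f\n", nearbyint (x * n_over_log10_2));
  printf ("index     = %llu\n", (unsigned long long) (u & (N - 1)));
  return 0;
}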
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c b/contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c
new file mode 100644
index 000000000000..0e91becfa612
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp10f_2u4.c
@@ -0,0 +1,138 @@
+/*
+ * Single-precision vector 10^x function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f32.h"
+
+#define ScaleBound 192.0f
+
+static const struct data
+{
+ float32x4_t poly[5];
+ float32x4_t log10_2_and_inv, shift;
+
+#if !WANT_SIMD_EXCEPT
+ float32x4_t scale_thresh;
+#endif
+} data = {
+ /* Coefficients generated using Remez algorithm with minimisation of relative
+ error.
+ rel error: 0x1.89dafa3p-24
+ abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
+ maxerr: 1.85943 +0.5 ulp. */
+ .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f),
+ V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) },
+ .shift = V4 (0x1.8p23f),
+
+ /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */
+ .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 },
+#if !WANT_SIMD_EXCEPT
+ .scale_thresh = V4 (ScaleBound)
+#endif
+};
+
+#define ExponentBias v_u32 (0x3f800000)
+
+#if WANT_SIMD_EXCEPT
+
+# define SpecialBound 38.0f /* rint(log10(2^127)). */
+# define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */
+# define BigBound v_u32 (0x42180000) /* asuint (SpecialBound). */
+# define Thres v_u32 (0x22180000) /* BigBound - TinyBound. */
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
+{
+ /* If fenv exceptions are to be triggered correctly, fall back to the scalar
+     routine for special lanes. */
+ return v_call_f32 (exp10f, x, y, cmp);
+}
+
+#else
+
+# define SpecialBound 126.0f /* rint (log2 (2^127 / (1 + sqrt (2)))). */
+# define SpecialOffset v_u32 (0x82000000)
+# define SpecialBias v_u32 (0x7f000000)
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
+ float32x4_t scale, const struct data *d)
+{
+ /* 2^n may overflow, break it up into s1*s2. */
+ uint32x4_t b = vandq_u32 (vclezq_f32 (n), SpecialOffset);
+ float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, SpecialBias));
+ float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
+ uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
+ float32x4_t r2 = vmulq_f32 (s1, s1);
+ float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1);
+ /* Similar to r1 but avoids double rounding in the subnormal range. */
+ float32x4_t r0 = vfmaq_f32 (scale, poly, scale);
+ float32x4_t r = vbslq_f32 (cmp1, r1, r0);
+ return vbslq_f32 (cmp2, r2, r);
+}
+
+#endif
+
+/* Fast vector implementation of single-precision exp10.
+ Algorithm is accurate to 2.36 ULP.
+ _ZGVnN4v_exp10f(0x1.be2b36p+1) got 0x1.7e79c4p+11
+ want 0x1.7e79cp+11. */
+float32x4_t VPCS_ATTR V_NAME_F1 (exp10) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+#if WANT_SIMD_EXCEPT
+ /* asuint(x) - TinyBound >= BigBound - TinyBound. */
+ uint32x4_t cmp = vcgeq_u32 (
+ vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres);
+ float32x4_t xm = x;
+ /* If any lanes are special, mask them with 1 and retain a copy of x to allow
+ special case handler to fix special lanes later. This is only necessary if
+ fenv exceptions are to be triggered correctly. */
+ if (unlikely (v_any_u32 (cmp)))
+ x = v_zerofy_f32 (x, cmp);
+#endif
+
+ /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)),
+     with 1 + poly(r) in [1/sqrt(2), sqrt(2)] and
+ x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */
+ float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0);
+ float32x4_t n = vsubq_f32 (z, d->shift);
+ float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1);
+ r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+
+#if !WANT_SIMD_EXCEPT
+ uint32x4_t cmp = vcagtq_f32 (n, v_f32 (SpecialBound));
+#endif
+
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t poly
+ = vfmaq_f32 (vmulq_f32 (r, d->poly[0]),
+ v_pairwise_poly_3_f32 (r, r2, d->poly + 1), r2);
+
+ if (unlikely (v_any_u32 (cmp)))
+#if WANT_SIMD_EXCEPT
+ return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp);
+#else
+ return special_case (poly, n, e, cmp, scale, d);
+#endif
+
+ return vfmaq_f32 (scale, poly, scale);
+}
+
+PL_SIG (S, F, 1, exp10, -9.9, 9.9)
+PL_SIG (V, F, 1, exp10, -9.9, 9.9)
+PL_TEST_ULP (V_NAME_F1 (exp10), 1.86)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (exp10), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), 0, SpecialBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), SpecialBound, ScaleBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (exp10), ScaleBound, inf, 10000)
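
Editor's note: the special-case handler above splits the scale 2^n into two representable factors so that no intermediate overflows (or flushes to zero) before the polynomial correction is applied. The bit-level offsets are specific to the routine; the idea itself can be shown with a plain scalar sketch, where the split points 127/-126 are chosen only for illustration:

#include <math.h>
#include <stdio.h>

/* Compute 2^n * m in single precision without overflowing the intermediate
   2^n, by applying the scale in two representable halves.  */
static float
scaled (float m, int n)
{
  /* For large positive n, 2^n alone is not representable, but
     m * 2^(n-127) is finite, and the final multiply by 2^127 overflows
     only if the true result does.  */
  int half = n > 0 ? 127 : -126;
  return (m * exp2f ((float) (n - half))) * exp2f ((float) half);
}

int
main (void)
{
  /* 2^130 * 0.01 ~ 1.36e37 is finite, although 2^130 alone is inf.  */
  printf ("%g\n", scaled (0.01f, 130));
  printf ("%g\n", 0.01f * exp2f (130.0f)); /* inf: naive order overflows.  */
  return 0;
}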
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp2_2u.c b/contrib/arm-optimized-routines/pl/math/v_exp2_2u.c
new file mode 100644
index 000000000000..de59779689f5
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp2_2u.c
@@ -0,0 +1,128 @@
+/*
+ * Double-precision vector 2^x function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#define N (1 << V_EXP_TABLE_BITS)
+#define IndexMask (N - 1)
+#define BigBound 1022.0
+#define UOFlowBound 1280.0
+
+static const struct data
+{
+ float64x2_t poly[4];
+ float64x2_t shift, scale_big_bound, scale_uoflow_bound;
+} data = {
+ /* Coefficients are computed using Remez algorithm with
+ minimisation of the absolute error. */
+ .poly = { V2 (0x1.62e42fefa3686p-1), V2 (0x1.ebfbdff82c241p-3),
+ V2 (0x1.c6b09b16de99ap-5), V2 (0x1.3b2abf5571ad8p-7) },
+ .shift = V2 (0x1.8p52 / N),
+ .scale_big_bound = V2 (BigBound),
+ .scale_uoflow_bound = V2 (UOFlowBound),
+};
+
+static inline uint64x2_t
+lookup_sbits (uint64x2_t i)
+{
+ return (uint64x2_t){ __v_exp_data[i[0] & IndexMask],
+ __v_exp_data[i[1] & IndexMask] };
+}
+
+#if WANT_SIMD_EXCEPT
+
+# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */
+# define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */
+
+/* Call scalar exp2 as a fallback. */
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special)
+{
+ return v_call_f64 (exp2, x, y, is_special);
+}
+
+#else
+
+# define SpecialOffset 0x6000000000000000 /* 0x1p513. */
+/* SpecialBias1 - SpecialBias2 = asuint(1.0). */
+# define SpecialBias1 0x7000000000000000 /* 0x1p769. */
+# define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
+
+static inline float64x2_t VPCS_ATTR
+special_case (float64x2_t s, float64x2_t y, float64x2_t n,
+ const struct data *d)
+{
+ /* 2^(n/N) may overflow, break it up into s1*s2. */
+ uint64x2_t b = vandq_u64 (vclezq_f64 (n), v_u64 (SpecialOffset));
+ float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (v_u64 (SpecialBias1), b));
+ float64x2_t s2 = vreinterpretq_f64_u64 (
+ vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b));
+ uint64x2_t cmp = vcagtq_f64 (n, d->scale_uoflow_bound);
+ float64x2_t r1 = vmulq_f64 (s1, s1);
+ float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, s2, y), s1);
+ return vbslq_f64 (cmp, r1, r0);
+}
+
+#endif
+
+/* Fast vector implementation of exp2.
+ Maximum measured error is 1.65 ulp.
+ _ZGVnN2v_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
+ want 0x1.f8db0d4df721dp-1. */
+VPCS_ATTR
+float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t cmp;
+#if WANT_SIMD_EXCEPT
+ uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+ cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres));
+ /* Mask special lanes and retain a copy of x for passing to special-case
+ handler. */
+ float64x2_t xc = x;
+ x = v_zerofy_f64 (x, cmp);
+#else
+ cmp = vcagtq_f64 (x, d->scale_big_bound);
+#endif
+
+  /* n = round(x*N)/N, i.e. x rounded to the nearest multiple of 1/N. */
+ float64x2_t z = vaddq_f64 (d->shift, x);
+ uint64x2_t u = vreinterpretq_u64_f64 (z);
+ float64x2_t n = vsubq_f64 (z, d->shift);
+
+  /* r = x - n. */
+ float64x2_t r = vsubq_f64 (x, n);
+
+  /* s = 2^n. */
+ uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS);
+ u = lookup_sbits (u);
+ float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
+
+ /* y ~ exp2(r) - 1. */
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly);
+ y = vmulq_f64 (r, y);
+
+ if (unlikely (v_any_u64 (cmp)))
+#if !WANT_SIMD_EXCEPT
+ return special_case (s, y, n, d);
+#else
+ return special_case (xc, vfmaq_f64 (s, s, y), cmp);
+#endif
+ return vfmaq_f64 (s, s, y);
+}
+
+PL_SIG (V, D, 1, exp2, -9.9, 9.9)
+PL_TEST_ULP (V_NAME_D1 (exp2), 1.15)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (exp2), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), 0, TinyBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), TinyBound, BigBound, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), BigBound, UOFlowBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (exp2), UOFlowBound, inf, 10000)
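
Editor's note: for exp2 the shift constant is pre-divided by N, so n above is x rounded to the nearest multiple of 1/N rather than an integer, and r = x - n stays within 1/(2N). A scalar sketch of that decomposition, with N = 128 assumed to match __v_exp_data:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define N 128 /* assumed table size, 1 << V_EXP_TABLE_BITS.  */

int
main (void)
{
  double x = 5.379;
  double shift = 0x1.8p52 / N;

  /* n is x rounded to the nearest multiple of 1/N, so |r| <= 1/(2N).  */
  double z = shift + x;
  double n = z - shift;
  double r = x - n;

  uint64_t u;
  memcpy (&u, &z, sizeof u);
  uint64_t i = u & (N - 1); /* table index used for the 2^(i/N) scale.  */

  printf ("n = %a, r = %a, i = %llu\n", n, r, (unsigned long long) i);
  printf ("check: 2^n * 2^r = %.17g vs exp2(x) = %.17g\n",
	  exp2 (n) * exp2 (r), exp2 (x));
  return 0;
}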
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_data.c b/contrib/arm-optimized-routines/pl/math/v_exp_data.c
new file mode 100644
index 000000000000..fd01cf27606f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp_data.c
@@ -0,0 +1,55 @@
+/*
+ * Scale values for vector exp and exp2
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* 2^(j/N), j=0..N-1, N=2^7=128. Copied from math/v_exp_data.c. */
+const uint64_t __v_exp_data[] = {
+ 0x3ff0000000000000, 0x3feff63da9fb3335, 0x3fefec9a3e778061,
+ 0x3fefe315e86e7f85, 0x3fefd9b0d3158574, 0x3fefd06b29ddf6de,
+ 0x3fefc74518759bc8, 0x3fefbe3ecac6f383, 0x3fefb5586cf9890f,
+ 0x3fefac922b7247f7, 0x3fefa3ec32d3d1a2, 0x3fef9b66affed31b,
+ 0x3fef9301d0125b51, 0x3fef8abdc06c31cc, 0x3fef829aaea92de0,
+ 0x3fef7a98c8a58e51, 0x3fef72b83c7d517b, 0x3fef6af9388c8dea,
+ 0x3fef635beb6fcb75, 0x3fef5be084045cd4, 0x3fef54873168b9aa,
+ 0x3fef4d5022fcd91d, 0x3fef463b88628cd6, 0x3fef3f49917ddc96,
+ 0x3fef387a6e756238, 0x3fef31ce4fb2a63f, 0x3fef2b4565e27cdd,
+ 0x3fef24dfe1f56381, 0x3fef1e9df51fdee1, 0x3fef187fd0dad990,
+ 0x3fef1285a6e4030b, 0x3fef0cafa93e2f56, 0x3fef06fe0a31b715,
+ 0x3fef0170fc4cd831, 0x3feefc08b26416ff, 0x3feef6c55f929ff1,
+ 0x3feef1a7373aa9cb, 0x3feeecae6d05d866, 0x3feee7db34e59ff7,
+ 0x3feee32dc313a8e5, 0x3feedea64c123422, 0x3feeda4504ac801c,
+ 0x3feed60a21f72e2a, 0x3feed1f5d950a897, 0x3feece086061892d,
+ 0x3feeca41ed1d0057, 0x3feec6a2b5c13cd0, 0x3feec32af0d7d3de,
+ 0x3feebfdad5362a27, 0x3feebcb299fddd0d, 0x3feeb9b2769d2ca7,
+ 0x3feeb6daa2cf6642, 0x3feeb42b569d4f82, 0x3feeb1a4ca5d920f,
+ 0x3feeaf4736b527da, 0x3feead12d497c7fd, 0x3feeab07dd485429,
+ 0x3feea9268a5946b7, 0x3feea76f15ad2148, 0x3feea5e1b976dc09,
+ 0x3feea47eb03a5585, 0x3feea34634ccc320, 0x3feea23882552225,
+ 0x3feea155d44ca973, 0x3feea09e667f3bcd, 0x3feea012750bdabf,
+ 0x3fee9fb23c651a2f, 0x3fee9f7df9519484, 0x3fee9f75e8ec5f74,
+ 0x3fee9f9a48a58174, 0x3fee9feb564267c9, 0x3feea0694fde5d3f,
+ 0x3feea11473eb0187, 0x3feea1ed0130c132, 0x3feea2f336cf4e62,
+ 0x3feea427543e1a12, 0x3feea589994cce13, 0x3feea71a4623c7ad,
+ 0x3feea8d99b4492ed, 0x3feeaac7d98a6699, 0x3feeace5422aa0db,
+ 0x3feeaf3216b5448c, 0x3feeb1ae99157736, 0x3feeb45b0b91ffc6,
+ 0x3feeb737b0cdc5e5, 0x3feeba44cbc8520f, 0x3feebd829fde4e50,
+ 0x3feec0f170ca07ba, 0x3feec49182a3f090, 0x3feec86319e32323,
+ 0x3feecc667b5de565, 0x3feed09bec4a2d33, 0x3feed503b23e255d,
+ 0x3feed99e1330b358, 0x3feede6b5579fdbf, 0x3feee36bbfd3f37a,
+ 0x3feee89f995ad3ad, 0x3feeee07298db666, 0x3feef3a2b84f15fb,
+ 0x3feef9728de5593a, 0x3feeff76f2fb5e47, 0x3fef05b030a1064a,
+ 0x3fef0c1e904bc1d2, 0x3fef12c25bd71e09, 0x3fef199bdd85529c,
+ 0x3fef20ab5fffd07a, 0x3fef27f12e57d14b, 0x3fef2f6d9406e7b5,
+ 0x3fef3720dcef9069, 0x3fef3f0b555dc3fa, 0x3fef472d4a07897c,
+ 0x3fef4f87080d89f2, 0x3fef5818dcfba487, 0x3fef60e316c98398,
+ 0x3fef69e603db3285, 0x3fef7321f301b460, 0x3fef7c97337b9b5f,
+ 0x3fef864614f5a129, 0x3fef902ee78b3ff6, 0x3fef9a51fbc74c83,
+ 0x3fefa4afa2a490da, 0x3fefaf482d8e67f1, 0x3fefba1bee615a27,
+ 0x3fefc52b376bba97, 0x3fefd0765b6e4540, 0x3fefdbfdad9cbe14,
+ 0x3fefe7c1819e90d8, 0x3feff3c22b8f71f1,
+};
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_tail.h b/contrib/arm-optimized-routines/pl/math/v_exp_tail.h
new file mode 100644
index 000000000000..903f1fd95717
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp_tail.h
@@ -0,0 +1,21 @@
+/*
+ * Constants for double-precision e^(x+tail) vector function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define C1_scal 0x1.fffffffffffd4p-2
+#define C2_scal 0x1.5555571d6b68cp-3
+#define C3_scal 0x1.5555576a59599p-5
+#define InvLn2_scal 0x1.71547652b82fep8 /* N/ln2. */
+#define Ln2hi_scal 0x1.62e42fefa39efp-9 /* ln2/N. */
+#define Ln2lo_scal 0x1.abc9e3b39803f3p-64
+
+#define N (1 << V_EXP_TAIL_TABLE_BITS)
+#define Tab __v_exp_tail_data
+#define IndexMask_scal (N - 1)
+#define Shift_scal 0x1.8p+52
+#define Thres_scal 704.0
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c b/contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c
new file mode 100644
index 000000000000..989dd41d949a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp_tail_data.c
@@ -0,0 +1,98 @@
+/*
+ * Lookup table for double-precision e^x vector function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+/* 2^(j/N), j=0..N-1, N=2^8=256. Copied from math/v_exp_data.c. */
+const uint64_t __v_exp_tail_data[] = {
+ 0x3ff0000000000000, 0x3feffb1afa5abcbf, 0x3feff63da9fb3335,
+ 0x3feff168143b0281, 0x3fefec9a3e778061, 0x3fefe7d42e11bbcc,
+ 0x3fefe315e86e7f85, 0x3fefde5f72f654b1, 0x3fefd9b0d3158574,
+ 0x3fefd50a0e3c1f89, 0x3fefd06b29ddf6de, 0x3fefcbd42b72a836,
+ 0x3fefc74518759bc8, 0x3fefc2bdf66607e0, 0x3fefbe3ecac6f383,
+ 0x3fefb9c79b1f3919, 0x3fefb5586cf9890f, 0x3fefb0f145e46c85,
+ 0x3fefac922b7247f7, 0x3fefa83b23395dec, 0x3fefa3ec32d3d1a2,
+ 0x3fef9fa55fdfa9c5, 0x3fef9b66affed31b, 0x3fef973028d7233e,
+ 0x3fef9301d0125b51, 0x3fef8edbab5e2ab6, 0x3fef8abdc06c31cc,
+ 0x3fef86a814f204ab, 0x3fef829aaea92de0, 0x3fef7e95934f312e,
+ 0x3fef7a98c8a58e51, 0x3fef76a45471c3c2, 0x3fef72b83c7d517b,
+ 0x3fef6ed48695bbc0, 0x3fef6af9388c8dea, 0x3fef672658375d2f,
+ 0x3fef635beb6fcb75, 0x3fef5f99f8138a1c, 0x3fef5be084045cd4,
+ 0x3fef582f95281c6b, 0x3fef54873168b9aa, 0x3fef50e75eb44027,
+ 0x3fef4d5022fcd91d, 0x3fef49c18438ce4d, 0x3fef463b88628cd6,
+ 0x3fef42be3578a819, 0x3fef3f49917ddc96, 0x3fef3bdda27912d1,
+ 0x3fef387a6e756238, 0x3fef351ffb82140a, 0x3fef31ce4fb2a63f,
+ 0x3fef2e85711ece75, 0x3fef2b4565e27cdd, 0x3fef280e341ddf29,
+ 0x3fef24dfe1f56381, 0x3fef21ba7591bb70, 0x3fef1e9df51fdee1,
+ 0x3fef1b8a66d10f13, 0x3fef187fd0dad990, 0x3fef157e39771b2f,
+ 0x3fef1285a6e4030b, 0x3fef0f961f641589, 0x3fef0cafa93e2f56,
+ 0x3fef09d24abd886b, 0x3fef06fe0a31b715, 0x3fef0432edeeb2fd,
+ 0x3fef0170fc4cd831, 0x3feefeb83ba8ea32, 0x3feefc08b26416ff,
+ 0x3feef96266e3fa2d, 0x3feef6c55f929ff1, 0x3feef431a2de883b,
+ 0x3feef1a7373aa9cb, 0x3feeef26231e754a, 0x3feeecae6d05d866,
+ 0x3feeea401b7140ef, 0x3feee7db34e59ff7, 0x3feee57fbfec6cf4,
+ 0x3feee32dc313a8e5, 0x3feee0e544ede173, 0x3feedea64c123422,
+ 0x3feedc70df1c5175, 0x3feeda4504ac801c, 0x3feed822c367a024,
+ 0x3feed60a21f72e2a, 0x3feed3fb2709468a, 0x3feed1f5d950a897,
+ 0x3feecffa3f84b9d4, 0x3feece086061892d, 0x3feecc2042a7d232,
+ 0x3feeca41ed1d0057, 0x3feec86d668b3237, 0x3feec6a2b5c13cd0,
+ 0x3feec4e1e192aed2, 0x3feec32af0d7d3de, 0x3feec17dea6db7d7,
+ 0x3feebfdad5362a27, 0x3feebe41b817c114, 0x3feebcb299fddd0d,
+ 0x3feebb2d81d8abff, 0x3feeb9b2769d2ca7, 0x3feeb8417f4531ee,
+ 0x3feeb6daa2cf6642, 0x3feeb57de83f4eef, 0x3feeb42b569d4f82,
+ 0x3feeb2e2f4f6ad27, 0x3feeb1a4ca5d920f, 0x3feeb070dde910d2,
+ 0x3feeaf4736b527da, 0x3feeae27dbe2c4cf, 0x3feead12d497c7fd,
+ 0x3feeac0827ff07cc, 0x3feeab07dd485429, 0x3feeaa11fba87a03,
+ 0x3feea9268a5946b7, 0x3feea84590998b93, 0x3feea76f15ad2148,
+ 0x3feea6a320dceb71, 0x3feea5e1b976dc09, 0x3feea52ae6cdf6f4,
+ 0x3feea47eb03a5585, 0x3feea3dd1d1929fd, 0x3feea34634ccc320,
+ 0x3feea2b9febc8fb7, 0x3feea23882552225, 0x3feea1c1c70833f6,
+ 0x3feea155d44ca973, 0x3feea0f4b19e9538, 0x3feea09e667f3bcd,
+ 0x3feea052fa75173e, 0x3feea012750bdabf, 0x3fee9fdcddd47645,
+ 0x3fee9fb23c651a2f, 0x3fee9f9298593ae5, 0x3fee9f7df9519484,
+ 0x3fee9f7466f42e87, 0x3fee9f75e8ec5f74, 0x3fee9f8286ead08a,
+ 0x3fee9f9a48a58174, 0x3fee9fbd35d7cbfd, 0x3fee9feb564267c9,
+ 0x3feea024b1ab6e09, 0x3feea0694fde5d3f, 0x3feea0b938ac1cf6,
+ 0x3feea11473eb0187, 0x3feea17b0976cfdb, 0x3feea1ed0130c132,
+ 0x3feea26a62ff86f0, 0x3feea2f336cf4e62, 0x3feea3878491c491,
+ 0x3feea427543e1a12, 0x3feea4d2add106d9, 0x3feea589994cce13,
+ 0x3feea64c1eb941f7, 0x3feea71a4623c7ad, 0x3feea7f4179f5b21,
+ 0x3feea8d99b4492ed, 0x3feea9cad931a436, 0x3feeaac7d98a6699,
+ 0x3feeabd0a478580f, 0x3feeace5422aa0db, 0x3feeae05bad61778,
+ 0x3feeaf3216b5448c, 0x3feeb06a5e0866d9, 0x3feeb1ae99157736,
+ 0x3feeb2fed0282c8a, 0x3feeb45b0b91ffc6, 0x3feeb5c353aa2fe2,
+ 0x3feeb737b0cdc5e5, 0x3feeb8b82b5f98e5, 0x3feeba44cbc8520f,
+ 0x3feebbdd9a7670b3, 0x3feebd829fde4e50, 0x3feebf33e47a22a2,
+ 0x3feec0f170ca07ba, 0x3feec2bb4d53fe0d, 0x3feec49182a3f090,
+ 0x3feec674194bb8d5, 0x3feec86319e32323, 0x3feeca5e8d07f29e,
+ 0x3feecc667b5de565, 0x3feece7aed8eb8bb, 0x3feed09bec4a2d33,
+ 0x3feed2c980460ad8, 0x3feed503b23e255d, 0x3feed74a8af46052,
+ 0x3feed99e1330b358, 0x3feedbfe53c12e59, 0x3feede6b5579fdbf,
+ 0x3feee0e521356eba, 0x3feee36bbfd3f37a, 0x3feee5ff3a3c2774,
+ 0x3feee89f995ad3ad, 0x3feeeb4ce622f2ff, 0x3feeee07298db666,
+ 0x3feef0ce6c9a8952, 0x3feef3a2b84f15fb, 0x3feef68415b749b1,
+ 0x3feef9728de5593a, 0x3feefc6e29f1c52a, 0x3feeff76f2fb5e47,
+ 0x3fef028cf22749e4, 0x3fef05b030a1064a, 0x3fef08e0b79a6f1f,
+ 0x3fef0c1e904bc1d2, 0x3fef0f69c3f3a207, 0x3fef12c25bd71e09,
+ 0x3fef16286141b33d, 0x3fef199bdd85529c, 0x3fef1d1cd9fa652c,
+ 0x3fef20ab5fffd07a, 0x3fef244778fafb22, 0x3fef27f12e57d14b,
+ 0x3fef2ba88988c933, 0x3fef2f6d9406e7b5, 0x3fef33405751c4db,
+ 0x3fef3720dcef9069, 0x3fef3b0f2e6d1675, 0x3fef3f0b555dc3fa,
+ 0x3fef43155b5bab74, 0x3fef472d4a07897c, 0x3fef4b532b08c968,
+ 0x3fef4f87080d89f2, 0x3fef53c8eacaa1d6, 0x3fef5818dcfba487,
+ 0x3fef5c76e862e6d3, 0x3fef60e316c98398, 0x3fef655d71ff6075,
+ 0x3fef69e603db3285, 0x3fef6e7cd63a8315, 0x3fef7321f301b460,
+ 0x3fef77d5641c0658, 0x3fef7c97337b9b5f, 0x3fef81676b197d17,
+ 0x3fef864614f5a129, 0x3fef8b333b16ee12, 0x3fef902ee78b3ff6,
+ 0x3fef953924676d76, 0x3fef9a51fbc74c83, 0x3fef9f7977cdb740,
+ 0x3fefa4afa2a490da, 0x3fefa9f4867cca6e, 0x3fefaf482d8e67f1,
+ 0x3fefb4aaa2188510, 0x3fefba1bee615a27, 0x3fefbf9c1cb6412a,
+ 0x3fefc52b376bba97, 0x3fefcac948dd7274, 0x3fefd0765b6e4540,
+ 0x3fefd632798844f8, 0x3fefdbfdad9cbe14, 0x3fefe1d802243c89,
+ 0x3fefe7c1819e90d8, 0x3fefedba3692d514, 0x3feff3c22b8f71f1,
+ 0x3feff9d96b2a23d9,
+};
diff --git a/contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h b/contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h
new file mode 100644
index 000000000000..76ecc6b0a33a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_exp_tail_inline.h
@@ -0,0 +1,102 @@
+/*
+ * Double-precision vector e^(x+tail) function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#ifndef PL_MATH_V_EXP_TAIL_INLINE_H
+#define PL_MATH_V_EXP_TAIL_INLINE_H
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+#ifndef WANT_V_EXP_TAIL_SPECIALCASE
+#error \
+ "Cannot use v_exp_tail_inline.h without specifying whether you need the special case computation."
+#endif
+
+#define N (1 << V_EXP_TAIL_TABLE_BITS)
+
+static const struct data
+{
+ float64x2_t poly[4];
+#if WANT_V_EXP_TAIL_SPECIALCASE
+ float64x2_t big_bound, huge_bound;
+#endif
+ float64x2_t shift, invln2, ln2_hi, ln2_lo;
+} data = {
+#if WANT_V_EXP_TAIL_SPECIALCASE
+ .big_bound = V2 (704.0),
+ .huge_bound = V2 (1280.0 * N),
+#endif
+ .shift = V2 (0x1.8p52),
+ .invln2 = V2 (0x1.71547652b82fep8), /* N/ln2. */
+ .ln2_hi = V2 (0x1.62e42fefa39efp-9), /* ln2/N. */
+ .ln2_lo = V2 (0x1.abc9e3b39803f3p-64),
+ .poly = { V2 (1.0), V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6b68cp-3),
+ V2 (0x1.5555576a59599p-5) },
+};
+
+static inline uint64x2_t
+lookup_sbits (uint64x2_t i)
+{
+ return (uint64x2_t){__v_exp_tail_data[i[0]], __v_exp_tail_data[i[1]]};
+}
+
+#if WANT_V_EXP_TAIL_SPECIALCASE
+#define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */
+/* The following 2 bias when combined form the exponent bias:
+ SpecialBias1 - SpecialBias2 = asuint64(1.0). */
+#define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */
+#define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */
+static float64x2_t VPCS_ATTR
+v_exp_tail_special_case (float64x2_t s, float64x2_t y, float64x2_t n,
+ const struct data *d)
+{
+ /* 2^(n/N) may overflow, break it up into s1*s2. */
+ uint64x2_t b = vandq_u64 (vclezq_f64 (n), SpecialOffset);
+ float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b));
+ float64x2_t s2 = vreinterpretq_f64_u64 (
+ vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b));
+ uint64x2_t oflow = vcagtq_f64 (n, d->huge_bound);
+ float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1);
+ float64x2_t r1 = vmulq_f64 (s1, s1);
+ return vbslq_f64 (oflow, r1, r0);
+}
+#endif
+
+static inline float64x2_t VPCS_ATTR
+v_exp_tail_inline (float64x2_t x, float64x2_t xtail)
+{
+ const struct data *d = ptr_barrier (&data);
+#if WANT_V_EXP_TAIL_SPECIALCASE
+ uint64x2_t special = vcgtq_f64 (vabsq_f64 (x), d->big_bound);
+#endif
+ /* n = round(x/(ln2/N)). */
+ float64x2_t z = vfmaq_f64 (d->shift, x, d->invln2);
+ uint64x2_t u = vreinterpretq_u64_f64 (z);
+ float64x2_t n = vsubq_f64 (z, d->shift);
+
+ /* r = x - n*ln2/N. */
+ float64x2_t r = x;
+ r = vfmsq_f64 (r, d->ln2_hi, n);
+ r = vfmsq_f64 (r, d->ln2_lo, n);
+
+ uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TAIL_TABLE_BITS);
+ uint64x2_t i = vandq_u64 (u, v_u64 (N - 1));
+
+ /* y = tail + exp(r) - 1 ~= r + C1 r^2 + C2 r^3 + C3 r^4, using Horner. */
+ float64x2_t y = v_horner_3_f64 (r, d->poly);
+ y = vfmaq_f64 (xtail, y, r);
+
+ /* s = 2^(n/N). */
+ u = lookup_sbits (i);
+ float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
+
+#if WANT_V_EXP_TAIL_SPECIALCASE
+ if (unlikely (v_any_u64 (special)))
+ return v_exp_tail_special_case (s, y, n, d);
+#endif
+ return vfmaq_f64 (s, y, s);
+}
+#endif // PL_MATH_V_EXP_TAIL_INLINE_H
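
Editor's note: a hypothetical consumer of this header looks like the sketch below. The wrapper name is invented, and the sketch assumes it is compiled inside the pl/math tree so that v_math.h and __v_exp_tail_data are available.

/* Compute e^(x + tail) for lanes whose |x| is known to stay below the 704.0
   bound, so the special-case path can be compiled out.  */
#define WANT_V_EXP_TAIL_SPECIALCASE 0
#include "v_exp_tail_inline.h"

static inline float64x2_t
exp_with_tail (float64x2_t x, float64x2_t tail)
{
  return v_exp_tail_inline (x, tail);
}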
diff --git a/contrib/arm-optimized-routines/pl/math/v_expf_inline.h b/contrib/arm-optimized-routines/pl/math/v_expf_inline.h
new file mode 100644
index 000000000000..166683726b4d
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_expf_inline.h
@@ -0,0 +1,60 @@
+/*
+ * Helper for single-precision routines which calculate exp(x) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_V_EXPF_INLINE_H
+#define PL_MATH_V_EXPF_INLINE_H
+
+#include "v_math.h"
+
+struct v_expf_data
+{
+ float32x4_t poly[5];
+ float32x4_t shift, invln2_and_ln2;
+};
+
+/* maxerr: 1.45358 +0.5 ulp. */
+#define V_EXPF_DATA \
+ { \
+ .poly = { V4 (0x1.0e4020p-7f), V4 (0x1.573e2ep-5f), V4 (0x1.555e66p-3f), \
+ V4 (0x1.fffdb6p-2f), V4 (0x1.ffffecp-1f) }, \
+ .shift = V4 (0x1.8p23f), \
+ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \
+ }
+
+#define ExponentBias v_u32 (0x3f800000) /* asuint(1.0f). */
+#define C(i) d->poly[i]
+
+static inline float32x4_t
+v_expf_inline (float32x4_t x, const struct v_expf_data *d)
+{
+ /* Helper routine for calculating exp(x).
+ Copied from v_expf.c, with all special-case handling removed - the
+ calling routine should handle special values if required. */
+
+ /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
+ x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
+ float32x4_t n, r, z;
+ z = vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0);
+ n = vsubq_f32 (z, d->shift);
+ r = vfmsq_laneq_f32 (x, n, d->invln2_and_ln2, 1);
+ r = vfmsq_laneq_f32 (r, n, d->invln2_and_ln2, 2);
+ uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23);
+ float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias));
+
+ /* Custom order-4 Estrin avoids building high order monomial. */
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t p, q, poly;
+ p = vfmaq_f32 (C (1), C (0), r);
+ q = vfmaq_f32 (C (3), C (2), r);
+ q = vfmaq_f32 (q, p, r2);
+ p = vmulq_f32 (C (4), r);
+ poly = vfmaq_f32 (p, q, r2);
+ return vfmaq_f32 (scale, poly, scale);
+}
+
+#endif // PL_MATH_V_EXPF_INLINE_H
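
Editor's note: usage sketch, not part of the import. A routine that needs exp(x) on lanes already known to be in range can embed the helper as below; the sinh-style wrapper is purely illustrative and assumes the pl/math build environment.

#include "v_expf_inline.h"

static const struct v_expf_data expf_consts = V_EXPF_DATA;

/* sinh(x) ~ (e^x - e^-x)/2 for lanes well within range; no special-case
   handling here.  */
static inline float32x4_t
sinhf_core (float32x4_t x)
{
  float32x4_t e = v_expf_inline (x, &expf_consts);
  /* Reuse the single evaluation: e^-x = 1/e^x.  */
  float32x4_t em = vdivq_f32 (v_f32 (1.0f), e);
  return vmulq_f32 (vsubq_f32 (e, em), v_f32 (0.5f));
}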
diff --git a/contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c b/contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c
new file mode 100644
index 000000000000..dd255472cec0
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_expm1_2u5.c
@@ -0,0 +1,118 @@
+/*
+ * Double-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[11];
+ float64x2_t invln2, ln2, shift;
+ int64x2_t exponent_bias;
+#if WANT_SIMD_EXCEPT
+ uint64x2_t thresh, tiny_bound;
+#else
+ float64x2_t oflow_bound;
+#endif
+} data = {
+  /* Generated using fpminimax, with degree=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
+ V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
+ V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
+ V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
+ V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) },
+ .invln2 = V2 (0x1.71547652b82fep0),
+ .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 },
+ .shift = V2 (0x1.8p52),
+ .exponent_bias = V2 (0x3ff0000000000000),
+#if WANT_SIMD_EXCEPT
+ /* asuint64(oflow_bound) - asuint64(0x1p-51), shifted left by 1 for abs
+ compare. */
+ .thresh = V2 (0x78c56fa6d34b552),
+ /* asuint64(0x1p-51) << 1. */
+ .tiny_bound = V2 (0x3cc0000000000000 << 1),
+#else
+ /* Value above which expm1(x) should overflow. Absolute value of the
+ underflow bound is greater than this, so it catches both cases - there is
+ a small window where fallbacks are triggered unnecessarily. */
+ .oflow_bound = V2 (0x1.62b7d369a5aa9p+9),
+#endif
+};
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (expm1, x, y, special);
+}
+
+/* Double-precision vector exp(x) - 1 function.
+   The maximum observed error is 2.18 ULP:
+ _ZGVnN2v_expm1 (0x1.634ba0c237d7bp-2) got 0x1.a8b9ea8d66e22p-2
+ want 0x1.a8b9ea8d66e2p-2. */
+float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to scalar for
+ |x| < 2^-51, |x| > oflow_bound, Inf & NaN. Add ix to itself for
+ shift-left by 1, and compare with thresh which was left-shifted offline -
+ this is effectively an absolute compare. */
+ uint64x2_t special
+ = vcgeq_u64 (vsubq_u64 (vaddq_u64 (ix, ix), d->tiny_bound), d->thresh);
+ if (unlikely (v_any_u64 (special)))
+ x = v_zerofy_f64 (x, special);
+#else
+ /* Large input, NaNs and Infs. */
+ uint64x2_t special = vcageq_f64 (x, d->oflow_bound);
+#endif
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift);
+ int64x2_t i = vcvtq_s64_f64 (n);
+ float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0);
+ f = vfmsq_laneq_f64 (f, n, d->ln2, 1);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t f4 = vmulq_f64 (f2, f2);
+ float64x2_t f8 = vmulq_f64 (f4, f4);
+ float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ int64x2_t u = vaddq_s64 (vshlq_n_s64 (i, 52), d->exponent_bias);
+ float64x2_t t = vreinterpretq_f64_s64 (u);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (vreinterpretq_f64_u64 (ix),
+ vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t),
+ special);
+
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
+}
+
+PL_SIG (V, D, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (V_NAME_D1 (expm1), 1.68)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (expm1), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0, 0x1p-51, 1000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1p-51, 0x1.62b7d369a5aa9p+9, 100000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (expm1), 0x1.62b7d369a5aa9p+9, inf, 100)
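
Editor's note: the reconstruction expm1(x) ~= p*t + (t - 1), with t = 2^i, is a rearrangement of 2^i*(p + 1) - 1 that ends in a single fma. A scalar sketch showing the two forms agree (input values are arbitrary):

#include <math.h>
#include <stdio.h>

/* Given i and p ~ expm1(f), assemble expm1(f + i*ln2) as p*t + (t - 1).  */
static double
reconstruct (double p, int i)
{
  double t = ldexp (1.0, i); /* t = 2^i, exact.  */
  return p * t + (t - 1.0);
}

int
main (void)
{
  /* x = f + i*ln2 with i = 3, f = 0.1.  */
  double f = 0.1, p = expm1 (f);
  double x = f + 3 * log (2.0);
  printf ("%.17g\n%.17g\n", reconstruct (p, 3), expm1 (x));
  return 0;
}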
diff --git a/contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c b/contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c
new file mode 100644
index 000000000000..6b282d0cc00f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_expm1f_1u6.c
@@ -0,0 +1,117 @@
+/*
+ * Single-precision vector exp(x) - 1 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t poly[5];
+ float32x4_t invln2_and_ln2;
+ float32x4_t shift;
+ int32x4_t exponent_bias;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t thresh;
+#else
+ float32x4_t oflow_bound;
+#endif
+} data = {
+ /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */
+ .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5),
+ V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) },
+ /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */
+ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 },
+ .shift = V4 (0x1.8p23f),
+ .exponent_bias = V4 (0x3f800000),
+#if !WANT_SIMD_EXCEPT
+ /* Value above which expm1f(x) should overflow. Absolute value of the
+ underflow bound is greater than this, so it catches both cases - there is
+ a small window where fallbacks are triggered unnecessarily. */
+ .oflow_bound = V4 (0x1.5ebc4p+6),
+#else
+ /* asuint(oflow_bound) - asuint(0x1p-23), shifted left by 1 for absolute
+ compare. */
+ .thresh = V4 (0x1d5ebc40),
+#endif
+};
+
+/* asuint(0x1p-23), shifted by 1 for abs compare. */
+#define TinyBound v_u32 (0x34000000 << 1)
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (expm1f, x, y, special);
+}
+
+/* Single-precision vector exp(x) - 1 function.
+ The maximum error is 1.51 ULP:
+ _ZGVnN4v_expm1f (0x1.8baa96p-2) got 0x1.e2fb9p-2
+ want 0x1.e2fb94p-2. */
+float32x4_t VPCS_ATTR V_NAME_F1 (expm1) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered correctly, fall back to scalar for
+ |x| < 2^-23, |x| > oflow_bound, Inf & NaN. Add ix to itself for
+ shift-left by 1, and compare with thresh which was left-shifted offline -
+ this is effectively an absolute compare. */
+ uint32x4_t special
+ = vcgeq_u32 (vsubq_u32 (vaddq_u32 (ix, ix), TinyBound), d->thresh);
+ if (unlikely (v_any_u32 (special)))
+ x = v_zerofy_f32 (x, special);
+#else
+ /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */
+ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound);
+#endif
+
+ /* Reduce argument to smaller range:
+ Let i = round(x / ln2)
+ and f = x - i * ln2, then f is in [-ln2/2, ln2/2].
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where 2^i is exact because i is an integer. */
+ float32x4_t j = vsubq_f32 (
+ vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
+ int32x4_t i = vcvtq_s32_f32 (j);
+ float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
+ f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
+
+ /* Approximate expm1(f) using polynomial.
+ Taylor expansion for expm1(x) has the form:
+ x + ax^2 + bx^3 + cx^4 ....
+ So we calculate the polynomial P(f) = a + bf + cf^2 + ...
+ and assemble the approximation expm1(f) ~= f + f^2 * P(f). */
+ float32x4_t p = v_horner_4_f32 (f, d->poly);
+ p = vfmaq_f32 (f, vmulq_f32 (f, f), p);
+
+ /* Assemble the result.
+ expm1(x) ~= 2^i * (p + 1) - 1
+ Let t = 2^i. */
+ int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
+ float32x4_t t = vreinterpretq_f32_s32 (u);
+
+ if (unlikely (v_any_u32 (special)))
+ return special_case (vreinterpretq_f32_u32 (ix),
+ vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t),
+ special);
+
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
+}
+
+PL_SIG (V, F, 1, expm1, -9.9, 9.9)
+PL_TEST_ULP (V_NAME_F1 (expm1), 1.02)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (expm1), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (expm1), 0, 0x1p-23, 1000)
+PL_TEST_INTERVAL (V_NAME_F1 (expm1), 0x1p-23, 0x1.5ebc4p+6, 1000000)
+PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1p-23, -0x1.9bbabcp+6, 1000000)
+PL_TEST_INTERVAL (V_NAME_F1 (expm1), 0x1.5ebc4p+6, inf, 1000)
+PL_TEST_INTERVAL (V_NAME_F1 (expm1), -0x1.9bbabcp+6, -inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h b/contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h
new file mode 100644
index 000000000000..6ae94c452de2
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_expm1f_inline.h
@@ -0,0 +1,63 @@
+/*
+ * Helper for single-precision routines which calculate exp(x) - 1 and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_V_EXPM1F_INLINE_H
+#define PL_MATH_V_EXPM1F_INLINE_H
+
+#include "v_math.h"
+#include "math_config.h"
+#include "poly_advsimd_f32.h"
+
+struct v_expm1f_data
+{
+ float32x4_t poly[5];
+ float32x4_t invln2_and_ln2, shift;
+ int32x4_t exponent_bias;
+};
+
+/* Coefficients generated using fpminimax with degree=5 in [-log(2)/2,
+ log(2)/2]. Exponent bias is asuint(1.0f).
+   invln2_and_ln2 stores constants: invln2, ln2_hi, ln2_lo, 0. */
+#define V_EXPM1F_DATA \
+ { \
+ .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), \
+ V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, \
+ .shift = V4 (0x1.8p23f), .exponent_bias = V4 (0x3f800000), \
+ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, \
+ }
+
+static inline float32x4_t
+expm1f_inline (float32x4_t x, const struct v_expm1f_data *d)
+{
+ /* Helper routine for calculating exp(x) - 1.
+ Copied from v_expm1f_1u6.c, with all special-case handling removed - the
+ calling routine should handle special values if required. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ float32x4_t j = vsubq_f32 (
+ vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift);
+ int32x4_t i = vcvtq_s32_f32 (j);
+ float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1);
+ f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2);
+
+ /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f).
+ Uses Estrin scheme, where the main _ZGVnN4v_expm1f routine uses
+ Horner. */
+ float32x4_t f2 = vmulq_f32 (f, f);
+ float32x4_t f4 = vmulq_f32 (f2, f2);
+ float32x4_t p = v_estrin_4_f32 (f, f2, f4, d->poly);
+ p = vfmaq_f32 (f, f2, p);
+
+ /* t = 2^i. */
+ int32x4_t u = vaddq_s32 (vshlq_n_s32 (i, 23), d->exponent_bias);
+ float32x4_t t = vreinterpretq_f32_s32 (u);
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f32 (vsubq_f32 (t, v_f32 (1.0f)), p, t);
+}
+
+#endif // PL_MATH_V_EXPM1F_INLINE_H
diff --git a/contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c b/contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c
new file mode 100644
index 000000000000..d4ff7be89a8f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_hypot_1u5.c
@@ -0,0 +1,95 @@
+/*
+ * Double-precision vector hypot(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if WANT_SIMD_EXCEPT
+static const struct data
+{
+ uint64x2_t tiny_bound, thres;
+} data = {
+ .tiny_bound = V2 (0x2000000000000000), /* asuint (0x1p-511). */
+ .thres = V2 (0x3fe0000000000000), /* asuint (0x1p511) - tiny_bound. */
+};
+#else
+static const struct data
+{
+ uint64x2_t tiny_bound;
+ uint32x4_t thres;
+} data = {
+ .tiny_bound = V2 (0x0360000000000000), /* asuint (0x1p-969). */
+ .thres = V4 (0x7c900000), /* asuint (inf) - tiny_bound. */
+};
+#endif
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, float64x2_t sqsum,
+ uint32x2_t special)
+{
+ return v_call2_f64 (hypot, x, y, vsqrtq_f64 (sqsum), vmovl_u32 (special));
+}
+
+/* Vector implementation of double-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVnN2vv_hypot (0x1.6a1b193ff85b5p-204, 0x1.bc50676c2a447p-222)
+ got 0x1.6a1b19400964ep-204
+ want 0x1.6a1b19400964dp-204. */
+#if WANT_SIMD_EXCEPT
+
+float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ float64x2_t ay = vabsq_f64 (y);
+
+ uint64x2_t ix = vreinterpretq_u64_f64 (ax);
+ uint64x2_t iy = vreinterpretq_u64_f64 (ay);
+
+ /* Extreme values, NaNs, and infinities should be handled by the scalar
+ fallback for correct flag handling. */
+ uint64x2_t specialx = vcgeq_u64 (vsubq_u64 (ix, d->tiny_bound), d->thres);
+ uint64x2_t specialy = vcgeq_u64 (vsubq_u64 (iy, d->tiny_bound), d->thres);
+ ax = v_zerofy_f64 (ax, specialx);
+ ay = v_zerofy_f64 (ay, specialy);
+ uint32x2_t special = vaddhn_u64 (specialx, specialy);
+
+ float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (ax, ax), ay, ay);
+
+ if (unlikely (v_any_u32h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f64 (sqsum);
+}
+#else
+
+float64x2_t VPCS_ATTR V_NAME_D2 (hypot) (float64x2_t x, float64x2_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t sqsum = vfmaq_f64 (vmulq_f64 (x, x), y, y);
+
+ uint32x2_t special = vcge_u32 (
+ vsubhn_u64 (vreinterpretq_u64_f64 (sqsum), d->tiny_bound),
+ vget_low_u32 (d->thres));
+
+ if (unlikely (v_any_u32h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f64 (sqsum);
+}
+#endif
+
+PL_SIG (V, D, 2, hypot, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_D2 (hypot), 1.21)
+PL_TEST_EXPECT_FENV (V_NAME_D2 (hypot), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
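
Editor's note: the special-case detection above only needs the top 32 bits of each double's bit pattern, so it uses narrowing operations: vsubhn_u64 subtracts and keeps the high half of each 64-bit lane, and vaddhn_u64 merges the two per-operand masks into one half-width mask. A standalone illustration using asuint64(inf), asuint64(1.0) and the tiny bound from the data struct:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* Bit patterns of inf and 1.0.  */
  uint64x2_t bits = { 0x7ff0000000000000, 0x3ff0000000000000 };
  uint64x2_t tiny = vdupq_n_u64 (0x0360000000000000); /* asuint64(0x1p-969).  */
  /* High 32 bits of (bits - tiny) per lane: { 0x7c900000, 0x3c900000 }.  */
  uint32x2_t hi = vsubhn_u64 (bits, tiny);
  /* Lane 0 (inf) reaches the 0x7c900000 threshold, lane 1 (1.0) does not.  */
  printf ("%#x %#x\n", (unsigned) vget_lane_u32 (hi, 0),
	  (unsigned) vget_lane_u32 (hi, 1));
  return 0;
}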
diff --git a/contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c b/contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c
new file mode 100644
index 000000000000..3227b0a3fd8b
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_hypotf_1u5.c
@@ -0,0 +1,94 @@
+/*
+ * Single-precision vector hypot(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#if WANT_SIMD_EXCEPT
+static const struct data
+{
+ uint32x4_t tiny_bound, thres;
+} data = {
+ .tiny_bound = V4 (0x20000000), /* asuint (0x1p-63). */
+ .thres = V4 (0x3f000000), /* asuint (0x1p63) - tiny_bound. */
+};
+#else
+static const struct data
+{
+ uint32x4_t tiny_bound;
+ uint16x8_t thres;
+} data = {
+ .tiny_bound = V4 (0x0C800000), /* asuint (0x1p-102). */
+ .thres = V8 (0x7300), /* asuint (inf) - tiny_bound. */
+};
+#endif
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, float32x4_t sqsum,
+ uint16x4_t special)
+{
+ return v_call2_f32 (hypotf, x, y, vsqrtq_f32 (sqsum), vmovl_u16 (special));
+}
+
+/* Vector implementation of single-precision hypot.
+ Maximum error observed is 1.21 ULP:
+ _ZGVnN4vv_hypotf (0x1.6a419cp-13, 0x1.82a852p-22) got 0x1.6a41d2p-13
+ want 0x1.6a41dp-13. */
+#if WANT_SIMD_EXCEPT
+
+float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t ax = vabsq_f32 (x);
+ float32x4_t ay = vabsq_f32 (y);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (ax);
+ uint32x4_t iy = vreinterpretq_u32_f32 (ay);
+
+ /* Extreme values, NaNs, and infinities should be handled by the scalar
+ fallback for correct flag handling. */
+ uint32x4_t specialx = vcgeq_u32 (vsubq_u32 (ix, d->tiny_bound), d->thres);
+ uint32x4_t specialy = vcgeq_u32 (vsubq_u32 (iy, d->tiny_bound), d->thres);
+ ax = v_zerofy_f32 (ax, specialx);
+ ay = v_zerofy_f32 (ay, specialy);
+ uint16x4_t special = vaddhn_u32 (specialx, specialy);
+
+ float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (ax, ax), ay, ay);
+
+ if (unlikely (v_any_u16h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f32 (sqsum);
+}
+#else
+
+float32x4_t VPCS_ATTR V_NAME_F2 (hypot) (float32x4_t x, float32x4_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float32x4_t sqsum = vfmaq_f32 (vmulq_f32 (x, x), y, y);
+
+ uint16x4_t special = vcge_u16 (
+ vsubhn_u32 (vreinterpretq_u32_f32 (sqsum), d->tiny_bound),
+ vget_low_u16 (d->thres));
+
+ if (unlikely (v_any_u16h (special)))
+ return special_case (x, y, sqsum, special);
+
+ return vsqrtq_f32 (sqsum);
+}
+#endif
+
+PL_SIG (V, F, 2, hypot, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F2 (hypot), 1.21)
+PL_TEST_EXPECT_FENV (V_NAME_F2 (hypot), WANT_SIMD_EXCEPT)
+PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
+PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
+PL_TEST_INTERVAL2 (V_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_log10_2u5.c b/contrib/arm-optimized-routines/pl/math/v_log10_2u5.c
new file mode 100644
index 000000000000..35dd62fe5e3e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log10_2u5.c
@@ -0,0 +1,120 @@
+/*
+ * Double-precision vector log10(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f64.h"
+
+#define N (1 << V_LOG10_TABLE_BITS)
+
+static const struct data
+{
+ uint64x2_t min_norm;
+ uint32x4_t special_bound;
+ float64x2_t poly[5];
+ float64x2_t invln10, log10_2, ln2;
+ uint64x2_t sign_exp_mask;
+} data = {
+ /* Computed from log coefficients divided by log(10) then rounded to double
+ precision. */
+ .poly = { V2 (-0x1.bcb7b1526e506p-3), V2 (0x1.287a7636be1d1p-3),
+ V2 (-0x1.bcb7b158af938p-4), V2 (0x1.63c78734e6d07p-4),
+ V2 (-0x1.287461742fee4p-4) },
+ .ln2 = V2 (0x1.62e42fefa39efp-1),
+ .invln10 = V2 (0x1.bcb7b1526e50ep-2),
+ .log10_2 = V2 (0x1.34413509f79ffp-2),
+ .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */
+ .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */
+ .sign_exp_mask = V2 (0xfff0000000000000),
+};
+
+#define Off v_u64 (0x3fe6900900000000)
+#define IndexMask (N - 1)
+
+#define T(s, i) __v_log10_data.s[i]
+
+struct entry
+{
+ float64x2_t invc;
+ float64x2_t log10c;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+ struct entry e;
+ uint64_t i0 = (i[0] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask;
+ uint64_t i1 = (i[1] >> (52 - V_LOG10_TABLE_BITS)) & IndexMask;
+ float64x2_t e0 = vld1q_f64 (&__v_log10_data.table[i0].invc);
+ float64x2_t e1 = vld1q_f64 (&__v_log10_data.table[i1].invc);
+ e.invc = vuzp1q_f64 (e0, e1);
+ e.log10c = vuzp2q_f64 (e0, e1);
+ return e;
+}
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, float64x2_t hi, float64x2_t r2,
+ uint32x2_t special)
+{
+ return v_call_f64 (log10, x, vfmaq_f64 (hi, r2, y), vmovl_u32 (special));
+}
+
+/* Fast implementation of double-precision vector log10
+ is a slight modification of double-precision vector log.
+ Max ULP error: < 2.5 ULP (nearest rounding).
+ Maximum measured at 2.46 ulp for x in [0.96, 0.97]
+ _ZGVnN2v_log10(0x1.13192407fcb46p+0) got 0x1.fff6be3cae4bbp-6
+ want 0x1.fff6be3cae4b9p-6. */
+float64x2_t VPCS_ATTR V_NAME_D1 (log10) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
+ vget_low_u32 (d->special_bound));
+
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ uint64x2_t tmp = vsubq_u64 (ix, Off);
+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
+ uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
+ float64x2_t z = vreinterpretq_f64_u64 (iz);
+
+ struct entry e = lookup (tmp);
+
+ /* log10(x) = log1p(z/c-1)/log(10) + log10(c) + k*log10(2). */
+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+ float64x2_t kd = vcvtq_f64_s64 (k);
+
+ /* hi = r / log(10) + log10(c) + k*log10(2).
+ Constants in v_log10_data.c are computed (in extended precision) as
+ e.log10c := e.logc * ivln10. */
+ float64x2_t w = vfmaq_f64 (e.log10c, r, d->invln10);
+
+ /* y = log10(1+r) + n * log10(2). */
+ float64x2_t hi = vfmaq_f64 (w, kd, d->log10_2);
+
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly);
+
+ if (unlikely (v_any_u32h (special)))
+ return special_case (x, y, hi, r2, special);
+ return vfmaq_f64 (hi, r2, y);
+}
+
+PL_SIG (V, D, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (V_NAME_D1 (log10), 1.97)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (log10))
+PL_TEST_INTERVAL (V_NAME_D1 (log10), -0.0, -inf, 1000)
+PL_TEST_INTERVAL (V_NAME_D1 (log10), 0, 0x1p-149, 1000)
+PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log10), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log10), 1.0, 100, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log10), 100, inf, 50000)
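For reference, the same reduction and reconstruction can be written as a scalar routine operating on one double. This is a sketch only: it assumes V_LOG10_TABLE_BITS == 7 (the table in the data file below has 128 entries), uses illustrative memcpy-based bit-cast helpers, and omits the special-case handling; none of these names are exported by the library.

#include <stdint.h>
#include <string.h>
#include "math_config.h" /* Assumed to declare __v_log10_data.  */

static uint64_t asu64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }
static double asf64 (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

/* Scalar sketch of the vector algorithm above (no special-case handling).  */
static double
log10_sketch (double x)
{
  uint64_t ix = asu64 (x);
  uint64_t tmp = ix - 0x3fe6900900000000ULL;        /* Off.  */
  int64_t k = (int64_t) tmp >> 52;                  /* arithmetic shift.  */
  double z = asf64 (ix - (tmp & 0xfff0000000000000ULL));
  uint64_t i = (tmp >> (52 - 7)) & 127;             /* table index, N = 128.  */

  /* log10(x) = log1p(z/c - 1)/log(10) + log10(c) + k*log10(2).  */
  double r = z * __v_log10_data.table[i].invc - 1.0;
  double hi = r * __v_log10_data.invln10 + __v_log10_data.table[i].log10c
              + (double) k * __v_log10_data.log10_2;

  double r2 = r * r;
  const double *c = __v_log10_data.poly;
  double y = c[0] + r * c[1] + r2 * (c[2] + r * c[3] + r2 * c[4]);
  return hi + r2 * y;
}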
diff --git a/contrib/arm-optimized-routines/pl/math/v_log10_data.c b/contrib/arm-optimized-routines/pl/math/v_log10_data.c
new file mode 100644
index 000000000000..d9a624dab9ce
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log10_data.c
@@ -0,0 +1,163 @@
+/*
+ * Lookup table for double-precision log10(x) vector function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct v_log10_data __v_log10_data = {
+ /* Computed from log's coefficients divided by log(10), then rounded to
+ double precision. */
+ .poly = { -0x1.bcb7b1526e506p-3, 0x1.287a7636be1d1p-3, -0x1.bcb7b158af938p-4,
+ 0x1.63c78734e6d07p-4, -0x1.287461742fee4p-4 },
+ .invln10 = 0x1.bcb7b1526e50ep-2,
+ .log10_2 = 0x1.34413509f79ffp-2,
+ /* Algorithm:
+
+ x = 2^k z
+ log10(x) = k log10(2) + log10(c) + poly(z/c - 1) / log(10)
+
+ where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
+ N=128) and log(c) and 1/c for the ith subinterval comes from lookup
+ tables:
+
+ table[i].invc = 1/c
+ table[i].log10c = (double)log10(c)
+
+ where c is near the center of the subinterval and is chosen by trying
+ several floating point invc candidates around 1/center and selecting one
+ for which the error in (double)log(c) is minimized (< 0x1p-74), except the
+ subinterval that contains 1 and the previous one got tweaked to avoid
+ cancellation. NB: invc should be optimized to minimize error in
+ (double)log10(c) instead. */
+ .table = { { 0x1.6a133d0dec120p+0, -0x1.345825f221684p-3 },
+ { 0x1.6815f2f3e42edp+0, -0x1.2f71a1f0c554ep-3 },
+ { 0x1.661e39be1ac9ep+0, -0x1.2a91fdb30b1f4p-3 },
+ { 0x1.642bfa30ac371p+0, -0x1.25b9260981a04p-3 },
+ { 0x1.623f1d916f323p+0, -0x1.20e7081762193p-3 },
+ { 0x1.60578da220f65p+0, -0x1.1c1b914aeefacp-3 },
+ { 0x1.5e75349dea571p+0, -0x1.1756af5de404dp-3 },
+ { 0x1.5c97fd387a75ap+0, -0x1.12985059c90bfp-3 },
+ { 0x1.5abfd2981f200p+0, -0x1.0de0628f63df4p-3 },
+ { 0x1.58eca051dc99cp+0, -0x1.092ed492e08eep-3 },
+ { 0x1.571e526d9df12p+0, -0x1.0483954caf1dfp-3 },
+ { 0x1.5554d555b3fcbp+0, -0x1.ffbd27a9adbcp-4 },
+ { 0x1.539015e2a20cdp+0, -0x1.f67f7f2e3d1ap-4 },
+ { 0x1.51d0014ee0164p+0, -0x1.ed4e1071ceebep-4 },
+ { 0x1.50148538cd9eep+0, -0x1.e428bb47413c4p-4 },
+ { 0x1.4e5d8f9f698a1p+0, -0x1.db0f6003028d6p-4 },
+ { 0x1.4cab0edca66bep+0, -0x1.d201df6749831p-4 },
+ { 0x1.4afcf1a9db874p+0, -0x1.c9001ac5c9672p-4 },
+ { 0x1.495327136e16fp+0, -0x1.c009f3c78c79p-4 },
+ { 0x1.47ad9e84af28fp+0, -0x1.b71f4cb642e53p-4 },
+ { 0x1.460c47b39ae15p+0, -0x1.ae400818526b2p-4 },
+ { 0x1.446f12b278001p+0, -0x1.a56c091954f87p-4 },
+ { 0x1.42d5efdd720ecp+0, -0x1.9ca3332f096eep-4 },
+ { 0x1.4140cfe001a0fp+0, -0x1.93e56a3f23e55p-4 },
+ { 0x1.3fafa3b421f69p+0, -0x1.8b3292a3903bp-4 },
+ { 0x1.3e225c9c8ece5p+0, -0x1.828a9112d9618p-4 },
+ { 0x1.3c98ec29a211ap+0, -0x1.79ed4ac35f5acp-4 },
+ { 0x1.3b13442a413fep+0, -0x1.715aa51ed28c4p-4 },
+ { 0x1.399156baa3c54p+0, -0x1.68d2861c999e9p-4 },
+ { 0x1.38131639b4cdbp+0, -0x1.6054d40ded21p-4 },
+ { 0x1.36987540fbf53p+0, -0x1.57e17576bc9a2p-4 },
+ { 0x1.352166b648f61p+0, -0x1.4f7851798bb0bp-4 },
+ { 0x1.33adddb3eb575p+0, -0x1.47194f5690ae3p-4 },
+ { 0x1.323dcd99fc1d3p+0, -0x1.3ec456d58ec47p-4 },
+ { 0x1.30d129fefc7d2p+0, -0x1.36794ff3e5f55p-4 },
+ { 0x1.2f67e6b72fe7dp+0, -0x1.2e382315725e4p-4 },
+ { 0x1.2e01f7cf8b187p+0, -0x1.2600b8ed82e91p-4 },
+ { 0x1.2c9f518ddc86ep+0, -0x1.1dd2fa85efc12p-4 },
+ { 0x1.2b3fe86e5f413p+0, -0x1.15aed136e3961p-4 },
+ { 0x1.29e3b1211b25cp+0, -0x1.0d94269d1a30dp-4 },
+ { 0x1.288aa08b373cfp+0, -0x1.0582e4a7659f5p-4 },
+ { 0x1.2734abcaa8467p+0, -0x1.faf5eb655742dp-5 },
+ { 0x1.25e1c82459b81p+0, -0x1.eaf888487e8eep-5 },
+ { 0x1.2491eb1ad59c5p+0, -0x1.db0d75ef25a82p-5 },
+ { 0x1.23450a54048b5p+0, -0x1.cb348a49e6431p-5 },
+ { 0x1.21fb1bb09e578p+0, -0x1.bb6d9c69acdd8p-5 },
+ { 0x1.20b415346d8f7p+0, -0x1.abb88368aa7ap-5 },
+ { 0x1.1f6fed179a1acp+0, -0x1.9c1517476af14p-5 },
+ { 0x1.1e2e99b93c7b3p+0, -0x1.8c833051bfa4dp-5 },
+ { 0x1.1cf011a7a882ap+0, -0x1.7d02a78e7fb31p-5 },
+ { 0x1.1bb44b97dba5ap+0, -0x1.6d93565e97c5fp-5 },
+ { 0x1.1a7b3e66cdd4fp+0, -0x1.5e351695db0c5p-5 },
+ { 0x1.1944e11dc56cdp+0, -0x1.4ee7c2ba67adcp-5 },
+ { 0x1.18112aebb1a6ep+0, -0x1.3fab35ba16c01p-5 },
+ { 0x1.16e013231b7e9p+0, -0x1.307f4ad854bc9p-5 },
+ { 0x1.15b1913f156cfp+0, -0x1.2163ddf4f988cp-5 },
+ { 0x1.14859cdedde13p+0, -0x1.1258cb5d19e22p-5 },
+ { 0x1.135c2dc68cfa4p+0, -0x1.035defdba3188p-5 },
+ { 0x1.12353bdb01684p+0, -0x1.e8e651191bce4p-6 },
+ { 0x1.1110bf25b85b4p+0, -0x1.cb30a62be444cp-6 },
+ { 0x1.0feeafd2f8577p+0, -0x1.ad9a9b3043823p-6 },
+ { 0x1.0ecf062c51c3bp+0, -0x1.9023ecda1ccdep-6 },
+ { 0x1.0db1baa076c8bp+0, -0x1.72cc592bd82dp-6 },
+ { 0x1.0c96c5bb3048ep+0, -0x1.55939eb1f9c6ep-6 },
+ { 0x1.0b7e20263e070p+0, -0x1.38797ca6cc5ap-6 },
+ { 0x1.0a67c2acd0ce3p+0, -0x1.1b7db35c2c072p-6 },
+ { 0x1.0953a6391e982p+0, -0x1.fd400812ee9a2p-7 },
+ { 0x1.0841c3caea380p+0, -0x1.c3c05fb4620f1p-7 },
+ { 0x1.07321489b13eap+0, -0x1.8a7bf3c40e2e3p-7 },
+ { 0x1.062491aee9904p+0, -0x1.517249c15a75cp-7 },
+ { 0x1.05193497a7cc5p+0, -0x1.18a2ea5330c91p-7 },
+ { 0x1.040ff6b5f5e9fp+0, -0x1.c01abc8cdc4e2p-8 },
+ { 0x1.0308d19aa6127p+0, -0x1.4f6261750dec9p-8 },
+ { 0x1.0203beedb0c67p+0, -0x1.be37b6612afa7p-9 },
+ { 0x1.010037d38bcc2p+0, -0x1.bc3a8398ac26p-10 },
+ { 1.0, 0.0 },
+ { 0x1.fc06d493cca10p-1, 0x1.bb796219f30a5p-9 },
+ { 0x1.f81e6ac3b918fp-1, 0x1.b984fdcba61cep-8 },
+ { 0x1.f44546ef18996p-1, 0x1.49cf12adf8e8cp-7 },
+ { 0x1.f07b10382c84bp-1, 0x1.b6075b5217083p-7 },
+ { 0x1.ecbf7070e59d4p-1, 0x1.10b7466fc30ddp-6 },
+ { 0x1.e91213f715939p-1, 0x1.4603e4db6a3a1p-6 },
+ { 0x1.e572a9a75f7b7p-1, 0x1.7aeb10e99e105p-6 },
+ { 0x1.e1e0e2c530207p-1, 0x1.af6e49b0f0e36p-6 },
+ { 0x1.de5c72d8a8be3p-1, 0x1.e38f064f41179p-6 },
+ { 0x1.dae50fa5658ccp-1, 0x1.0ba75abbb7623p-5 },
+ { 0x1.d77a71145a2dap-1, 0x1.25575ee2dba86p-5 },
+ { 0x1.d41c51166623ep-1, 0x1.3ed83f477f946p-5 },
+ { 0x1.d0ca6ba0bb29fp-1, 0x1.582aa79af60efp-5 },
+ { 0x1.cd847e8e59681p-1, 0x1.714f400fa83aep-5 },
+ { 0x1.ca4a499693e00p-1, 0x1.8a46ad3901cb9p-5 },
+ { 0x1.c71b8e399e821p-1, 0x1.a311903b6b87p-5 },
+ { 0x1.c3f80faf19077p-1, 0x1.bbb086f216911p-5 },
+ { 0x1.c0df92dc2b0ecp-1, 0x1.d4242bdda648ep-5 },
+ { 0x1.bdd1de3cbb542p-1, 0x1.ec6d167c2af1p-5 },
+ { 0x1.baceb9e1007a3p-1, 0x1.0245ed8221426p-4 },
+ { 0x1.b7d5ef543e55ep-1, 0x1.0e40856c74f64p-4 },
+ { 0x1.b4e749977d953p-1, 0x1.1a269a31120fep-4 },
+ { 0x1.b20295155478ep-1, 0x1.25f8718fc076cp-4 },
+ { 0x1.af279f8e82be2p-1, 0x1.31b64ffc95bfp-4 },
+ { 0x1.ac5638197fdf3p-1, 0x1.3d60787ca5063p-4 },
+ { 0x1.a98e2f102e087p-1, 0x1.48f72ccd187fdp-4 },
+ { 0x1.a6cf5606d05c1p-1, 0x1.547aad6602f1cp-4 },
+ { 0x1.a4197fc04d746p-1, 0x1.5feb3989d3acbp-4 },
+ { 0x1.a16c80293dc01p-1, 0x1.6b490f3978c79p-4 },
+ { 0x1.9ec82c4dc5bc9p-1, 0x1.76946b3f5e703p-4 },
+ { 0x1.9c2c5a491f534p-1, 0x1.81cd895717c83p-4 },
+ { 0x1.9998e1480b618p-1, 0x1.8cf4a4055c30ep-4 },
+ { 0x1.970d9977c6c2dp-1, 0x1.9809f4c48c0ebp-4 },
+ { 0x1.948a5c023d212p-1, 0x1.a30db3f9899efp-4 },
+ { 0x1.920f0303d6809p-1, 0x1.ae001905458fcp-4 },
+ { 0x1.8f9b698a98b45p-1, 0x1.b8e15a2e3a2cdp-4 },
+ { 0x1.8d2f6b81726f6p-1, 0x1.c3b1ace2b0996p-4 },
+ { 0x1.8acae5bb55badp-1, 0x1.ce71456edfa62p-4 },
+ { 0x1.886db5d9275b8p-1, 0x1.d9205759882c4p-4 },
+ { 0x1.8617ba567c13cp-1, 0x1.e3bf1513af0dfp-4 },
+ { 0x1.83c8d27487800p-1, 0x1.ee4db0412c414p-4 },
+ { 0x1.8180de3c5dbe7p-1, 0x1.f8cc5998de3a5p-4 },
+ { 0x1.7f3fbe71cdb71p-1, 0x1.019da085eaeb1p-3 },
+ { 0x1.7d055498071c1p-1, 0x1.06cd4acdb4e3dp-3 },
+ { 0x1.7ad182e54f65ap-1, 0x1.0bf542bef813fp-3 },
+ { 0x1.78a42c3c90125p-1, 0x1.11159f14da262p-3 },
+ { 0x1.767d342f76944p-1, 0x1.162e761c10d1cp-3 },
+ { 0x1.745c7ef26b00ap-1, 0x1.1b3fddc60d43ep-3 },
+ { 0x1.7241f15769d0fp-1, 0x1.2049ebac86aa6p-3 },
+ { 0x1.702d70d396e41p-1, 0x1.254cb4fb7836ap-3 },
+ { 0x1.6e1ee3700cd11p-1, 0x1.2a484e8d0d252p-3 },
+ { 0x1.6c162fc9cbe02p-1, 0x1.2f3ccce1c860bp-3 } }
+};
diff --git a/contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c b/contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c
new file mode 100644
index 000000000000..92bc50ba5bd9
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log10f_3u5.c
@@ -0,0 +1,82 @@
+/*
+ * Single-precision vector log10 function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint32x4_t min_norm;
+ uint16x8_t special_bound;
+ float32x4_t poly[8];
+ float32x4_t inv_ln10, ln2;
+ uint32x4_t off, mantissa_mask;
+} data = {
+ /* Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in
+ [-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. */
+ .poly = { V4 (-0x1.bcb79cp-3f), V4 (0x1.2879c8p-3f), V4 (-0x1.bcd472p-4f),
+ V4 (0x1.6408f8p-4f), V4 (-0x1.246f8p-4f), V4 (0x1.f0e514p-5f),
+ V4 (-0x1.0fc92cp-4f), V4 (0x1.f5f76ap-5f) },
+ .ln2 = V4 (0x1.62e43p-1f),
+ .inv_ln10 = V4 (0x1.bcb7b2p-2f),
+ .min_norm = V4 (0x00800000),
+ .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */
+ .off = V4 (0x3f2aaaab), /* 0.666667. */
+ .mantissa_mask = V4 (0x007fffff),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2,
+ uint16x4_t cmp)
+{
+ /* Fall back to scalar code. */
+ return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp));
+}
+
+/* Fast implementation of AdvSIMD log10f,
+ using a similar approach to AdvSIMD logf with the same offset (i.e., 2/3)
+ and an order-9 polynomial.
+ Maximum error: 3.305 ULP (nearest rounding).
+ _ZGVnN4v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4
+ want 0x1.ffe2f4p-4. */
+float32x4_t VPCS_ATTR V_NAME_F1 (log10) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t u = vreinterpretq_u32_f32 (x);
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
+ vget_low_u16 (d->special_bound));
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = vsubq_u32 (u, d->off);
+ float32x4_t n = vcvtq_f32_s32 (
+ vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */
+ u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
+ float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+ /* y = log10(1+r) + n * log10(2). */
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t poly = v_pw_horner_7_f32 (r, r2, d->poly);
+ /* y = (r + n*ln2) / ln(10) = r/ln(10) + n*log10(2); the polynomial
+ correction poly*r2 is added below. */
+ float32x4_t y = vfmaq_f32 (r, d->ln2, n);
+ y = vmulq_f32 (y, d->inv_ln10);
+
+ if (unlikely (v_any_u16h (special)))
+ return special_case (x, y, poly, r2, special);
+ return vfmaq_f32 (y, poly, r2);
+}
+
+PL_SIG (V, F, 1, log10, 0.01, 11.1)
+PL_TEST_ULP (V_NAME_F1 (log10), 2.81)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (log10))
+PL_TEST_INTERVAL (V_NAME_F1 (log10), -0.0, -inf, 100)
+PL_TEST_INTERVAL (V_NAME_F1 (log10), 0, 0x1p-126, 100)
+PL_TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log10), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log10), 1.0, 100, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log10), 100, inf, 50000)
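The single-precision reduction above needs no table: subtracting the bit pattern of 2/3 lets one shift extract n, and a mask-and-add re-centres the mantissa so that 1+r lies in (2/3, 4/3). A scalar sketch of just that step, with illustrative memcpy-based bit-cast helpers:

#include <stdint.h>
#include <string.h>

static uint32_t asu32 (float x) { uint32_t u; memcpy (&u, &x, 4); return u; }
static float asf32 (uint32_t u) { float x; memcpy (&x, &u, 4); return x; }

/* x = 2^n * (1 + r) with 1 + r in (2/3, 4/3); returns r, stores n.  */
static float
log10f_reduce_sketch (float x, float *n)
{
  uint32_t u = asu32 (x) - 0x3f2aaaab;   /* off, ~2/3.  */
  *n = (float) ((int32_t) u >> 23);      /* arithmetic shift (sign-extends).  */
  u = (u & 0x007fffff) + 0x3f2aaaab;     /* keep mantissa, restore offset.  */
  return asf32 (u) - 1.0f;               /* r in (-1/3, 1/3).  */
}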
diff --git a/contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c b/contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c
new file mode 100644
index 000000000000..face02ddc6c3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log1p_2u5.c
@@ -0,0 +1,128 @@
+/*
+ * Double-precision vector log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+const static struct data
+{
+ float64x2_t poly[19], ln2[2];
+ uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask, inf, minus_one;
+ int64x2_t one_top;
+} data = {
+ /* Generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */
+ .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2),
+ V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3),
+ V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3),
+ V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4),
+ V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4),
+ V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4),
+ V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4),
+ V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5),
+ V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4),
+ V2 (-0x1.cfa7385bdb37ep-6) },
+ .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) },
+ /* top32(asuint64(sqrt(2)/2)) << 32. */
+ .hf_rt2_top = V2 (0x3fe6a09e00000000),
+ /* (top32(asuint64(1)) - top32(asuint64(sqrt(2)/2))) << 32. */
+ .one_m_hf_rt2_top = V2 (0x00095f6200000000),
+ .umask = V2 (0x000fffff00000000),
+ .one_top = V2 (0x3ff),
+ .inf = V2 (0x7ff0000000000000),
+ .minus_one = V2 (0xbff0000000000000)
+};
+
+#define BottomMask v_u64 (0xffffffff)
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (log1p, x, y, special);
+}
+
+/* Vector log1p approximation using polynomial on reduced interval. Routine is
+ a modification of the algorithm used in scalar log1p, with no shortcut for
+ k=0 and no narrowing for f and k. Maximum observed error is 2.45 ULP:
+ _ZGVnN2v_log1p(0x1.658f7035c4014p+11) got 0x1.fd61d0727429dp+2
+ want 0x1.fd61d0727429fp+2 . */
+VPCS_ATTR float64x2_t V_NAME_D1 (log1p) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+ uint64x2_t special = vcgeq_u64 (ia, d->inf);
+
+#if WANT_SIMD_EXCEPT
+ special = vorrq_u64 (special,
+ vcgeq_u64 (ix, vreinterpretq_u64_f64 (v_f64 (-1))));
+ if (unlikely (v_any_u64 (special)))
+ x = v_zerofy_f64 (x, special);
+#else
+ special = vorrq_u64 (special, vcleq_f64 (x, v_f64 (-1)));
+#endif
+
+ /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
+ is in [sqrt(2)/2, sqrt(2)]):
+ log1p(x) = k*log(2) + log1p(f).
+
+ f may not be representable exactly, so we need a correction term:
+ let m = round(1 + x), c = (1 + x) - m.
+ Since c is much smaller than m, log(1 + x) = log(m + c)
+ = log(m) + log(1 + c/m) ~ log(m) + c/m.
+
+ We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
+
+ /* Obtain correctly scaled k by manipulation in the exponent.
+ The scalar algorithm casts down to 32-bit at this point to calculate k and
+ u_red. We stay in double-width to obtain f and k, using the same constants
+ as the scalar algorithm but shifted left by 32. */
+ float64x2_t m = vaddq_f64 (x, v_f64 (1));
+ uint64x2_t mi = vreinterpretq_u64_f64 (m);
+ uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
+
+ int64x2_t ki
+ = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
+ float64x2_t k = vcvtq_f64_s64 (ki);
+
+ /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */
+ uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
+ uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
+ float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
+
+ /* Correction term c/m. */
+ float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
+
+ /* Approximate log1p(x) on the reduced input using a polynomial. Because
+ log1p(0)=0 we choose an approximation of the form:
+ x + C0*x^2 + C1*x^3 + C2x^4 + ...
+ Hence approximation has the form f + f^2 * P(f)
+ where P(x) = C0 + C1*x + C2x^2 + ...
+ Assembling this all correctly is dealt with at the final step. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
+
+ float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
+ float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
+ float64x2_t y = vaddq_f64 (ylo, yhi);
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (vreinterpretq_f64_u64 (ix), vfmaq_f64 (y, f2, p),
+ special);
+
+ return vfmaq_f64 (y, f2, p);
+}
+
+PL_SIG (V, D, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (V_NAME_D1 (log1p), 1.97)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (log1p), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.0, 0x1p-23, 50000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0x1p-23, 0.001, 50000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (log1p), 0.001, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log1p), 1, inf, 40000)
+PL_TEST_INTERVAL (V_NAME_D1 (log1p), -1.0, -inf, 500)
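The exponent extraction and correction term above are easier to follow in scalar form. The sketch below reproduces only the reduction (k, f and c/m) using the constants from the data struct; the bit-cast helpers are illustrative, and the polynomial and special cases are omitted.

#include <stdint.h>
#include <string.h>

static uint64_t asu64 (double x) { uint64_t u; memcpy (&u, &x, 8); return u; }
static double asf64 (uint64_t u) { double x; memcpy (&x, &u, 8); return x; }

/* Reduction used above: 1 + x = 2^k * (1 + f), f in [sqrt(2)/2 - 1, sqrt(2) - 1],
   plus the correction c/m for the rounding of m = round(1 + x).
   log1p(x) is then reassembled as k*ln2 + f + f*f*P(f) + c/m.  */
static void
log1p_reduce_sketch (double x, double *k, double *f, double *cm)
{
  double m = x + 1.0;
  uint64_t mi = asu64 (m);
  uint64_t u = mi + 0x00095f6200000000ULL;          /* one_m_hf_rt2_top.  */
  *k = (double) ((int64_t) (u >> 52) - 0x3ff);      /* exponent of 1 + x.  */
  uint64_t u_red = ((u & 0x000fffff00000000ULL) + 0x3fe6a09e00000000ULL)
                   | (mi & 0xffffffffULL);          /* BottomMask.  */
  *f = asf64 (u_red) - 1.0;
  *cm = (x - (m - 1.0)) / m;                        /* correction term c/m.  */
}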
diff --git a/contrib/arm-optimized-routines/pl/math/v_log1p_inline.h b/contrib/arm-optimized-routines/pl/math/v_log1p_inline.h
new file mode 100644
index 000000000000..bd57bfc6fe6e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log1p_inline.h
@@ -0,0 +1,91 @@
+/*
+ * Helper for vector double-precision routines which calculate log(1 + x) and do
+ * not need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+#ifndef PL_MATH_V_LOG1P_INLINE_H
+#define PL_MATH_V_LOG1P_INLINE_H
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+struct v_log1p_data
+{
+ float64x2_t poly[19], ln2[2];
+ uint64x2_t hf_rt2_top, one_m_hf_rt2_top, umask;
+ int64x2_t one_top;
+};
+
+/* Coefficients generated using Remez, deg=20, in [sqrt(2)/2-1, sqrt(2)-1]. */
+#define V_LOG1P_CONSTANTS_TABLE \
+ { \
+ .poly = { V2 (-0x1.ffffffffffffbp-2), V2 (0x1.55555555551a9p-2), \
+ V2 (-0x1.00000000008e3p-2), V2 (0x1.9999999a32797p-3), \
+ V2 (-0x1.555555552fecfp-3), V2 (0x1.249248e071e5ap-3), \
+ V2 (-0x1.ffffff8bf8482p-4), V2 (0x1.c71c8f07da57ap-4), \
+ V2 (-0x1.9999ca4ccb617p-4), V2 (0x1.7459ad2e1dfa3p-4), \
+ V2 (-0x1.554d2680a3ff2p-4), V2 (0x1.3b4c54d487455p-4), \
+ V2 (-0x1.2548a9ffe80e6p-4), V2 (0x1.0f389a24b2e07p-4), \
+ V2 (-0x1.eee4db15db335p-5), V2 (0x1.e95b494d4a5ddp-5), \
+ V2 (-0x1.15fdf07cb7c73p-4), V2 (0x1.0310b70800fcfp-4), \
+ V2 (-0x1.cfa7385bdb37ep-6) }, \
+ .ln2 = { V2 (0x1.62e42fefa3800p-1), V2 (0x1.ef35793c76730p-45) }, \
+ .hf_rt2_top = V2 (0x3fe6a09e00000000), \
+ .one_m_hf_rt2_top = V2 (0x00095f6200000000), \
+ .umask = V2 (0x000fffff00000000), .one_top = V2 (0x3ff) \
+ }
+
+#define BottomMask v_u64 (0xffffffff)
+
+static inline float64x2_t
+log1p_inline (float64x2_t x, const struct v_log1p_data *d)
+{
+ /* Helper for calculating log(x + 1). Copied from v_log1p_2u5.c, with several
+ modifications:
+ - No special-case handling - this should be dealt with by the caller.
+ - Pairwise Horner polynomial evaluation for improved accuracy.
+ - Optionally simulate the shortcut for k=0, used in the scalar routine,
+ using v_sel, for improved accuracy when the argument to log1p is close to
+ 0. This feature is enabled by defining WANT_V_LOG1P_K0_SHORTCUT as 1 in
+ the source of the caller before including this file.
+ See v_log1pf_2u1.c for details of the algorithm. */
+ float64x2_t m = vaddq_f64 (x, v_f64 (1));
+ uint64x2_t mi = vreinterpretq_u64_f64 (m);
+ uint64x2_t u = vaddq_u64 (mi, d->one_m_hf_rt2_top);
+
+ int64x2_t ki
+ = vsubq_s64 (vreinterpretq_s64_u64 (vshrq_n_u64 (u, 52)), d->one_top);
+ float64x2_t k = vcvtq_f64_s64 (ki);
+
+ /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */
+ uint64x2_t utop = vaddq_u64 (vandq_u64 (u, d->umask), d->hf_rt2_top);
+ uint64x2_t u_red = vorrq_u64 (utop, vandq_u64 (mi, BottomMask));
+ float64x2_t f = vsubq_f64 (vreinterpretq_f64_u64 (u_red), v_f64 (1));
+
+ /* Correction term c/m. */
+ float64x2_t cm = vdivq_f64 (vsubq_f64 (x, vsubq_f64 (m, v_f64 (1))), m);
+
+#ifndef WANT_V_LOG1P_K0_SHORTCUT
+#error \
+ "Cannot use v_log1p_inline.h without specifying whether you need the k0 shortcut for greater accuracy close to 0"
+#elif WANT_V_LOG1P_K0_SHORTCUT
+ /* Shortcut if k is 0 - set correction term to 0 and f to x. The result is
+ that the approximation is solely the polynomial. */
+ uint64x2_t k0 = vceqzq_f64 (k);
+ cm = v_zerofy_f64 (cm, k0);
+ f = vbslq_f64 (k0, x, f);
+#endif
+
+ /* Approximate log1p(f) on the reduced input using a polynomial. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t p = v_pw_horner_18_f64 (f, f2, d->poly);
+
+ /* Assemble log1p(x) = k * log2 + log1p(f) + c/m. */
+ float64x2_t ylo = vfmaq_f64 (cm, k, d->ln2[1]);
+ float64x2_t yhi = vfmaq_f64 (f, k, d->ln2[0]);
+ return vfmaq_f64 (vaddq_f64 (ylo, yhi), f2, p);
+}
+
+#endif // PL_MATH_V_LOG1P_INLINE_H
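A hypothetical caller of this helper looks like the following. The only requirements visible in the header are that WANT_V_LOG1P_K0_SHORTCUT is defined before the include and that special lanes are screened out by the caller; the kernel name below is illustrative.

/* Hypothetical use of the inline log1p helper.  */
#define WANT_V_LOG1P_K0_SHORTCUT 1
#include "v_log1p_inline.h"

static const struct v_log1p_data log1p_consts = V_LOG1P_CONSTANTS_TABLE;

static inline float64x2_t
my_log1p_kernel (float64x2_t t)
{
  /* Caller guarantees 1 + t is positive and finite on every lane.  */
  return log1p_inline (t, &log1p_consts);
}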
diff --git a/contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c b/contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c
new file mode 100644
index 000000000000..153c88da9c88
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log1pf_2u1.c
@@ -0,0 +1,126 @@
+/*
+ * Single-precision vector log(1+x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f32.h"
+
+const static struct data
+{
+ float32x4_t poly[8], ln2;
+ uint32x4_t tiny_bound, minus_one, four, thresh;
+ int32x4_t three_quarters;
+} data = {
+ .poly = { /* Generated using FPMinimax in [-0.25, 0.5]. First two coefficients
+ (1, -0.5) are not stored as they can be generated more
+ efficiently. */
+ V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f),
+ V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f),
+ V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) },
+ .ln2 = V4 (0x1.62e43p-1f),
+ .tiny_bound = V4 (0x34000000), /* asuint32(0x1p-23). ulp=0.5 at 0x1p-23. */
+ .thresh = V4 (0x4b800000), /* asuint32(INFINITY) - tiny_bound. */
+ .minus_one = V4 (0xbf800000),
+ .four = V4 (0x40800000),
+ .three_quarters = V4 (0x3f400000)
+};
+
+static inline float32x4_t
+eval_poly (float32x4_t m, const float32x4_t *p)
+{
+ /* Approximate log(1+m) on [-0.25, 0.5] using split Estrin scheme. */
+ float32x4_t p_12 = vfmaq_f32 (v_f32 (-0.5), m, p[0]);
+ float32x4_t p_34 = vfmaq_f32 (p[1], m, p[2]);
+ float32x4_t p_56 = vfmaq_f32 (p[3], m, p[4]);
+ float32x4_t p_78 = vfmaq_f32 (p[5], m, p[6]);
+
+ float32x4_t m2 = vmulq_f32 (m, m);
+ float32x4_t p_02 = vfmaq_f32 (m, m2, p_12);
+ float32x4_t p_36 = vfmaq_f32 (p_34, m2, p_56);
+ float32x4_t p_79 = vfmaq_f32 (p_78, m2, p[7]);
+
+ float32x4_t m4 = vmulq_f32 (m2, m2);
+ float32x4_t p_06 = vfmaq_f32 (p_02, m4, p_36);
+ return vfmaq_f32 (p_06, m4, vmulq_f32 (m4, p_79));
+}
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (log1pf, x, y, special);
+}
+
+/* Vector log1pf approximation using polynomial on reduced interval. Accuracy
+ is roughly 2.02 ULP:
+ log1pf(0x1.21e13ap-2) got 0x1.fe8028p-3 want 0x1.fe802cp-3. */
+VPCS_ATTR float32x4_t V_NAME_F1 (log1p) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ uint32x4_t ia = vreinterpretq_u32_f32 (vabsq_f32 (x));
+ uint32x4_t special_cases
+ = vorrq_u32 (vcgeq_u32 (vsubq_u32 (ia, d->tiny_bound), d->thresh),
+ vcgeq_u32 (ix, d->minus_one));
+ float32x4_t special_arg = x;
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (special_cases)))
+ /* Side-step special lanes so fenv exceptions are not triggered
+ inadvertently. */
+ x = v_zerofy_f32 (x, special_cases);
+#endif
+
+ /* With x + 1 = t * 2^k (where t = m + 1 and k is chosen such that m
+ is in [-0.25, 0.5]):
+ log1p(x) = log(t) + log(2^k) = log1p(m) + k*log(2).
+
+ We approximate log1p(m) with a polynomial, then scale by
+ k*log(2). Instead of doing this directly, we use an intermediate
+ scale factor s = 4*k*log(2) to ensure the scale is representable
+ as a normalised fp32 number. */
+
+ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+
+ /* Choose k to scale x to the range [-1/4, 1/2]. */
+ int32x4_t k
+ = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d->three_quarters),
+ v_s32 (0xff800000));
+ uint32x4_t ku = vreinterpretq_u32_s32 (k);
+
+ /* Scale x by exponent manipulation. */
+ float32x4_t m_scale
+ = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
+
+ /* Scale up to ensure that the scale factor is representable as normalised
+ fp32 number, and scale m down accordingly. */
+ float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d->four, ku));
+ m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
+
+ /* Evaluate polynomial on the reduced interval. */
+ float32x4_t p = eval_poly (m_scale, d->poly);
+
+ /* The scale factor to be applied back at the end: shifting k right by 23
+ (arithmetically) before converting, equivalent to multiplying float(k) by
+ 2^-23, recovers the exponent e used for the scaling. */
+ float32x4_t scale_back = vcvtq_f32_s32 (vshrq_n_s32 (k, 23));
+
+ /* Apply the scaling back. */
+ float32x4_t y = vfmaq_f32 (p, scale_back, d->ln2);
+
+ if (unlikely (v_any_u32 (special_cases)))
+ return special_case (special_arg, y, special_cases);
+ return y;
+}
+
+PL_SIG (V, F, 1, log1p, -0.9, 10.0)
+PL_TEST_ULP (V_NAME_F1 (log1p), 1.53)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (log1p), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0.0, 0x1p-23, 30000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (log1p), 0x1p-23, 1, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log1p), 1, inf, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log1p), -1.0, -inf, 1000)
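The single-precision scaling trick is worth restating in scalar form: k captures the exponent of m = 1 + x as a multiple of 2^23, subtracting it from the bit pattern of x scales x by 2^-e, and s = 4*2^-e turns the re-added 1 into an exactly representable term. A sketch, with illustrative bit-cast helpers and the polynomial left to the caller:

#include <stdint.h>
#include <string.h>

static uint32_t asu32 (float x) { uint32_t u; memcpy (&u, &x, 4); return u; }
static float asf32 (uint32_t u) { float x; memcpy (&x, &u, 4); return x; }

/* Returns m_scale = (1 + x) * 2^-e - 1 in [-0.25, 0.5] and stores e;
   log1p(x) is then approximately P(m_scale) + e * ln(2).  */
static float
log1pf_reduce_sketch (float x, float *e)
{
  float m = x + 1.0f;
  int32_t k = (int32_t) ((asu32 (m) - 0x3f400000u) & 0xff800000u);
  float m_scale = asf32 (asu32 (x) - (uint32_t) k);  /* x * 2^-e.  */
  float s = asf32 (0x40800000u - (uint32_t) k);      /* 4 * 2^-e.  */
  m_scale += 0.25f * s - 1.0f;                       /* add (1 * 2^-e) - 1.  */
  *e = (float) (k >> 23);                            /* arithmetic shift.  */
  return m_scale;
}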
diff --git a/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h b/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h
new file mode 100644
index 000000000000..c654c6bad08f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log1pf_inline.h
@@ -0,0 +1,67 @@
+/*
+ * Helper for single-precision routines which calculate log(1 + x) and do not
+ * need special-case handling
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef PL_MATH_V_LOG1PF_INLINE_H
+#define PL_MATH_V_LOG1PF_INLINE_H
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+
+struct v_log1pf_data
+{
+ float32x4_t poly[8], ln2;
+ uint32x4_t four;
+ int32x4_t three_quarters;
+};
+
+/* Polynomial generated using FPMinimax in [-0.25, 0.5]. First two coefficients
+ (1, -0.5) are not stored as they can be generated more efficiently. */
+#define V_LOG1PF_CONSTANTS_TABLE \
+ { \
+ .poly \
+ = { V4 (0x1.5555aap-2f), V4 (-0x1.000038p-2f), V4 (0x1.99675cp-3f), \
+ V4 (-0x1.54ef78p-3f), V4 (0x1.28a1f4p-3f), V4 (-0x1.0da91p-3f), \
+ V4 (0x1.abcb6p-4f), V4 (-0x1.6f0d5ep-5f) }, \
+ .ln2 = V4 (0x1.62e43p-1f), .four = V4 (0x40800000), \
+ .three_quarters = V4 (0x3f400000) \
+ }
+
+static inline float32x4_t
+eval_poly (float32x4_t m, const float32x4_t *c)
+{
+ /* Approximate log(1+m) on [-0.25, 0.5] using pairwise Horner (main routine
+ uses split Estrin, but this way reduces register pressure in the calling
+ routine). */
+ float32x4_t q = vfmaq_f32 (v_f32 (-0.5), m, c[0]);
+ float32x4_t m2 = vmulq_f32 (m, m);
+ q = vfmaq_f32 (m, m2, q);
+ float32x4_t p = v_pw_horner_6_f32 (m, m2, c + 1);
+ p = vmulq_f32 (m2, p);
+ return vfmaq_f32 (q, m2, p);
+}
+
+static inline float32x4_t
+log1pf_inline (float32x4_t x, const struct v_log1pf_data d)
+{
+ /* Helper for calculating log(x + 1). Copied from log1pf_2u1.c, with no
+ special-case handling. See that file for details of the algorithm. */
+ float32x4_t m = vaddq_f32 (x, v_f32 (1.0f));
+ int32x4_t k
+ = vandq_s32 (vsubq_s32 (vreinterpretq_s32_f32 (m), d.three_quarters),
+ v_s32 (0xff800000));
+ uint32x4_t ku = vreinterpretq_u32_s32 (k);
+ float32x4_t s = vreinterpretq_f32_u32 (vsubq_u32 (d.four, ku));
+ float32x4_t m_scale
+ = vreinterpretq_f32_u32 (vsubq_u32 (vreinterpretq_u32_f32 (x), ku));
+ m_scale = vaddq_f32 (m_scale, vfmaq_f32 (v_f32 (-1.0f), v_f32 (0.25f), s));
+ float32x4_t p = eval_poly (m_scale, d.poly);
+ float32x4_t scale_back = vmulq_f32 (vcvtq_f32_s32 (k), v_f32 (0x1.0p-23f));
+ return vfmaq_f32 (p, scale_back, d.ln2);
+}
+
+#endif // PL_MATH_V_LOG1PF_INLINE_H
diff --git a/contrib/arm-optimized-routines/pl/math/v_log2_3u.c b/contrib/arm-optimized-routines/pl/math/v_log2_3u.c
new file mode 100644
index 000000000000..2dd2c34b7c97
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log2_3u.c
@@ -0,0 +1,109 @@
+/*
+ * Double-precision vector log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+#include "poly_advsimd_f64.h"
+
+#define N (1 << V_LOG2_TABLE_BITS)
+
+static const struct data
+{
+ uint64x2_t min_norm;
+ uint32x4_t special_bound;
+ float64x2_t poly[5];
+ float64x2_t invln2;
+ uint64x2_t sign_exp_mask;
+} data = {
+ /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9
+ and N = 128, then scaled by log2(e) in extended precision and rounded back
+ to double precision. */
+ .poly = { V2 (-0x1.71547652b83p-1), V2 (0x1.ec709dc340953p-2),
+ V2 (-0x1.71547651c8f35p-2), V2 (0x1.2777ebe12dda5p-2),
+ V2 (-0x1.ec738d616fe26p-3) },
+ .invln2 = V2 (0x1.71547652b82fep0),
+ .min_norm = V2 (0x0010000000000000), /* asuint64(0x1p-1022). */
+ .special_bound = V4 (0x7fe00000), /* asuint64(inf) - min_norm. */
+ .sign_exp_mask = V2 (0xfff0000000000000),
+};
+
+#define Off v_u64 (0x3fe6900900000000)
+#define IndexMask (N - 1)
+
+struct entry
+{
+ float64x2_t invc;
+ float64x2_t log2c;
+};
+
+static inline struct entry
+lookup (uint64x2_t i)
+{
+ struct entry e;
+ uint64_t i0 = (i[0] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask;
+ uint64_t i1 = (i[1] >> (52 - V_LOG2_TABLE_BITS)) & IndexMask;
+ float64x2_t e0 = vld1q_f64 (&__v_log2_data.table[i0].invc);
+ float64x2_t e1 = vld1q_f64 (&__v_log2_data.table[i1].invc);
+ e.invc = vuzp1q_f64 (e0, e1);
+ e.log2c = vuzp2q_f64 (e0, e1);
+ return e;
+}
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, float64x2_t w, float64x2_t r2,
+ uint32x2_t special)
+{
+ return v_call_f64 (log2, x, vfmaq_f64 (w, r2, y), vmovl_u32 (special));
+}
+
+/* Double-precision vector log2 routine. Implements the same algorithm as
+ vector log10, with coefficients and table entries scaled in extended
+ precision. The maximum observed error is 2.58 ULP:
+ _ZGVnN2v_log2(0x1.0b556b093869bp+0) got 0x1.fffb34198d9dap-5
+ want 0x1.fffb34198d9ddp-5. */
+float64x2_t VPCS_ATTR V_NAME_D1 (log2) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint64x2_t ix = vreinterpretq_u64_f64 (x);
+ uint32x2_t special = vcge_u32 (vsubhn_u64 (ix, d->min_norm),
+ vget_low_u32 (d->special_bound));
+
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ uint64x2_t tmp = vsubq_u64 (ix, Off);
+ int64x2_t k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52);
+ uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
+ float64x2_t z = vreinterpretq_f64_u64 (iz);
+
+ struct entry e = lookup (tmp);
+
+ /* log2(x) = log1p(z/c-1)/log(2) + log2(c) + k. */
+
+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+ float64x2_t kd = vcvtq_f64_s64 (k);
+ float64x2_t w = vfmaq_f64 (e.log2c, r, d->invln2);
+
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t y = v_pw_horner_4_f64 (r, r2, d->poly);
+ w = vaddq_f64 (kd, w);
+
+ if (unlikely (v_any_u32h (special)))
+ return special_case (x, y, w, r2, special);
+ return vfmaq_f64 (w, r2, y);
+}
+
+PL_SIG (V, D, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (V_NAME_D1 (log2), 2.09)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_D1 (log2))
+PL_TEST_INTERVAL (V_NAME_D1 (log2), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log2), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log2), 1.0, 100, 50000)
+PL_TEST_INTERVAL (V_NAME_D1 (log2), 100, inf, 50000)
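The reconstruction described in the comments can be restated in one formula. With x = 2^k z, z = c(1 + r), and the table supplying 1/c and log2(c):

\[
  \log_2 x = k + \log_2 c + \frac{\log(1+r)}{\log 2}
           \approx \underbrace{k + \log_2 c + r \cdot \mathrm{invln2}}_{w} + r^2\,P(r),
  \qquad r = \frac{z}{c} - 1,
\]

where P is the polynomial above. Its coefficients were pre-scaled by log2(e), which is why no division by log(2) appears in the code after the fused multiply-adds.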
diff --git a/contrib/arm-optimized-routines/pl/math/v_log2_data.c b/contrib/arm-optimized-routines/pl/math/v_log2_data.c
new file mode 100644
index 000000000000..50697daff925
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log2_data.c
@@ -0,0 +1,153 @@
+/*
+ * Coefficients and table entries for vector log2
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << V_LOG2_TABLE_BITS)
+
+const struct v_log2_data __v_log2_data = {
+
+ /* Each coefficient was generated to approximate log(r) for |r| < 0x1.fp-9
+ and N = 128, then scaled by log2(e) in extended precision and rounded back
+ to double precision. */
+ .poly = { -0x1.71547652b83p-1, 0x1.ec709dc340953p-2, -0x1.71547651c8f35p-2,
+ 0x1.2777ebe12dda5p-2, -0x1.ec738d616fe26p-3 },
+
+ .invln2 = 0x1.71547652b82fep0,
+
+ /* Derived from tables in v_log_data.c in a similar way as v_log10_data.c.
+ This means invc is unchanged and log2c was calculated by scaling log(c) by
+ log2(e) in extended precision and rounding back to double precision. */
+ .table = { { 0x1.6a133d0dec120p+0, -0x1.00130d57f5fadp-1 },
+ { 0x1.6815f2f3e42edp+0, -0x1.f802661bd725ep-2 },
+ { 0x1.661e39be1ac9ep+0, -0x1.efea1c6f73a5bp-2 },
+ { 0x1.642bfa30ac371p+0, -0x1.e7dd1dcd06f05p-2 },
+ { 0x1.623f1d916f323p+0, -0x1.dfdb4ae024809p-2 },
+ { 0x1.60578da220f65p+0, -0x1.d7e484d101958p-2 },
+ { 0x1.5e75349dea571p+0, -0x1.cff8ad452f6ep-2 },
+ { 0x1.5c97fd387a75ap+0, -0x1.c817a666c997fp-2 },
+ { 0x1.5abfd2981f200p+0, -0x1.c04152d640419p-2 },
+ { 0x1.58eca051dc99cp+0, -0x1.b87595a3f64b2p-2 },
+ { 0x1.571e526d9df12p+0, -0x1.b0b4526c44d07p-2 },
+ { 0x1.5554d555b3fcbp+0, -0x1.a8fd6d1a90f5ep-2 },
+ { 0x1.539015e2a20cdp+0, -0x1.a150ca2559fc6p-2 },
+ { 0x1.51d0014ee0164p+0, -0x1.99ae4e62cca29p-2 },
+ { 0x1.50148538cd9eep+0, -0x1.9215df1a1e842p-2 },
+ { 0x1.4e5d8f9f698a1p+0, -0x1.8a8761fe1f0d9p-2 },
+ { 0x1.4cab0edca66bep+0, -0x1.8302bd1cc9a54p-2 },
+ { 0x1.4afcf1a9db874p+0, -0x1.7b87d6fb437f6p-2 },
+ { 0x1.495327136e16fp+0, -0x1.741696673a86dp-2 },
+ { 0x1.47ad9e84af28fp+0, -0x1.6caee2b3c6fe4p-2 },
+ { 0x1.460c47b39ae15p+0, -0x1.6550a3666c27ap-2 },
+ { 0x1.446f12b278001p+0, -0x1.5dfbc08de02a4p-2 },
+ { 0x1.42d5efdd720ecp+0, -0x1.56b022766c84ap-2 },
+ { 0x1.4140cfe001a0fp+0, -0x1.4f6db1c955536p-2 },
+ { 0x1.3fafa3b421f69p+0, -0x1.4834579063054p-2 },
+ { 0x1.3e225c9c8ece5p+0, -0x1.4103fd2249a76p-2 },
+ { 0x1.3c98ec29a211ap+0, -0x1.39dc8c3fe6dabp-2 },
+ { 0x1.3b13442a413fep+0, -0x1.32bdeed4b5c8fp-2 },
+ { 0x1.399156baa3c54p+0, -0x1.2ba80f41e20ddp-2 },
+ { 0x1.38131639b4cdbp+0, -0x1.249ad8332f4a7p-2 },
+ { 0x1.36987540fbf53p+0, -0x1.1d96347e7f3ebp-2 },
+ { 0x1.352166b648f61p+0, -0x1.169a0f7d6604ap-2 },
+ { 0x1.33adddb3eb575p+0, -0x1.0fa654a221909p-2 },
+ { 0x1.323dcd99fc1d3p+0, -0x1.08baefcf8251ap-2 },
+ { 0x1.30d129fefc7d2p+0, -0x1.01d7cd14deecdp-2 },
+ { 0x1.2f67e6b72fe7dp+0, -0x1.f5f9b1ad55495p-3 },
+ { 0x1.2e01f7cf8b187p+0, -0x1.e853ff76a77afp-3 },
+ { 0x1.2c9f518ddc86ep+0, -0x1.dabe5d624cba1p-3 },
+ { 0x1.2b3fe86e5f413p+0, -0x1.cd38a5cef4822p-3 },
+ { 0x1.29e3b1211b25cp+0, -0x1.bfc2b38d315f9p-3 },
+ { 0x1.288aa08b373cfp+0, -0x1.b25c61f5edd0fp-3 },
+ { 0x1.2734abcaa8467p+0, -0x1.a5058d18e9cacp-3 },
+ { 0x1.25e1c82459b81p+0, -0x1.97be1113e47a3p-3 },
+ { 0x1.2491eb1ad59c5p+0, -0x1.8a85cafdf5e27p-3 },
+ { 0x1.23450a54048b5p+0, -0x1.7d5c97e8fc45bp-3 },
+ { 0x1.21fb1bb09e578p+0, -0x1.704255d6486e4p-3 },
+ { 0x1.20b415346d8f7p+0, -0x1.6336e2cedd7bfp-3 },
+ { 0x1.1f6fed179a1acp+0, -0x1.563a1d9b0cc6ap-3 },
+ { 0x1.1e2e99b93c7b3p+0, -0x1.494be541aaa6fp-3 },
+ { 0x1.1cf011a7a882ap+0, -0x1.3c6c1964dd0f2p-3 },
+ { 0x1.1bb44b97dba5ap+0, -0x1.2f9a99f19a243p-3 },
+ { 0x1.1a7b3e66cdd4fp+0, -0x1.22d747344446p-3 },
+ { 0x1.1944e11dc56cdp+0, -0x1.1622020d4f7f5p-3 },
+ { 0x1.18112aebb1a6ep+0, -0x1.097aabb3553f3p-3 },
+ { 0x1.16e013231b7e9p+0, -0x1.f9c24b48014c5p-4 },
+ { 0x1.15b1913f156cfp+0, -0x1.e0aaa3bdc858ap-4 },
+ { 0x1.14859cdedde13p+0, -0x1.c7ae257c952d6p-4 },
+ { 0x1.135c2dc68cfa4p+0, -0x1.aecc960a03e58p-4 },
+ { 0x1.12353bdb01684p+0, -0x1.9605bb724d541p-4 },
+ { 0x1.1110bf25b85b4p+0, -0x1.7d595ca7147cep-4 },
+ { 0x1.0feeafd2f8577p+0, -0x1.64c74165002d9p-4 },
+ { 0x1.0ecf062c51c3bp+0, -0x1.4c4f31c86d344p-4 },
+ { 0x1.0db1baa076c8bp+0, -0x1.33f0f70388258p-4 },
+ { 0x1.0c96c5bb3048ep+0, -0x1.1bac5abb3037dp-4 },
+ { 0x1.0b7e20263e070p+0, -0x1.0381272495f21p-4 },
+ { 0x1.0a67c2acd0ce3p+0, -0x1.d6de4eba2de2ap-5 },
+ { 0x1.0953a6391e982p+0, -0x1.a6ec4e8156898p-5 },
+ { 0x1.0841c3caea380p+0, -0x1.772be542e3e1bp-5 },
+ { 0x1.07321489b13eap+0, -0x1.479cadcde852dp-5 },
+ { 0x1.062491aee9904p+0, -0x1.183e4265faa5p-5 },
+ { 0x1.05193497a7cc5p+0, -0x1.d2207fdaa1b85p-6 },
+ { 0x1.040ff6b5f5e9fp+0, -0x1.742486cb4a6a2p-6 },
+ { 0x1.0308d19aa6127p+0, -0x1.1687d77cfc299p-6 },
+ { 0x1.0203beedb0c67p+0, -0x1.7293623a6b5dep-7 },
+ { 0x1.010037d38bcc2p+0, -0x1.70ec80ec8f25dp-8 },
+ { 1.0, 0.0 },
+ { 0x1.fc06d493cca10p-1, 0x1.704c1ca6b6bc9p-7 },
+ { 0x1.f81e6ac3b918fp-1, 0x1.6eac8ba664beap-6 },
+ { 0x1.f44546ef18996p-1, 0x1.11e67d040772dp-5 },
+ { 0x1.f07b10382c84bp-1, 0x1.6bc665e2105dep-5 },
+ { 0x1.ecbf7070e59d4p-1, 0x1.c4f8a9772bf1dp-5 },
+ { 0x1.e91213f715939p-1, 0x1.0ebff10fbb951p-4 },
+ { 0x1.e572a9a75f7b7p-1, 0x1.3aaf4d7805d11p-4 },
+ { 0x1.e1e0e2c530207p-1, 0x1.664ba81a4d717p-4 },
+ { 0x1.de5c72d8a8be3p-1, 0x1.9196387da6de4p-4 },
+ { 0x1.dae50fa5658ccp-1, 0x1.bc902f2b7796p-4 },
+ { 0x1.d77a71145a2dap-1, 0x1.e73ab5f584f28p-4 },
+ { 0x1.d41c51166623ep-1, 0x1.08cb78510d232p-3 },
+ { 0x1.d0ca6ba0bb29fp-1, 0x1.1dd2fe2f0dcb5p-3 },
+ { 0x1.cd847e8e59681p-1, 0x1.32b4784400df4p-3 },
+ { 0x1.ca4a499693e00p-1, 0x1.47706f3d49942p-3 },
+ { 0x1.c71b8e399e821p-1, 0x1.5c0768ee4a4dcp-3 },
+ { 0x1.c3f80faf19077p-1, 0x1.7079e86fc7c6dp-3 },
+ { 0x1.c0df92dc2b0ecp-1, 0x1.84c86e1183467p-3 },
+ { 0x1.bdd1de3cbb542p-1, 0x1.98f377a34b499p-3 },
+ { 0x1.baceb9e1007a3p-1, 0x1.acfb803bc924bp-3 },
+ { 0x1.b7d5ef543e55ep-1, 0x1.c0e10098b025fp-3 },
+ { 0x1.b4e749977d953p-1, 0x1.d4a46efe103efp-3 },
+ { 0x1.b20295155478ep-1, 0x1.e8463f45b8d0bp-3 },
+ { 0x1.af279f8e82be2p-1, 0x1.fbc6e3228997fp-3 },
+ { 0x1.ac5638197fdf3p-1, 0x1.079364f2e5aa8p-2 },
+ { 0x1.a98e2f102e087p-1, 0x1.1133306010a63p-2 },
+ { 0x1.a6cf5606d05c1p-1, 0x1.1ac309631bd17p-2 },
+ { 0x1.a4197fc04d746p-1, 0x1.24432485370c1p-2 },
+ { 0x1.a16c80293dc01p-1, 0x1.2db3b5449132fp-2 },
+ { 0x1.9ec82c4dc5bc9p-1, 0x1.3714ee1d7a32p-2 },
+ { 0x1.9c2c5a491f534p-1, 0x1.406700ab52c94p-2 },
+ { 0x1.9998e1480b618p-1, 0x1.49aa1d87522b2p-2 },
+ { 0x1.970d9977c6c2dp-1, 0x1.52de746d7ecb2p-2 },
+ { 0x1.948a5c023d212p-1, 0x1.5c0434336b343p-2 },
+ { 0x1.920f0303d6809p-1, 0x1.651b8ad6c90d1p-2 },
+ { 0x1.8f9b698a98b45p-1, 0x1.6e24a56ab5831p-2 },
+ { 0x1.8d2f6b81726f6p-1, 0x1.771fb04ec29b1p-2 },
+ { 0x1.8acae5bb55badp-1, 0x1.800cd6f19c25ep-2 },
+ { 0x1.886db5d9275b8p-1, 0x1.88ec441df11dfp-2 },
+ { 0x1.8617ba567c13cp-1, 0x1.91be21b7c93f5p-2 },
+ { 0x1.83c8d27487800p-1, 0x1.9a8298f8c7454p-2 },
+ { 0x1.8180de3c5dbe7p-1, 0x1.a339d255c04ddp-2 },
+ { 0x1.7f3fbe71cdb71p-1, 0x1.abe3f59f43db7p-2 },
+ { 0x1.7d055498071c1p-1, 0x1.b48129deca9efp-2 },
+ { 0x1.7ad182e54f65ap-1, 0x1.bd119575364c1p-2 },
+ { 0x1.78a42c3c90125p-1, 0x1.c5955e23ebcbcp-2 },
+ { 0x1.767d342f76944p-1, 0x1.ce0ca8f4e1557p-2 },
+ { 0x1.745c7ef26b00ap-1, 0x1.d6779a5a75774p-2 },
+ { 0x1.7241f15769d0fp-1, 0x1.ded6563550d27p-2 },
+ { 0x1.702d70d396e41p-1, 0x1.e728ffafd840ep-2 },
+ { 0x1.6e1ee3700cd11p-1, 0x1.ef6fb96c8d739p-2 },
+ { 0x1.6c162fc9cbe02p-1, 0x1.f7aaa57907219p-2 } }
+};
diff --git a/contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c b/contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c
new file mode 100644
index 000000000000..c64d88742136
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log2f_2u5.c
@@ -0,0 +1,77 @@
+/*
+ * Single-precision vector log2 function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ uint32x4_t min_norm;
+ uint16x8_t special_bound;
+ uint32x4_t off, mantissa_mask;
+ float32x4_t poly[9];
+} data = {
+ /* Coefficients generated using Remez algorithm approximate
+ log2(1+r)/r for r in [ -1/3, 1/3 ].
+ rel error: 0x1.c4c4b0cp-26. */
+ .poly = { V4 (0x1.715476p0f), /* (float)(1 / ln(2)). */
+ V4 (-0x1.715458p-1f), V4 (0x1.ec701cp-2f), V4 (-0x1.7171a4p-2f),
+ V4 (0x1.27a0b8p-2f), V4 (-0x1.e5143ep-3f), V4 (0x1.9d8ecap-3f),
+ V4 (-0x1.c675bp-3f), V4 (0x1.9e495p-3f) },
+ .min_norm = V4 (0x00800000),
+ .special_bound = V8 (0x7f00), /* asuint32(inf) - min_norm. */
+ .off = V4 (0x3f2aaaab), /* 0.666667. */
+ .mantissa_mask = V4 (0x007fffff),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t n, float32x4_t p, float32x4_t r,
+ uint16x4_t cmp)
+{
+ /* Fall back to scalar code. */
+ return v_call_f32 (log2f, x, vfmaq_f32 (n, p, r), vmovl_u16 (cmp));
+}
+
+/* Fast implementation of single-precision AdvSIMD log2,
+ relying on the same argument reduction as AdvSIMD logf.
+ Maximum error: 2.48 ULP
+ _ZGVnN4v_log2f(0x1.558174p+0) got 0x1.a9be84p-2
+ want 0x1.a9be8p-2. */
+float32x4_t VPCS_ATTR V_NAME_F1 (log2) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ uint32x4_t u = vreinterpretq_u32_f32 (x);
+ uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
+ vget_low_u16 (d->special_bound));
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = vsubq_u32 (u, d->off);
+ float32x4_t n = vcvtq_f32_s32 (
+ vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* signextend. */
+ u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
+ float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+ /* y = log2(1+r) + n. */
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t p = v_pw_horner_8_f32 (r, r2, d->poly);
+
+ if (unlikely (v_any_u16h (special)))
+ return special_case (x, n, p, r, special);
+ return vfmaq_f32 (n, p, r);
+}
+
+PL_SIG (V, F, 1, log2, 0.01, 11.1)
+PL_TEST_ULP (V_NAME_F1 (log2), 1.99)
+PL_TEST_EXPECT_FENV_ALWAYS (V_NAME_F1 (log2))
+PL_TEST_INTERVAL (V_NAME_F1 (log2), -0.0, -0x1p126, 100)
+PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-149, 0x1p-126, 4000)
+PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-126, 0x1p-23, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log2), 0x1p-23, 1.0, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log2), 1.0, 100, 50000)
+PL_TEST_INTERVAL (V_NAME_F1 (log2), 100, inf, 50000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_log_data.c b/contrib/arm-optimized-routines/pl/math/v_log_data.c
new file mode 100644
index 000000000000..a26e8a051d97
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log_data.c
@@ -0,0 +1,161 @@
+/*
+ * Lookup table for double-precision log(x) vector function.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct v_log_data __v_log_data = {
+ /* Worst-case error: 1.17 + 0.5 ulp.
+ Rel error: 0x1.6272e588p-56 in [-0x1.fc1p-9, 0x1.009p-8]. */
+ .poly = { -0x1.ffffffffffff7p-2, 0x1.55555555170d4p-2, -0x1.0000000399c27p-2,
+ 0x1.999b2e90e94cap-3, -0x1.554e550bd501ep-3 },
+ .ln2 = 0x1.62e42fefa39efp-1,
+ /* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + poly(z/c - 1)
+
+ where z is in [a;2a) which is split into N subintervals (a=0x1.69009p-1,
+ N=128) and log(c) and 1/c for the ith subinterval comes from two lookup
+ tables:
+
+ table[i].invc = 1/c
+ table[i].logc = (double)log(c)
+
+ where c is near the center of the subinterval and is chosen by trying
+ several floating point invc candidates around 1/center and selecting one
+ for which the error in (double)log(c) is minimized (< 0x1p-74), except the
+ subinterval that contains 1 and the previous one got tweaked to avoid
+ cancellation. */
+ .table = { { 0x1.6a133d0dec120p+0, -0x1.62fe995eb963ap-2 },
+ { 0x1.6815f2f3e42edp+0, -0x1.5d5a48dad6b67p-2 },
+ { 0x1.661e39be1ac9ep+0, -0x1.57bde257d2769p-2 },
+ { 0x1.642bfa30ac371p+0, -0x1.52294fbf2af55p-2 },
+ { 0x1.623f1d916f323p+0, -0x1.4c9c7b598aa38p-2 },
+ { 0x1.60578da220f65p+0, -0x1.47174fc5ff560p-2 },
+ { 0x1.5e75349dea571p+0, -0x1.4199b7fa7b5cap-2 },
+ { 0x1.5c97fd387a75ap+0, -0x1.3c239f48cfb99p-2 },
+ { 0x1.5abfd2981f200p+0, -0x1.36b4f154d2aebp-2 },
+ { 0x1.58eca051dc99cp+0, -0x1.314d9a0ff32fbp-2 },
+ { 0x1.571e526d9df12p+0, -0x1.2bed85cca3cffp-2 },
+ { 0x1.5554d555b3fcbp+0, -0x1.2694a11421af9p-2 },
+ { 0x1.539015e2a20cdp+0, -0x1.2142d8d014fb2p-2 },
+ { 0x1.51d0014ee0164p+0, -0x1.1bf81a2c77776p-2 },
+ { 0x1.50148538cd9eep+0, -0x1.16b452a39c6a4p-2 },
+ { 0x1.4e5d8f9f698a1p+0, -0x1.11776ffa6c67ep-2 },
+ { 0x1.4cab0edca66bep+0, -0x1.0c416035020e0p-2 },
+ { 0x1.4afcf1a9db874p+0, -0x1.071211aa10fdap-2 },
+ { 0x1.495327136e16fp+0, -0x1.01e972e293b1bp-2 },
+ { 0x1.47ad9e84af28fp+0, -0x1.f98ee587fd434p-3 },
+ { 0x1.460c47b39ae15p+0, -0x1.ef5800ad716fbp-3 },
+ { 0x1.446f12b278001p+0, -0x1.e52e160484698p-3 },
+ { 0x1.42d5efdd720ecp+0, -0x1.db1104b19352ep-3 },
+ { 0x1.4140cfe001a0fp+0, -0x1.d100ac59e0bd6p-3 },
+ { 0x1.3fafa3b421f69p+0, -0x1.c6fced287c3bdp-3 },
+ { 0x1.3e225c9c8ece5p+0, -0x1.bd05a7b317c29p-3 },
+ { 0x1.3c98ec29a211ap+0, -0x1.b31abd229164fp-3 },
+ { 0x1.3b13442a413fep+0, -0x1.a93c0edadb0a3p-3 },
+ { 0x1.399156baa3c54p+0, -0x1.9f697ee30d7ddp-3 },
+ { 0x1.38131639b4cdbp+0, -0x1.95a2efa9aa40ap-3 },
+ { 0x1.36987540fbf53p+0, -0x1.8be843d796044p-3 },
+ { 0x1.352166b648f61p+0, -0x1.82395ecc477edp-3 },
+ { 0x1.33adddb3eb575p+0, -0x1.7896240966422p-3 },
+ { 0x1.323dcd99fc1d3p+0, -0x1.6efe77aca8c55p-3 },
+ { 0x1.30d129fefc7d2p+0, -0x1.65723e117ec5cp-3 },
+ { 0x1.2f67e6b72fe7dp+0, -0x1.5bf15c0955706p-3 },
+ { 0x1.2e01f7cf8b187p+0, -0x1.527bb6c111da1p-3 },
+ { 0x1.2c9f518ddc86ep+0, -0x1.491133c939f8fp-3 },
+ { 0x1.2b3fe86e5f413p+0, -0x1.3fb1b90c7fc58p-3 },
+ { 0x1.29e3b1211b25cp+0, -0x1.365d2cc485f8dp-3 },
+ { 0x1.288aa08b373cfp+0, -0x1.2d13758970de7p-3 },
+ { 0x1.2734abcaa8467p+0, -0x1.23d47a721fd47p-3 },
+ { 0x1.25e1c82459b81p+0, -0x1.1aa0229f25ec2p-3 },
+ { 0x1.2491eb1ad59c5p+0, -0x1.117655ddebc3bp-3 },
+ { 0x1.23450a54048b5p+0, -0x1.0856fbf83ab6bp-3 },
+ { 0x1.21fb1bb09e578p+0, -0x1.fe83fabbaa106p-4 },
+ { 0x1.20b415346d8f7p+0, -0x1.ec6e8507a56cdp-4 },
+ { 0x1.1f6fed179a1acp+0, -0x1.da6d68c7cc2eap-4 },
+ { 0x1.1e2e99b93c7b3p+0, -0x1.c88078462be0cp-4 },
+ { 0x1.1cf011a7a882ap+0, -0x1.b6a786a423565p-4 },
+ { 0x1.1bb44b97dba5ap+0, -0x1.a4e2676ac7f85p-4 },
+ { 0x1.1a7b3e66cdd4fp+0, -0x1.9330eea777e76p-4 },
+ { 0x1.1944e11dc56cdp+0, -0x1.8192f134d5ad9p-4 },
+ { 0x1.18112aebb1a6ep+0, -0x1.70084464f0538p-4 },
+ { 0x1.16e013231b7e9p+0, -0x1.5e90bdec5cb1fp-4 },
+ { 0x1.15b1913f156cfp+0, -0x1.4d2c3433c5536p-4 },
+ { 0x1.14859cdedde13p+0, -0x1.3bda7e219879ap-4 },
+ { 0x1.135c2dc68cfa4p+0, -0x1.2a9b732d27194p-4 },
+ { 0x1.12353bdb01684p+0, -0x1.196eeb2b10807p-4 },
+ { 0x1.1110bf25b85b4p+0, -0x1.0854be8ef8a7ep-4 },
+ { 0x1.0feeafd2f8577p+0, -0x1.ee998cb277432p-5 },
+ { 0x1.0ecf062c51c3bp+0, -0x1.ccadb79919fb9p-5 },
+ { 0x1.0db1baa076c8bp+0, -0x1.aae5b1d8618b0p-5 },
+ { 0x1.0c96c5bb3048ep+0, -0x1.89413015d7442p-5 },
+ { 0x1.0b7e20263e070p+0, -0x1.67bfe7bf158dep-5 },
+ { 0x1.0a67c2acd0ce3p+0, -0x1.46618f83941bep-5 },
+ { 0x1.0953a6391e982p+0, -0x1.2525df1b0618ap-5 },
+ { 0x1.0841c3caea380p+0, -0x1.040c8e2f77c6ap-5 },
+ { 0x1.07321489b13eap+0, -0x1.c62aad39f738ap-6 },
+ { 0x1.062491aee9904p+0, -0x1.847fe3bdead9cp-6 },
+ { 0x1.05193497a7cc5p+0, -0x1.43183683400acp-6 },
+ { 0x1.040ff6b5f5e9fp+0, -0x1.01f31c4e1d544p-6 },
+ { 0x1.0308d19aa6127p+0, -0x1.82201d1e6b69ap-7 },
+ { 0x1.0203beedb0c67p+0, -0x1.00dd0f3e1bfd6p-7 },
+ { 0x1.010037d38bcc2p+0, -0x1.ff6fe1feb4e53p-9 },
+ { 1.0, 0.0 },
+ { 0x1.fc06d493cca10p-1, 0x1.fe91885ec8e20p-8 },
+ { 0x1.f81e6ac3b918fp-1, 0x1.fc516f716296dp-7 },
+ { 0x1.f44546ef18996p-1, 0x1.7bb4dd70a015bp-6 },
+ { 0x1.f07b10382c84bp-1, 0x1.f84c99b34b674p-6 },
+ { 0x1.ecbf7070e59d4p-1, 0x1.39f9ce4fb2d71p-5 },
+ { 0x1.e91213f715939p-1, 0x1.7756c0fd22e78p-5 },
+ { 0x1.e572a9a75f7b7p-1, 0x1.b43ee82db8f3ap-5 },
+ { 0x1.e1e0e2c530207p-1, 0x1.f0b3fced60034p-5 },
+ { 0x1.de5c72d8a8be3p-1, 0x1.165bd78d4878ep-4 },
+ { 0x1.dae50fa5658ccp-1, 0x1.3425d2715ebe6p-4 },
+ { 0x1.d77a71145a2dap-1, 0x1.51b8bd91b7915p-4 },
+ { 0x1.d41c51166623ep-1, 0x1.6f15632c76a47p-4 },
+ { 0x1.d0ca6ba0bb29fp-1, 0x1.8c3c88ecbe503p-4 },
+ { 0x1.cd847e8e59681p-1, 0x1.a92ef077625dap-4 },
+ { 0x1.ca4a499693e00p-1, 0x1.c5ed5745fa006p-4 },
+ { 0x1.c71b8e399e821p-1, 0x1.e27876de1c993p-4 },
+ { 0x1.c3f80faf19077p-1, 0x1.fed104fce4cdcp-4 },
+ { 0x1.c0df92dc2b0ecp-1, 0x1.0d7bd9c17d78bp-3 },
+ { 0x1.bdd1de3cbb542p-1, 0x1.1b76986cef97bp-3 },
+ { 0x1.baceb9e1007a3p-1, 0x1.295913d24f750p-3 },
+ { 0x1.b7d5ef543e55ep-1, 0x1.37239fa295d17p-3 },
+ { 0x1.b4e749977d953p-1, 0x1.44d68dd78714bp-3 },
+ { 0x1.b20295155478ep-1, 0x1.52722ebe5d780p-3 },
+ { 0x1.af279f8e82be2p-1, 0x1.5ff6d12671f98p-3 },
+ { 0x1.ac5638197fdf3p-1, 0x1.6d64c2389484bp-3 },
+ { 0x1.a98e2f102e087p-1, 0x1.7abc4da40fddap-3 },
+ { 0x1.a6cf5606d05c1p-1, 0x1.87fdbda1e8452p-3 },
+ { 0x1.a4197fc04d746p-1, 0x1.95295b06a5f37p-3 },
+ { 0x1.a16c80293dc01p-1, 0x1.a23f6d34abbc5p-3 },
+ { 0x1.9ec82c4dc5bc9p-1, 0x1.af403a28e04f2p-3 },
+ { 0x1.9c2c5a491f534p-1, 0x1.bc2c06a85721ap-3 },
+ { 0x1.9998e1480b618p-1, 0x1.c903161240163p-3 },
+ { 0x1.970d9977c6c2dp-1, 0x1.d5c5aa93287ebp-3 },
+ { 0x1.948a5c023d212p-1, 0x1.e274051823fa9p-3 },
+ { 0x1.920f0303d6809p-1, 0x1.ef0e656300c16p-3 },
+ { 0x1.8f9b698a98b45p-1, 0x1.fb9509f05aa2ap-3 },
+ { 0x1.8d2f6b81726f6p-1, 0x1.04041821f37afp-2 },
+ { 0x1.8acae5bb55badp-1, 0x1.0a340a49b3029p-2 },
+ { 0x1.886db5d9275b8p-1, 0x1.105a7918a126dp-2 },
+ { 0x1.8617ba567c13cp-1, 0x1.1677819812b84p-2 },
+ { 0x1.83c8d27487800p-1, 0x1.1c8b405b40c0ep-2 },
+ { 0x1.8180de3c5dbe7p-1, 0x1.2295d16cfa6b1p-2 },
+ { 0x1.7f3fbe71cdb71p-1, 0x1.28975066318a2p-2 },
+ { 0x1.7d055498071c1p-1, 0x1.2e8fd855d86fcp-2 },
+ { 0x1.7ad182e54f65ap-1, 0x1.347f83d605e59p-2 },
+ { 0x1.78a42c3c90125p-1, 0x1.3a666d1244588p-2 },
+ { 0x1.767d342f76944p-1, 0x1.4044adb6f8ec4p-2 },
+ { 0x1.745c7ef26b00ap-1, 0x1.461a5f077558cp-2 },
+ { 0x1.7241f15769d0fp-1, 0x1.4be799e20b9c8p-2 },
+ { 0x1.702d70d396e41p-1, 0x1.51ac76a6b79dfp-2 },
+ { 0x1.6e1ee3700cd11p-1, 0x1.57690d5744a45p-2 },
+ { 0x1.6c162fc9cbe02p-1, 0x1.5d1d758e45217p-2 } }
+};
diff --git a/contrib/arm-optimized-routines/pl/math/v_log_inline.h b/contrib/arm-optimized-routines/pl/math/v_log_inline.h
new file mode 100644
index 000000000000..2df00cf4ddf4
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_log_inline.h
@@ -0,0 +1,104 @@
+/*
+ * Double-precision vector log(x) function - inline version
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "math_config.h"
+
+#ifndef V_LOG_INLINE_POLY_ORDER
+# error Cannot use inline log helper without specifying poly order (options are 4 or 5)
+#endif
+
+#if V_LOG_INLINE_POLY_ORDER == 4
+# define POLY \
+ { \
+ V2 (-0x1.ffffffffcbad3p-2), V2 (0x1.555555578ed68p-2), \
+ V2 (-0x1.0000d3a1e7055p-2), V2 (0x1.999392d02a63ep-3) \
+ }
+#elif V_LOG_INLINE_POLY_ORDER == 5
+# define POLY \
+ { \
+ V2 (-0x1.ffffffffffff7p-2), V2 (0x1.55555555170d4p-2), \
+ V2 (-0x1.0000000399c27p-2), V2 (0x1.999b2e90e94cap-3), \
+ V2 (-0x1.554e550bd501ep-3) \
+ }
+#else
+# error Can only choose order 4 or 5 for log poly
+#endif
+
+struct v_log_inline_data
+{
+ float64x2_t poly[V_LOG_INLINE_POLY_ORDER];
+ float64x2_t ln2;
+ uint64x2_t off, sign_exp_mask;
+};
+
+#define V_LOG_CONSTANTS \
+ { \
+ .poly = POLY, .ln2 = V2 (0x1.62e42fefa39efp-1), \
+ .sign_exp_mask = V2 (0xfff0000000000000), .off = V2 (0x3fe6900900000000) \
+ }
+
+#define A(i) d->poly[i]
+#define N (1 << V_LOG_TABLE_BITS)
+#define IndexMask (N - 1)
+
+struct entry
+{
+ float64x2_t invc;
+ float64x2_t logc;
+};
+
+static inline struct entry
+log_lookup (uint64x2_t i)
+{
+ /* Since N is a power of 2, n % N = n & (N - 1). */
+ struct entry e;
+ uint64_t i0 = (i[0] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+ uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask;
+ float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc);
+ float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc);
+ e.invc = vuzp1q_f64 (e0, e1);
+ e.logc = vuzp2q_f64 (e0, e1);
+ return e;
+}
+
+static inline float64x2_t
+v_log_inline (float64x2_t x, const struct v_log_inline_data *d)
+{
+ float64x2_t z, r, r2, p, y, kd, hi;
+ uint64x2_t ix, iz, tmp;
+ int64x2_t k;
+ struct entry e;
+
+ ix = vreinterpretq_u64_f64 (x);
+
+ /* x = 2^k z; where z is in range [Off,2*Off) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ tmp = vsubq_u64 (ix, d->off);
+ k = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */
+ iz = vsubq_u64 (ix, vandq_u64 (tmp, d->sign_exp_mask));
+ z = vreinterpretq_f64_u64 (iz);
+ e = log_lookup (tmp);
+
+ /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
+ r = vfmaq_f64 (v_f64 (-1.0), z, e.invc);
+ kd = vcvtq_f64_s64 (k);
+
+ /* hi = r + log(c) + k*Ln2. */
+ hi = vfmaq_f64 (vaddq_f64 (e.logc, r), kd, d->ln2);
+ /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */
+ r2 = vmulq_f64 (r, r);
+ y = vfmaq_f64 (A (2), A (3), r);
+ p = vfmaq_f64 (A (0), A (1), r);
+#if V_LOG_INLINE_POLY_ORDER == 5
+ y = vfmaq_f64 (y, A (4), r2);
+#endif
+ y = vfmaq_f64 (p, y, r2);
+
+ return vfmaq_f64 (hi, y, r2);
+}
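Usage mirrors the log1p helper: the polynomial order must be chosen before the include (as enforced by the #error above), and a constants struct initialised from V_LOG_CONSTANTS is passed in. A hypothetical caller, with an illustrative kernel name:

/* Hypothetical caller of the inline log helper.  */
#define V_LOG_INLINE_POLY_ORDER 5
#include "v_log_inline.h"

static const struct v_log_inline_data log_consts = V_LOG_CONSTANTS;

static inline float64x2_t
my_log_kernel (float64x2_t x)
{
  /* Caller screens out x <= 0 and non-finite lanes beforehand.  */
  return v_log_inline (x, &log_consts);
}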
diff --git a/contrib/arm-optimized-routines/pl/math/v_logf_inline.h b/contrib/arm-optimized-routines/pl/math/v_logf_inline.h
new file mode 100644
index 000000000000..c00fe0909afc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_logf_inline.h
@@ -0,0 +1,59 @@
+/*
+ * Single-precision vector log function - inline version
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+
+struct v_logf_data
+{
+ float32x4_t poly[7];
+ float32x4_t ln2;
+ uint32x4_t off, mantissa_mask;
+};
+
+#define V_LOGF_CONSTANTS \
+ { \
+ .poly \
+ = { V4 (-0x1.3e737cp-3f), V4 (0x1.5a9aa2p-3f), V4 (-0x1.4f9934p-3f), \
+ V4 (0x1.961348p-3f), V4 (-0x1.00187cp-2f), V4 (0x1.555d7cp-2f), \
+ V4 (-0x1.ffffc8p-2f) }, \
+ .ln2 = V4 (0x1.62e43p-1f), .off = V4 (0x3f2aaaab), \
+ .mantissa_mask = V4 (0x007fffff) \
+ }
+
+#define P(i) d->poly[7 - i]
+
+static inline float32x4_t
+v_logf_inline (float32x4_t x, const struct v_logf_data *d)
+{
+ float32x4_t n, p, q, r, r2, y;
+ uint32x4_t u;
+
+ u = vreinterpretq_u32_f32 (x);
+
+ /* x = 2^n * (1+r), where 2/3 < 1+r < 4/3. */
+ u = vsubq_u32 (u, d->off);
+ n = vcvtq_f32_s32 (
+    vshrq_n_s32 (vreinterpretq_s32_u32 (u), 23)); /* sign extend. */
+ u = vandq_u32 (u, d->mantissa_mask);
+ u = vaddq_u32 (u, d->off);
+ r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (1.0f));
+
+ /* y = log(1+r) + n*ln2. */
+ r2 = vmulq_f32 (r, r);
+ /* n*ln2 + r + r2*(P1 + r*P2 + r2*(P3 + r*P4 + r2*(P5 + r*P6 + r2*P7))). */
+ p = vfmaq_f32 (P (5), P (6), r);
+ q = vfmaq_f32 (P (3), P (4), r);
+ y = vfmaq_f32 (P (1), P (2), r);
+ p = vfmaq_f32 (p, P (7), r2);
+ q = vfmaq_f32 (q, p, r2);
+ y = vfmaq_f32 (y, q, r2);
+ p = vfmaq_f32 (r, d->ln2, n);
+
+ return vfmaq_f32 (p, y, r2);
+}
+
+#undef P
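To make the bit-level reduction above easier to follow, here is a hedged scalar sketch of the same idea (illustration only; libm log1pf stands in for the polynomial, and it assumes normal positive x):

#include <math.h>
#include <stdint.h>
#include <string.h>

static float
logf_reduction_sketch (float x)
{
  const uint32_t off = 0x3f2aaaab;  /* roughly asuint(2/3), as in the constants above. */
  uint32_t u;
  memcpy (&u, &x, sizeof u);
  u -= off;
  int32_t n = (int32_t) u >> 23;    /* arithmetic shift extracts the exponent. */
  u = (u & 0x007fffff) + off;       /* keep the mantissa, re-bias around 1. */
  float z;
  memcpy (&z, &u, sizeof z);
  float r = z - 1.0f;               /* 2/3 < 1+r < 4/3, so |r| < 1/3. */
  return n * 0x1.62e43p-1f + log1pf (r); /* log(x) = n*ln2 + log(1+r). */
}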
diff --git a/contrib/arm-optimized-routines/pl/math/v_math.h b/contrib/arm-optimized-routines/pl/math/v_math.h
new file mode 100644
index 000000000000..1b10929faccc
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_math.h
@@ -0,0 +1,175 @@
+/*
+ * Vector math abstractions.
+ *
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _V_MATH_H
+#define _V_MATH_H
+
+#ifndef WANT_VMATH
+/* Enable the build of vector math code. */
+# define WANT_VMATH 1
+#endif
+
+#if WANT_VMATH
+
+# if __aarch64__
+# define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
+# else
+# error "Cannot build without AArch64"
+# endif
+
+# include <stdint.h>
+# include "math_config.h"
+# if __aarch64__
+
+# include <arm_neon.h>
+
+/* Shorthand helpers for declaring constants. */
+# define V2(X) { X, X }
+# define V4(X) { X, X, X, X }
+# define V8(X) { X, X, X, X, X, X, X, X }
+
+static inline int
+v_any_u16h (uint16x4_t x)
+{
+ return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
+}
+
+static inline float32x4_t
+v_f32 (float x)
+{
+ return (float32x4_t) V4 (x);
+}
+static inline uint32x4_t
+v_u32 (uint32_t x)
+{
+ return (uint32x4_t) V4 (x);
+}
+static inline int32x4_t
+v_s32 (int32_t x)
+{
+ return (int32x4_t) V4 (x);
+}
+
+/* true if any element of a vector compare result is non-zero. */
+static inline int
+v_any_u32 (uint32x4_t x)
+{
+ /* assume elements in x are either 0 or -1u. */
+ return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
+}
+static inline int
+v_any_u32h (uint32x2_t x)
+{
+ return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
+}
+static inline float32x4_t
+v_lookup_f32 (const float *tab, uint32x4_t idx)
+{
+ return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
+}
+static inline uint32x4_t
+v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
+{
+ return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
+}
+static inline float32x4_t
+v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
+{
+ return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
+ p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
+}
+static inline float32x4_t
+v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
+ float32x4_t y, uint32x4_t p)
+{
+ return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
+ p[1] ? f (x1[1], x2[1]) : y[1],
+ p[2] ? f (x1[2], x2[2]) : y[2],
+ p[3] ? f (x1[3], x2[3]) : y[3] };
+}
+static inline float32x4_t
+v_zerofy_f32 (float32x4_t x, uint32x4_t mask)
+{
+ return vreinterpretq_f32_u32 (vbicq_u32 (vreinterpretq_u32_f32 (x), mask));
+}
+
+static inline float64x2_t
+v_f64 (double x)
+{
+ return (float64x2_t) V2 (x);
+}
+static inline uint64x2_t
+v_u64 (uint64_t x)
+{
+ return (uint64x2_t) V2 (x);
+}
+static inline int64x2_t
+v_s64 (int64_t x)
+{
+ return (int64x2_t) V2 (x);
+}
+
+/* true if any element of a vector compare result is non-zero. */
+static inline int
+v_any_u64 (uint64x2_t x)
+{
+ /* assume elements in x are either 0 or -1u. */
+ return vpaddd_u64 (x) != 0;
+}
+/* true if all elements of a vector compare result are non-zero. */
+static inline int
+v_all_u64 (uint64x2_t x)
+{
+ /* assume elements in x are either 0 or -1u. */
+ return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
+}
+static inline float64x2_t
+v_lookup_f64 (const double *tab, uint64x2_t idx)
+{
+ return (float64x2_t){ tab[idx[0]], tab[idx[1]] };
+}
+static inline uint64x2_t
+v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
+{
+ return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
+}
+
+static inline float64x2_t
+v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
+{
+ double p1 = p[1];
+ double x1 = x[1];
+ if (likely (p[0]))
+ y[0] = f (x[0]);
+ if (likely (p1))
+ y[1] = f (x1);
+ return y;
+}
+
+static inline float64x2_t
+v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
+ float64x2_t y, uint64x2_t p)
+{
+ double p1 = p[1];
+ double x1h = x1[1];
+ double x2h = x2[1];
+ if (likely (p[0]))
+ y[0] = f (x1[0], x2[0]);
+ if (likely (p1))
+ y[1] = f (x1h, x2h);
+ return y;
+}
+static inline float64x2_t
+v_zerofy_f64 (float64x2_t x, uint64x2_t mask)
+{
+ return vreinterpretq_f64_u64 (vbicq_u64 (vreinterpretq_u64_f64 (x), mask));
+}
+
+# endif
+#endif
+
+#endif
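A brief usage sketch of the fallback helpers defined above (illustration only; the wrapper name and the choice of expf are assumptions): lanes flagged in the mask are recomputed with a scalar routine, while the remaining lanes keep the fast-path result.

/* Assumes v_math.h has already been included.  */
#include <math.h>

static inline float32x4_t
fixup_lanes_sketch (float32x4_t x, float32x4_t fast_result, uint32x4_t special)
{
  /* Where special != 0, call expf (x[i]); elsewhere keep fast_result[i].  */
  return v_call_f32 (expf, x, fast_result, special);
}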
diff --git a/contrib/arm-optimized-routines/pl/math/v_pow_1u5.c b/contrib/arm-optimized-routines/pl/math/v_pow_1u5.c
new file mode 100644
index 000000000000..9053347d4e35
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_pow_1u5.c
@@ -0,0 +1,259 @@
+/*
+ * Double-precision vector pow function.
+ *
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+/* Defines parameters of the approximation and scalar fallback. */
+#include "finite_pow.h"
+
+#define VecSmallExp v_u64 (SmallExp)
+#define VecThresExp v_u64 (ThresExp)
+
+#define VecSmallPowX v_u64 (SmallPowX)
+#define VecThresPowX v_u64 (ThresPowX)
+#define VecSmallPowY v_u64 (SmallPowY)
+#define VecThresPowY v_u64 (ThresPowY)
+
+static const struct data
+{
+ float64x2_t log_poly[7];
+ float64x2_t exp_poly[3];
+ float64x2_t ln2_hi, ln2_lo;
+ float64x2_t shift, inv_ln2_n, ln2_hi_n, ln2_lo_n;
+} data = {
+ /* Coefficients copied from v_pow_log_data.c
+ relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8]
+ Coefficients are scaled to match the scaling during evaluation. */
+ .log_poly = { V2 (-0x1p-1), V2 (0x1.555555555556p-2 * -2),
+ V2 (-0x1.0000000000006p-2 * -2), V2 (0x1.999999959554ep-3 * 4),
+ V2 (-0x1.555555529a47ap-3 * 4), V2 (0x1.2495b9b4845e9p-3 * -8),
+ V2 (-0x1.0002b8b263fc3p-3 * -8) },
+ .ln2_hi = V2 (0x1.62e42fefa3800p-1),
+ .ln2_lo = V2 (0x1.ef35793c76730p-45),
+ /* Polynomial coefficients: abs error: 1.43*2^-58, ulp error: 0.549
+ (0.550 without fma) if |x| < ln2/512. */
+ .exp_poly = { V2 (0x1.fffffffffffd4p-2), V2 (0x1.5555571d6ef9p-3),
+ V2 (0x1.5555576a5adcep-5) },
+  .shift = V2 (0x1.8p52), /* round to nearest int without intrinsics. */
+ .inv_ln2_n = V2 (0x1.71547652b82fep8), /* N/ln2. */
+ .ln2_hi_n = V2 (0x1.62e42fefc0000p-9), /* ln2/N. */
+ .ln2_lo_n = V2 (-0x1.c610ca86c3899p-45),
+};
+
+#define A(i) data.log_poly[i]
+#define C(i) data.exp_poly[i]
+
+/* This version implements an algorithm close to AOR scalar pow but
+ - does not implement the trick in the exp's specialcase subroutine to avoid
+ double-rounding,
+ - does not use a tail in the exponential core computation,
+ - and pow's exp polynomial order and table bits might differ.
+
+ Maximum measured error is 1.04 ULPs:
+ _ZGVnN2vv_pow(0x1.024a3e56b3c3p-136, 0x1.87910248b58acp-13)
+ got 0x1.f71162f473251p-1
+ want 0x1.f71162f473252p-1. */
+
+static inline float64x2_t
+v_masked_lookup_f64 (const double *table, uint64x2_t i)
+{
+ return (float64x2_t){
+ table[(i[0] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)],
+ table[(i[1] >> (52 - V_POW_LOG_TABLE_BITS)) & (N_LOG - 1)]
+ };
+}
+
+/* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about
+ additional 15 bits precision. IX is the bit representation of x, but
+ normalized in the subnormal range using the sign bit for the exponent. */
+static inline float64x2_t
+v_log_inline (uint64x2_t ix, float64x2_t *tail, const struct data *d)
+{
+ /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
+ The range is split into N subintervals.
+ The ith subinterval contains z and c is near its center. */
+ uint64x2_t tmp = vsubq_u64 (ix, v_u64 (Off));
+ int64x2_t k
+ = vshrq_n_s64 (vreinterpretq_s64_u64 (tmp), 52); /* arithmetic shift. */
+ uint64x2_t iz = vsubq_u64 (ix, vandq_u64 (tmp, v_u64 (0xfffULL << 52)));
+ float64x2_t z = vreinterpretq_f64_u64 (iz);
+ float64x2_t kd = vcvtq_f64_s64 (k);
+ /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
+ float64x2_t invc = v_masked_lookup_f64 (__v_pow_log_data.invc, tmp);
+ float64x2_t logc = v_masked_lookup_f64 (__v_pow_log_data.logc, tmp);
+ float64x2_t logctail = v_masked_lookup_f64 (__v_pow_log_data.logctail, tmp);
+ /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
+     |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
+ float64x2_t r = vfmaq_f64 (v_f64 (-1.0), z, invc);
+ /* k*Ln2 + log(c) + r. */
+ float64x2_t t1 = vfmaq_f64 (logc, kd, d->ln2_hi);
+ float64x2_t t2 = vaddq_f64 (t1, r);
+ float64x2_t lo1 = vfmaq_f64 (logctail, kd, d->ln2_lo);
+ float64x2_t lo2 = vaddq_f64 (vsubq_f64 (t1, t2), r);
+ /* Evaluation is optimized assuming superscalar pipelined execution. */
+ float64x2_t ar = vmulq_f64 (A (0), r);
+ float64x2_t ar2 = vmulq_f64 (r, ar);
+ float64x2_t ar3 = vmulq_f64 (r, ar2);
+ /* k*Ln2 + log(c) + r + A[0]*r*r. */
+ float64x2_t hi = vaddq_f64 (t2, ar2);
+ float64x2_t lo3 = vfmaq_f64 (vnegq_f64 (ar2), ar, r);
+ float64x2_t lo4 = vaddq_f64 (vsubq_f64 (t2, hi), ar2);
+ /* p = log1p(r) - r - A[0]*r*r. */
+ float64x2_t a56 = vfmaq_f64 (A (5), r, A (6));
+ float64x2_t a34 = vfmaq_f64 (A (3), r, A (4));
+ float64x2_t a12 = vfmaq_f64 (A (1), r, A (2));
+ float64x2_t p = vfmaq_f64 (a34, ar2, a56);
+ p = vfmaq_f64 (a12, ar2, p);
+ p = vmulq_f64 (ar3, p);
+ float64x2_t lo
+ = vaddq_f64 (vaddq_f64 (vaddq_f64 (vaddq_f64 (lo1, lo2), lo3), lo4), p);
+ float64x2_t y = vaddq_f64 (hi, lo);
+ *tail = vaddq_f64 (vsubq_f64 (hi, y), lo);
+ return y;
+}
+
+/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. */
+static inline float64x2_t
+v_exp_inline (float64x2_t x, float64x2_t xtail, const struct data *d)
+{
+ /* Fallback to scalar exp_inline for all lanes if any lane
+ contains value of x s.t. |x| <= 2^-54 or >= 512. */
+ uint64x2_t abstop
+ = vandq_u64 (vshrq_n_u64 (vreinterpretq_u64_f64 (x), 52), v_u64 (0x7ff));
+ uint64x2_t uoflowx
+ = vcgeq_u64 (vsubq_u64 (abstop, VecSmallExp), VecThresExp);
+ if (unlikely (v_any_u64 (uoflowx)))
+ return v_call2_f64 (exp_nosignbias, x, xtail, x, v_u64 (-1));
+ /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
+ /* x = ln2/N*k + r, with k integer and r in [-ln2/2N, ln2/2N]. */
+ float64x2_t z = vmulq_f64 (d->inv_ln2_n, x);
+ /* z - kd is in [-1, 1] in non-nearest rounding modes. */
+ float64x2_t kd = vaddq_f64 (z, d->shift);
+ uint64x2_t ki = vreinterpretq_u64_f64 (kd);
+ kd = vsubq_f64 (kd, d->shift);
+ float64x2_t r = vfmsq_f64 (x, kd, d->ln2_hi_n);
+ r = vfmsq_f64 (r, kd, d->ln2_lo_n);
+ /* The code assumes 2^-200 < |xtail| < 2^-8/N. */
+ r = vaddq_f64 (r, xtail);
+ /* 2^(k/N) ~= scale. */
+ uint64x2_t idx = vandq_u64 (ki, v_u64 (N_EXP - 1));
+ uint64x2_t top = vshlq_n_u64 (ki, 52 - V_POW_EXP_TABLE_BITS);
+ /* This is only a valid scale when -1023*N < k < 1024*N. */
+ uint64x2_t sbits = v_lookup_u64 (SBits, idx);
+ sbits = vaddq_u64 (sbits, top);
+ /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (exp(r) - 1). */
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t tmp = vfmaq_f64 (C (1), r, C (2));
+ tmp = vfmaq_f64 (C (0), r, tmp);
+ tmp = vfmaq_f64 (r, r2, tmp);
+ float64x2_t scale = vreinterpretq_f64_u64 (sbits);
+ /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
+ is no spurious underflow here even without fma. */
+ return vfmaq_f64 (scale, scale, tmp);
+}
+
+float64x2_t VPCS_ATTR V_NAME_D2 (pow) (float64x2_t x, float64x2_t y)
+{
+ const struct data *d = ptr_barrier (&data);
+  /* The case x <= 0 is too complicated to be vectorised efficiently here, so
+     fall back to scalar pow for all lanes if any x <= 0 is detected. */
+ if (v_any_u64 (vclezq_s64 (vreinterpretq_s64_f64 (x))))
+ return v_call2_f64 (__pl_finite_pow, x, y, x, v_u64 (-1));
+
+ uint64x2_t vix = vreinterpretq_u64_f64 (x);
+ uint64x2_t viy = vreinterpretq_u64_f64 (y);
+ uint64x2_t vtopx = vshrq_n_u64 (vix, 52);
+ uint64x2_t vtopy = vshrq_n_u64 (viy, 52);
+ uint64x2_t vabstopx = vandq_u64 (vtopx, v_u64 (0x7ff));
+ uint64x2_t vabstopy = vandq_u64 (vtopy, v_u64 (0x7ff));
+
+ /* Special cases of x or y. */
+#if WANT_SIMD_EXCEPT
+ /* Small or large. */
+ uint64x2_t specialx
+ = vcgeq_u64 (vsubq_u64 (vtopx, VecSmallPowX), VecThresPowX);
+ uint64x2_t specialy
+ = vcgeq_u64 (vsubq_u64 (vabstopy, VecSmallPowY), VecThresPowY);
+#else
+ /* Inf or nan. */
+ uint64x2_t specialx = vcgeq_u64 (vabstopx, v_u64 (0x7ff));
+ uint64x2_t specialy = vcgeq_u64 (vabstopy, v_u64 (0x7ff));
+ /* The case y==0 does not trigger a special case, since in this case it is
+ necessary to fix the result only if x is a signalling nan, which already
+ triggers a special case. We test y==0 directly in the scalar fallback. */
+#endif
+ uint64x2_t special = vorrq_u64 (specialx, specialy);
+ /* Fallback to scalar on all lanes if any lane is inf or nan. */
+ if (unlikely (v_any_u64 (special)))
+ return v_call2_f64 (__pl_finite_pow, x, y, x, v_u64 (-1));
+
+ /* Small cases of x: |x| < 0x1p-126. */
+ uint64x2_t smallx = vcltq_u64 (vabstopx, VecSmallPowX);
+ if (unlikely (v_any_u64 (smallx)))
+ {
+ /* Update ix if top 12 bits of x are 0. */
+ uint64x2_t sub_x = vceqzq_u64 (vtopx);
+ if (unlikely (v_any_u64 (sub_x)))
+ {
+ /* Normalize subnormal x so exponent becomes negative. */
+ uint64x2_t vix_norm
+ = vreinterpretq_u64_f64 (vmulq_f64 (x, v_f64 (0x1p52)));
+ vix_norm = vandq_u64 (vix_norm, v_u64 (0x7fffffffffffffff));
+ vix_norm = vsubq_u64 (vix_norm, v_u64 (52ULL << 52));
+ vix = vbslq_u64 (sub_x, vix_norm, vix);
+ }
+ }
+
+ /* Vector Log(ix, &lo). */
+ float64x2_t vlo;
+ float64x2_t vhi = v_log_inline (vix, &vlo, d);
+
+ /* Vector Exp(y_loghi, y_loglo). */
+ float64x2_t vehi = vmulq_f64 (y, vhi);
+ float64x2_t velo = vmulq_f64 (y, vlo);
+ float64x2_t vemi = vfmsq_f64 (vehi, y, vhi);
+ velo = vsubq_f64 (velo, vemi);
+ return v_exp_inline (vehi, velo, d);
+}
+
+PL_SIG (V, D, 2, pow)
+PL_TEST_ULP (V_NAME_D2 (pow), 0.55)
+PL_TEST_EXPECT_FENV (V_NAME_D2 (pow), WANT_SIMD_EXCEPT)
+/* Wide intervals spanning the whole domain but shared between x and y. */
+#define V_POW_INTERVAL2(xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (V_NAME_D2 (pow), xlo, xhi, -ylo, -yhi, n) \
+ PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, ylo, yhi, n) \
+ PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -xlo, -xhi, -ylo, -yhi, n)
+#define EXPAND(str) str##000000000
+#define SHL52(str) EXPAND (str)
+V_POW_INTERVAL2 (0, SHL52 (SmallPowX), 0, inf, 40000)
+V_POW_INTERVAL2 (SHL52 (SmallPowX), SHL52 (BigPowX), 0, inf, 40000)
+V_POW_INTERVAL2 (SHL52 (BigPowX), inf, 0, inf, 40000)
+V_POW_INTERVAL2 (0, inf, 0, SHL52 (SmallPowY), 40000)
+V_POW_INTERVAL2 (0, inf, SHL52 (SmallPowY), SHL52 (BigPowY), 40000)
+V_POW_INTERVAL2 (0, inf, SHL52 (BigPowY), inf, 40000)
+V_POW_INTERVAL2 (0, inf, 0, inf, 1000)
+/* x~1 or y~1. */
+V_POW_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000)
+V_POW_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
+V_POW_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
+/* around argmaxs of ULP error. */
+V_POW_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
+V_POW_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
+/* x is negative, y is odd or even integer, or y is real not integer. */
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
+/* 1.0^y. */
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
+PL_TEST_INTERVAL2 (V_NAME_D2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
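The head/tail product that feeds v_exp_inline above can be illustrated with a scalar sketch (assumption: plain libm exp stands in for the vector exponential). The fma term recovers the rounding error of y*log_hi, so the exponential effectively sees y*log(x) with extra precision:

#include <math.h>

static double
pow_core_sketch (double y, double log_hi, double log_lo)
{
  double ehi = y * log_hi;
  double err = fma (y, log_hi, -ehi); /* exact low part of y*log_hi. */
  double elo = y * log_lo + err;
  return exp (ehi + elo);             /* reference exp, not v_exp_inline. */
}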
diff --git a/contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c b/contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c
new file mode 100644
index 000000000000..5d921ef648a4
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_pow_exp_data.c
@@ -0,0 +1,289 @@
+/*
+ * Shared data between exp, exp2 and pow.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << V_POW_EXP_TABLE_BITS)
+
+const struct v_pow_exp_data __v_pow_exp_data = {
+// exp polynomial coefficients.
+.poly = {
+// abs error: 1.43*2^-58
+// ulp error: 0.549 (0.550 without fma)
+// if |x| < ln2/512
+0x1.fffffffffffd4p-2,
+0x1.5555571d6ef9p-3,
+0x1.5555576a5adcep-5,
+},
+// N/ln2
+.n_over_ln2 = 0x1.71547652b82fep0 * N,
+// ln2/N
+.ln2_over_n_hi = 0x1.62e42fefc0000p-9,
+.ln2_over_n_lo = -0x1.c610ca86c3899p-45,
+// Used for rounding to nearest integer without using intrinsics.
+.shift = 0x1.8p52,
+// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
+// sbits[k] = asuint64(H[k]) - (k << 52)/N
+.sbits = {
+0x3ff0000000000000,
+0x3feffb1afa5abcbf,
+0x3feff63da9fb3335,
+0x3feff168143b0281,
+0x3fefec9a3e778061,
+0x3fefe7d42e11bbcc,
+0x3fefe315e86e7f85,
+0x3fefde5f72f654b1,
+0x3fefd9b0d3158574,
+0x3fefd50a0e3c1f89,
+0x3fefd06b29ddf6de,
+0x3fefcbd42b72a836,
+0x3fefc74518759bc8,
+0x3fefc2bdf66607e0,
+0x3fefbe3ecac6f383,
+0x3fefb9c79b1f3919,
+0x3fefb5586cf9890f,
+0x3fefb0f145e46c85,
+0x3fefac922b7247f7,
+0x3fefa83b23395dec,
+0x3fefa3ec32d3d1a2,
+0x3fef9fa55fdfa9c5,
+0x3fef9b66affed31b,
+0x3fef973028d7233e,
+0x3fef9301d0125b51,
+0x3fef8edbab5e2ab6,
+0x3fef8abdc06c31cc,
+0x3fef86a814f204ab,
+0x3fef829aaea92de0,
+0x3fef7e95934f312e,
+0x3fef7a98c8a58e51,
+0x3fef76a45471c3c2,
+0x3fef72b83c7d517b,
+0x3fef6ed48695bbc0,
+0x3fef6af9388c8dea,
+0x3fef672658375d2f,
+0x3fef635beb6fcb75,
+0x3fef5f99f8138a1c,
+0x3fef5be084045cd4,
+0x3fef582f95281c6b,
+0x3fef54873168b9aa,
+0x3fef50e75eb44027,
+0x3fef4d5022fcd91d,
+0x3fef49c18438ce4d,
+0x3fef463b88628cd6,
+0x3fef42be3578a819,
+0x3fef3f49917ddc96,
+0x3fef3bdda27912d1,
+0x3fef387a6e756238,
+0x3fef351ffb82140a,
+0x3fef31ce4fb2a63f,
+0x3fef2e85711ece75,
+0x3fef2b4565e27cdd,
+0x3fef280e341ddf29,
+0x3fef24dfe1f56381,
+0x3fef21ba7591bb70,
+0x3fef1e9df51fdee1,
+0x3fef1b8a66d10f13,
+0x3fef187fd0dad990,
+0x3fef157e39771b2f,
+0x3fef1285a6e4030b,
+0x3fef0f961f641589,
+0x3fef0cafa93e2f56,
+0x3fef09d24abd886b,
+0x3fef06fe0a31b715,
+0x3fef0432edeeb2fd,
+0x3fef0170fc4cd831,
+0x3feefeb83ba8ea32,
+0x3feefc08b26416ff,
+0x3feef96266e3fa2d,
+0x3feef6c55f929ff1,
+0x3feef431a2de883b,
+0x3feef1a7373aa9cb,
+0x3feeef26231e754a,
+0x3feeecae6d05d866,
+0x3feeea401b7140ef,
+0x3feee7db34e59ff7,
+0x3feee57fbfec6cf4,
+0x3feee32dc313a8e5,
+0x3feee0e544ede173,
+0x3feedea64c123422,
+0x3feedc70df1c5175,
+0x3feeda4504ac801c,
+0x3feed822c367a024,
+0x3feed60a21f72e2a,
+0x3feed3fb2709468a,
+0x3feed1f5d950a897,
+0x3feecffa3f84b9d4,
+0x3feece086061892d,
+0x3feecc2042a7d232,
+0x3feeca41ed1d0057,
+0x3feec86d668b3237,
+0x3feec6a2b5c13cd0,
+0x3feec4e1e192aed2,
+0x3feec32af0d7d3de,
+0x3feec17dea6db7d7,
+0x3feebfdad5362a27,
+0x3feebe41b817c114,
+0x3feebcb299fddd0d,
+0x3feebb2d81d8abff,
+0x3feeb9b2769d2ca7,
+0x3feeb8417f4531ee,
+0x3feeb6daa2cf6642,
+0x3feeb57de83f4eef,
+0x3feeb42b569d4f82,
+0x3feeb2e2f4f6ad27,
+0x3feeb1a4ca5d920f,
+0x3feeb070dde910d2,
+0x3feeaf4736b527da,
+0x3feeae27dbe2c4cf,
+0x3feead12d497c7fd,
+0x3feeac0827ff07cc,
+0x3feeab07dd485429,
+0x3feeaa11fba87a03,
+0x3feea9268a5946b7,
+0x3feea84590998b93,
+0x3feea76f15ad2148,
+0x3feea6a320dceb71,
+0x3feea5e1b976dc09,
+0x3feea52ae6cdf6f4,
+0x3feea47eb03a5585,
+0x3feea3dd1d1929fd,
+0x3feea34634ccc320,
+0x3feea2b9febc8fb7,
+0x3feea23882552225,
+0x3feea1c1c70833f6,
+0x3feea155d44ca973,
+0x3feea0f4b19e9538,
+0x3feea09e667f3bcd,
+0x3feea052fa75173e,
+0x3feea012750bdabf,
+0x3fee9fdcddd47645,
+0x3fee9fb23c651a2f,
+0x3fee9f9298593ae5,
+0x3fee9f7df9519484,
+0x3fee9f7466f42e87,
+0x3fee9f75e8ec5f74,
+0x3fee9f8286ead08a,
+0x3fee9f9a48a58174,
+0x3fee9fbd35d7cbfd,
+0x3fee9feb564267c9,
+0x3feea024b1ab6e09,
+0x3feea0694fde5d3f,
+0x3feea0b938ac1cf6,
+0x3feea11473eb0187,
+0x3feea17b0976cfdb,
+0x3feea1ed0130c132,
+0x3feea26a62ff86f0,
+0x3feea2f336cf4e62,
+0x3feea3878491c491,
+0x3feea427543e1a12,
+0x3feea4d2add106d9,
+0x3feea589994cce13,
+0x3feea64c1eb941f7,
+0x3feea71a4623c7ad,
+0x3feea7f4179f5b21,
+0x3feea8d99b4492ed,
+0x3feea9cad931a436,
+0x3feeaac7d98a6699,
+0x3feeabd0a478580f,
+0x3feeace5422aa0db,
+0x3feeae05bad61778,
+0x3feeaf3216b5448c,
+0x3feeb06a5e0866d9,
+0x3feeb1ae99157736,
+0x3feeb2fed0282c8a,
+0x3feeb45b0b91ffc6,
+0x3feeb5c353aa2fe2,
+0x3feeb737b0cdc5e5,
+0x3feeb8b82b5f98e5,
+0x3feeba44cbc8520f,
+0x3feebbdd9a7670b3,
+0x3feebd829fde4e50,
+0x3feebf33e47a22a2,
+0x3feec0f170ca07ba,
+0x3feec2bb4d53fe0d,
+0x3feec49182a3f090,
+0x3feec674194bb8d5,
+0x3feec86319e32323,
+0x3feeca5e8d07f29e,
+0x3feecc667b5de565,
+0x3feece7aed8eb8bb,
+0x3feed09bec4a2d33,
+0x3feed2c980460ad8,
+0x3feed503b23e255d,
+0x3feed74a8af46052,
+0x3feed99e1330b358,
+0x3feedbfe53c12e59,
+0x3feede6b5579fdbf,
+0x3feee0e521356eba,
+0x3feee36bbfd3f37a,
+0x3feee5ff3a3c2774,
+0x3feee89f995ad3ad,
+0x3feeeb4ce622f2ff,
+0x3feeee07298db666,
+0x3feef0ce6c9a8952,
+0x3feef3a2b84f15fb,
+0x3feef68415b749b1,
+0x3feef9728de5593a,
+0x3feefc6e29f1c52a,
+0x3feeff76f2fb5e47,
+0x3fef028cf22749e4,
+0x3fef05b030a1064a,
+0x3fef08e0b79a6f1f,
+0x3fef0c1e904bc1d2,
+0x3fef0f69c3f3a207,
+0x3fef12c25bd71e09,
+0x3fef16286141b33d,
+0x3fef199bdd85529c,
+0x3fef1d1cd9fa652c,
+0x3fef20ab5fffd07a,
+0x3fef244778fafb22,
+0x3fef27f12e57d14b,
+0x3fef2ba88988c933,
+0x3fef2f6d9406e7b5,
+0x3fef33405751c4db,
+0x3fef3720dcef9069,
+0x3fef3b0f2e6d1675,
+0x3fef3f0b555dc3fa,
+0x3fef43155b5bab74,
+0x3fef472d4a07897c,
+0x3fef4b532b08c968,
+0x3fef4f87080d89f2,
+0x3fef53c8eacaa1d6,
+0x3fef5818dcfba487,
+0x3fef5c76e862e6d3,
+0x3fef60e316c98398,
+0x3fef655d71ff6075,
+0x3fef69e603db3285,
+0x3fef6e7cd63a8315,
+0x3fef7321f301b460,
+0x3fef77d5641c0658,
+0x3fef7c97337b9b5f,
+0x3fef81676b197d17,
+0x3fef864614f5a129,
+0x3fef8b333b16ee12,
+0x3fef902ee78b3ff6,
+0x3fef953924676d76,
+0x3fef9a51fbc74c83,
+0x3fef9f7977cdb740,
+0x3fefa4afa2a490da,
+0x3fefa9f4867cca6e,
+0x3fefaf482d8e67f1,
+0x3fefb4aaa2188510,
+0x3fefba1bee615a27,
+0x3fefbf9c1cb6412a,
+0x3fefc52b376bba97,
+0x3fefcac948dd7274,
+0x3fefd0765b6e4540,
+0x3fefd632798844f8,
+0x3fefdbfdad9cbe14,
+0x3fefe1d802243c89,
+0x3fefe7c1819e90d8,
+0x3fefedba3692d514,
+0x3feff3c22b8f71f1,
+0x3feff9d96b2a23d9,
+},
+};
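As a hedged sketch of how a kernel consumes this table (not taken from the sources above): given an integer k such that the wanted scale is 2^(k/N), the low bits of k index sbits and the remaining bits are added straight into the exponent field, which is valid while -1023*N < k < 1024*N.

#include <stdint.h>
#include <string.h>

/* Assumes math_config.h (included above) declares __v_pow_exp_data and
   V_POW_EXP_TABLE_BITS; N is the macro defined at the top of this file.  */
static inline double
scale_from_k_sketch (int64_t k)
{
  uint64_t u = __v_pow_exp_data.sbits[k & (N - 1)]
               + ((uint64_t) k << (52 - V_POW_EXP_TABLE_BITS));
  double scale;
  memcpy (&scale, &u, sizeof scale);
  return scale; /* scale ~= 2^(k/N). */
}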
diff --git a/contrib/arm-optimized-routines/pl/math/v_pow_log_data.c b/contrib/arm-optimized-routines/pl/math/v_pow_log_data.c
new file mode 100644
index 000000000000..036faa5c97c1
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_pow_log_data.c
@@ -0,0 +1,174 @@
+/*
+ * Data for the log part of pow.
+ *
+ * Copyright (c) 2018-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+#define N (1 << V_POW_LOG_TABLE_BITS)
+
+/* Algorithm:
+
+ x = 2^k z
+ log(x) = k ln2 + log(c) + log(z/c)
+ log(z/c) = poly(z/c - 1)
+
+ where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
+ and z falls into the ith one, then table entries are computed as
+
+ tab[i].invc = 1/c
+ tab[i].logc = round(0x1p43*log(c))/0x1p43
+ tab[i].logctail = (double)(log(c) - logc)
+
+ where c is chosen near the center of the subinterval such that 1/c has only
+   a few precision bits so z/c - 1 is exactly representable as double:
+
+ 1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
+
+ Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| <
+ 0x1p-97, the last few bits of logc are rounded away so k*ln2hi + logc has no
+   rounding error and the interval for z is selected such that near x == 1,
+   where log(x) is tiny, large cancellation error is avoided in
+   logc + poly(z/c - 1). */
+const struct v_pow_log_data __v_pow_log_data = {
+ /* relative error: 0x1.11922ap-70 in [-0x1.6bp-8, 0x1.6bp-8]
+ Coefficients are scaled to match the scaling during evaluation. */
+ .poly = { -0x1p-1, -0x1.555555555556p-1, 0x1.0000000000006p-1,
+ 0x1.999999959554ep-1, -0x1.555555529a47ap-1, -0x1.2495b9b4845e9p0,
+ 0x1.0002b8b263fc3p0, },
+ .ln2_hi = 0x1.62e42fefa3800p-1,
+ .ln2_lo = 0x1.ef35793c76730p-45,
+ .invc = { 0x1.6a00000000000p+0, 0x1.6800000000000p+0, 0x1.6600000000000p+0,
+ 0x1.6400000000000p+0, 0x1.6200000000000p+0, 0x1.6000000000000p+0,
+ 0x1.5e00000000000p+0, 0x1.5c00000000000p+0, 0x1.5a00000000000p+0,
+ 0x1.5800000000000p+0, 0x1.5600000000000p+0, 0x1.5600000000000p+0,
+ 0x1.5400000000000p+0, 0x1.5200000000000p+0, 0x1.5000000000000p+0,
+ 0x1.4e00000000000p+0, 0x1.4c00000000000p+0, 0x1.4a00000000000p+0,
+ 0x1.4a00000000000p+0, 0x1.4800000000000p+0, 0x1.4600000000000p+0,
+ 0x1.4400000000000p+0, 0x1.4200000000000p+0, 0x1.4000000000000p+0,
+ 0x1.4000000000000p+0, 0x1.3e00000000000p+0, 0x1.3c00000000000p+0,
+ 0x1.3a00000000000p+0, 0x1.3a00000000000p+0, 0x1.3800000000000p+0,
+ 0x1.3600000000000p+0, 0x1.3400000000000p+0, 0x1.3400000000000p+0,
+ 0x1.3200000000000p+0, 0x1.3000000000000p+0, 0x1.3000000000000p+0,
+ 0x1.2e00000000000p+0, 0x1.2c00000000000p+0, 0x1.2c00000000000p+0,
+ 0x1.2a00000000000p+0, 0x1.2800000000000p+0, 0x1.2600000000000p+0,
+ 0x1.2600000000000p+0, 0x1.2400000000000p+0, 0x1.2400000000000p+0,
+ 0x1.2200000000000p+0, 0x1.2000000000000p+0, 0x1.2000000000000p+0,
+ 0x1.1e00000000000p+0, 0x1.1c00000000000p+0, 0x1.1c00000000000p+0,
+ 0x1.1a00000000000p+0, 0x1.1a00000000000p+0, 0x1.1800000000000p+0,
+ 0x1.1600000000000p+0, 0x1.1600000000000p+0, 0x1.1400000000000p+0,
+ 0x1.1400000000000p+0, 0x1.1200000000000p+0, 0x1.1000000000000p+0,
+ 0x1.1000000000000p+0, 0x1.0e00000000000p+0, 0x1.0e00000000000p+0,
+ 0x1.0c00000000000p+0, 0x1.0c00000000000p+0, 0x1.0a00000000000p+0,
+ 0x1.0a00000000000p+0, 0x1.0800000000000p+0, 0x1.0800000000000p+0,
+ 0x1.0600000000000p+0, 0x1.0400000000000p+0, 0x1.0400000000000p+0,
+ 0x1.0200000000000p+0, 0x1.0200000000000p+0, 0x1.0000000000000p+0,
+ 0x1.0000000000000p+0, 0x1.fc00000000000p-1, 0x1.f800000000000p-1,
+ 0x1.f400000000000p-1, 0x1.f000000000000p-1, 0x1.ec00000000000p-1,
+ 0x1.e800000000000p-1, 0x1.e400000000000p-1, 0x1.e200000000000p-1,
+ 0x1.de00000000000p-1, 0x1.da00000000000p-1, 0x1.d600000000000p-1,
+ 0x1.d400000000000p-1, 0x1.d000000000000p-1, 0x1.cc00000000000p-1,
+ 0x1.ca00000000000p-1, 0x1.c600000000000p-1, 0x1.c400000000000p-1,
+ 0x1.c000000000000p-1, 0x1.be00000000000p-1, 0x1.ba00000000000p-1,
+ 0x1.b800000000000p-1, 0x1.b400000000000p-1, 0x1.b200000000000p-1,
+ 0x1.ae00000000000p-1, 0x1.ac00000000000p-1, 0x1.aa00000000000p-1,
+ 0x1.a600000000000p-1, 0x1.a400000000000p-1, 0x1.a000000000000p-1,
+ 0x1.9e00000000000p-1, 0x1.9c00000000000p-1, 0x1.9a00000000000p-1,
+ 0x1.9600000000000p-1, 0x1.9400000000000p-1, 0x1.9200000000000p-1,
+ 0x1.9000000000000p-1, 0x1.8c00000000000p-1, 0x1.8a00000000000p-1,
+ 0x1.8800000000000p-1, 0x1.8600000000000p-1, 0x1.8400000000000p-1,
+ 0x1.8200000000000p-1, 0x1.7e00000000000p-1, 0x1.7c00000000000p-1,
+ 0x1.7a00000000000p-1, 0x1.7800000000000p-1, 0x1.7600000000000p-1,
+ 0x1.7400000000000p-1, 0x1.7200000000000p-1, 0x1.7000000000000p-1,
+ 0x1.6e00000000000p-1, 0x1.6c00000000000p-1, },
+ .logc
+ = { -0x1.62c82f2b9c800p-2, -0x1.5d1bdbf580800p-2, -0x1.5767717455800p-2,
+ -0x1.51aad872df800p-2, -0x1.4be5f95777800p-2, -0x1.4618bc21c6000p-2,
+ -0x1.404308686a800p-2, -0x1.3a64c55694800p-2, -0x1.347dd9a988000p-2,
+ -0x1.2e8e2bae12000p-2, -0x1.2895a13de8800p-2, -0x1.2895a13de8800p-2,
+ -0x1.22941fbcf7800p-2, -0x1.1c898c1699800p-2, -0x1.1675cababa800p-2,
+ -0x1.1058bf9ae4800p-2, -0x1.0a324e2739000p-2, -0x1.0402594b4d000p-2,
+ -0x1.0402594b4d000p-2, -0x1.fb9186d5e4000p-3, -0x1.ef0adcbdc6000p-3,
+ -0x1.e27076e2af000p-3, -0x1.d5c216b4fc000p-3, -0x1.c8ff7c79aa000p-3,
+ -0x1.c8ff7c79aa000p-3, -0x1.bc286742d9000p-3, -0x1.af3c94e80c000p-3,
+ -0x1.a23bc1fe2b000p-3, -0x1.a23bc1fe2b000p-3, -0x1.9525a9cf45000p-3,
+ -0x1.87fa06520d000p-3, -0x1.7ab890210e000p-3, -0x1.7ab890210e000p-3,
+ -0x1.6d60fe719d000p-3, -0x1.5ff3070a79000p-3, -0x1.5ff3070a79000p-3,
+ -0x1.526e5e3a1b000p-3, -0x1.44d2b6ccb8000p-3, -0x1.44d2b6ccb8000p-3,
+ -0x1.371fc201e9000p-3, -0x1.29552f81ff000p-3, -0x1.1b72ad52f6000p-3,
+ -0x1.1b72ad52f6000p-3, -0x1.0d77e7cd09000p-3, -0x1.0d77e7cd09000p-3,
+ -0x1.fec9131dbe000p-4, -0x1.e27076e2b0000p-4, -0x1.e27076e2b0000p-4,
+ -0x1.c5e548f5bc000p-4, -0x1.a926d3a4ae000p-4, -0x1.a926d3a4ae000p-4,
+ -0x1.8c345d631a000p-4, -0x1.8c345d631a000p-4, -0x1.6f0d28ae56000p-4,
+ -0x1.51b073f062000p-4, -0x1.51b073f062000p-4, -0x1.341d7961be000p-4,
+ -0x1.341d7961be000p-4, -0x1.16536eea38000p-4, -0x1.f0a30c0118000p-5,
+ -0x1.f0a30c0118000p-5, -0x1.b42dd71198000p-5, -0x1.b42dd71198000p-5,
+ -0x1.77458f632c000p-5, -0x1.77458f632c000p-5, -0x1.39e87b9fec000p-5,
+ -0x1.39e87b9fec000p-5, -0x1.f829b0e780000p-6, -0x1.f829b0e780000p-6,
+ -0x1.7b91b07d58000p-6, -0x1.fc0a8b0fc0000p-7, -0x1.fc0a8b0fc0000p-7,
+ -0x1.fe02a6b100000p-8, -0x1.fe02a6b100000p-8, 0x0.0000000000000p+0,
+ 0x0.0000000000000p+0, 0x1.0101575890000p-7, 0x1.0205658938000p-6,
+ 0x1.8492528c90000p-6, 0x1.0415d89e74000p-5, 0x1.466aed42e0000p-5,
+ 0x1.894aa149fc000p-5, 0x1.ccb73cdddc000p-5, 0x1.eea31c006c000p-5,
+ 0x1.1973bd1466000p-4, 0x1.3bdf5a7d1e000p-4, 0x1.5e95a4d97a000p-4,
+ 0x1.700d30aeac000p-4, 0x1.9335e5d594000p-4, 0x1.b6ac88dad6000p-4,
+ 0x1.c885801bc4000p-4, 0x1.ec739830a2000p-4, 0x1.fe89139dbe000p-4,
+ 0x1.1178e8227e000p-3, 0x1.1aa2b7e23f000p-3, 0x1.2d1610c868000p-3,
+ 0x1.365fcb0159000p-3, 0x1.4913d8333b000p-3, 0x1.527e5e4a1b000p-3,
+ 0x1.6574ebe8c1000p-3, 0x1.6f0128b757000p-3, 0x1.7898d85445000p-3,
+ 0x1.8beafeb390000p-3, 0x1.95a5adcf70000p-3, 0x1.a93ed3c8ae000p-3,
+ 0x1.b31d8575bd000p-3, 0x1.bd087383be000p-3, 0x1.c6ffbc6f01000p-3,
+ 0x1.db13db0d49000p-3, 0x1.e530effe71000p-3, 0x1.ef5ade4dd0000p-3,
+ 0x1.f991c6cb3b000p-3, 0x1.07138604d5800p-2, 0x1.0c42d67616000p-2,
+ 0x1.1178e8227e800p-2, 0x1.16b5ccbacf800p-2, 0x1.1bf99635a6800p-2,
+ 0x1.214456d0eb800p-2, 0x1.2bef07cdc9000p-2, 0x1.314f1e1d36000p-2,
+ 0x1.36b6776be1000p-2, 0x1.3c25277333000p-2, 0x1.419b423d5e800p-2,
+ 0x1.4718dc271c800p-2, 0x1.4c9e09e173000p-2, 0x1.522ae0738a000p-2,
+ 0x1.57bf753c8d000p-2, 0x1.5d5bddf596000p-2, },
+ .logctail
+ = { 0x1.ab42428375680p-48, -0x1.ca508d8e0f720p-46, -0x1.362a4d5b6506dp-45,
+ -0x1.684e49eb067d5p-49, -0x1.41b6993293ee0p-47, 0x1.3d82f484c84ccp-46,
+ 0x1.c42f3ed820b3ap-50, 0x1.0b1c686519460p-45, 0x1.5594dd4c58092p-45,
+ 0x1.67b1e99b72bd8p-45, 0x1.5ca14b6cfb03fp-46, 0x1.5ca14b6cfb03fp-46,
+ -0x1.65a242853da76p-46, -0x1.fafbc68e75404p-46, 0x1.f1fc63382a8f0p-46,
+ -0x1.6a8c4fd055a66p-45, -0x1.c6bee7ef4030ep-47, -0x1.036b89ef42d7fp-48,
+ -0x1.036b89ef42d7fp-48, 0x1.d572aab993c87p-47, 0x1.b26b79c86af24p-45,
+ -0x1.72f4f543fff10p-46, 0x1.1ba91bbca681bp-45, 0x1.7794f689f8434p-45,
+ 0x1.7794f689f8434p-45, 0x1.94eb0318bb78fp-46, 0x1.a4e633fcd9066p-52,
+ -0x1.58c64dc46c1eap-45, -0x1.58c64dc46c1eap-45, -0x1.ad1d904c1d4e3p-45,
+ 0x1.bbdbf7fdbfa09p-45, 0x1.bdb9072534a58p-45, 0x1.bdb9072534a58p-45,
+ -0x1.0e46aa3b2e266p-46, -0x1.e9e439f105039p-46, -0x1.e9e439f105039p-46,
+ -0x1.0de8b90075b8fp-45, 0x1.70cc16135783cp-46, 0x1.70cc16135783cp-46,
+ 0x1.178864d27543ap-48, -0x1.48d301771c408p-45, -0x1.e80a41811a396p-45,
+ -0x1.e80a41811a396p-45, 0x1.a699688e85bf4p-47, 0x1.a699688e85bf4p-47,
+ -0x1.575545ca333f2p-45, 0x1.a342c2af0003cp-45, 0x1.a342c2af0003cp-45,
+ -0x1.d0c57585fbe06p-46, 0x1.53935e85baac8p-45, 0x1.53935e85baac8p-45,
+ 0x1.37c294d2f5668p-46, 0x1.37c294d2f5668p-46, -0x1.69737c93373dap-45,
+ 0x1.f025b61c65e57p-46, 0x1.f025b61c65e57p-46, 0x1.c5edaccf913dfp-45,
+ 0x1.c5edaccf913dfp-45, 0x1.47c5e768fa309p-46, 0x1.d599e83368e91p-45,
+ 0x1.d599e83368e91p-45, 0x1.c827ae5d6704cp-46, 0x1.c827ae5d6704cp-46,
+ -0x1.cfc4634f2a1eep-45, -0x1.cfc4634f2a1eep-45, 0x1.502b7f526feaap-48,
+ 0x1.502b7f526feaap-48, -0x1.980267c7e09e4p-45, -0x1.980267c7e09e4p-45,
+ -0x1.88d5493faa639p-45, -0x1.f1e7cf6d3a69cp-50, -0x1.f1e7cf6d3a69cp-50,
+ -0x1.9e23f0dda40e4p-46, -0x1.9e23f0dda40e4p-46, 0x0.0000000000000p+0,
+ 0x0.0000000000000p+0, -0x1.0c76b999d2be8p-46, -0x1.3dc5b06e2f7d2p-45,
+ -0x1.aa0ba325a0c34p-45, 0x1.111c05cf1d753p-47, -0x1.c167375bdfd28p-45,
+ -0x1.97995d05a267dp-46, -0x1.a68f247d82807p-46, -0x1.e113e4fc93b7bp-47,
+ -0x1.5325d560d9e9bp-45, 0x1.cc85ea5db4ed7p-45, -0x1.c69063c5d1d1ep-45,
+ 0x1.c1e8da99ded32p-49, 0x1.3115c3abd47dap-45, -0x1.390802bf768e5p-46,
+ 0x1.646d1c65aacd3p-45, -0x1.dc068afe645e0p-45, -0x1.534d64fa10afdp-45,
+ 0x1.1ef78ce2d07f2p-45, 0x1.ca78e44389934p-45, 0x1.39d6ccb81b4a1p-47,
+ 0x1.62fa8234b7289p-51, 0x1.5837954fdb678p-45, 0x1.633e8e5697dc7p-45,
+ 0x1.9cf8b2c3c2e78p-46, -0x1.5118de59c21e1p-45, -0x1.c661070914305p-46,
+ -0x1.73d54aae92cd1p-47, 0x1.7f22858a0ff6fp-47, -0x1.8724350562169p-45,
+ -0x1.c358d4eace1aap-47, -0x1.d4bc4595412b6p-45, -0x1.1ec72c5962bd2p-48,
+ -0x1.aff2af715b035p-45, 0x1.212276041f430p-51, -0x1.a211565bb8e11p-51,
+ 0x1.bcbecca0cdf30p-46, 0x1.89cdb16ed4e91p-48, 0x1.7188b163ceae9p-45,
+ -0x1.c210e63a5f01cp-45, 0x1.b9acdf7a51681p-45, 0x1.ca6ed5147bdb7p-45,
+ 0x1.a87deba46baeap-47, 0x1.a9cfa4a5004f4p-45, -0x1.8e27ad3213cb8p-45,
+ 0x1.16ecdb0f177c8p-46, 0x1.83b54b606bd5cp-46, 0x1.8e436ec90e09dp-47,
+ -0x1.f27ce0967d675p-45, -0x1.e20891b0ad8a4p-45, 0x1.ebe708164c759p-45,
+ 0x1.fadedee5d40efp-46, -0x1.a0b2a08a465dcp-47, },
+};
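The entry construction described in the comment block can be sketched as follows (illustration only: the real tables are generated with much higher intermediate precision, so logctail here would not reach the quoted 2^-97 bound).

#include <math.h>

struct pow_log_entry { double invc, logc, logctail; };

static struct pow_log_entry
make_entry_sketch (double center, int n)
{
  struct pow_log_entry e;
  /* 1/c with only a few significant bits, so z/c - 1 is exact. */
  e.invc = center < 1.0 ? round (n / center) / n
                        : round (2.0 * n / center) / n / 2.0;
  double c = 1.0 / e.invc;
  double l = log (c);                   /* needs extended precision in practice. */
  e.logc = round (l * 0x1p43) / 0x1p43; /* keep the top ~43 bits. */
  e.logctail = l - e.logc;              /* low-order correction. */
  return e;
}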
diff --git a/contrib/arm-optimized-routines/pl/math/v_powf_data.c b/contrib/arm-optimized-routines/pl/math/v_powf_data.c
new file mode 100644
index 000000000000..ded211924b80
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_powf_data.c
@@ -0,0 +1,89 @@
+/*
+ * Coefficients for single-precision SVE pow(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "math_config.h"
+
+const struct v_powf_data __v_powf_data = {
+ .invc = { 0x1.6489890582816p+0,
+ 0x1.5cf19b35e3472p+0,
+ 0x1.55aac0e956d65p+0,
+ 0x1.4eb0022977e01p+0,
+ 0x1.47fcccda1dd1fp+0,
+ 0x1.418ceabab68c1p+0,
+ 0x1.3b5c788f1edb3p+0,
+ 0x1.3567de48e9c9ap+0,
+ 0x1.2fabc80fd19bap+0,
+ 0x1.2a25200ce536bp+0,
+ 0x1.24d108e0152e3p+0,
+ 0x1.1facd8ab2fbe1p+0,
+ 0x1.1ab614a03efdfp+0,
+ 0x1.15ea6d03af9ffp+0,
+ 0x1.1147b994bb776p+0,
+ 0x1.0ccbf650593aap+0,
+ 0x1.0875408477302p+0,
+ 0x1.0441d42a93328p+0,
+ 0x1p+0,
+ 0x1.f1d006c855e86p-1,
+ 0x1.e28c3341aa301p-1,
+ 0x1.d4bdf9aa64747p-1,
+ 0x1.c7b45a24e5803p-1,
+ 0x1.bb5f5eb2ed60ap-1,
+ 0x1.afb0bff8fe6b4p-1,
+ 0x1.a49badf7ab1f5p-1,
+ 0x1.9a14a111fc4c9p-1,
+ 0x1.901131f5b2fdcp-1,
+ 0x1.8687f73f6d865p-1,
+ 0x1.7d7067eb77986p-1,
+ 0x1.74c2c1cf97b65p-1,
+ 0x1.6c77f37cff2a1p-1
+ },
+ .logc = { -0x1.e960f97b22702p+3,
+ -0x1.c993406cd4db6p+3,
+ -0x1.aa711d9a7d0f3p+3,
+ -0x1.8bf37bacdce9bp+3,
+ -0x1.6e13b3519946ep+3,
+ -0x1.50cb8281e4089p+3,
+ -0x1.341504a237e2bp+3,
+ -0x1.17eaab624ffbbp+3,
+ -0x1.f88e708f8c853p+2,
+ -0x1.c24b6da113914p+2,
+ -0x1.8d02ee397cb1dp+2,
+ -0x1.58ac1223408b3p+2,
+ -0x1.253e6fd190e89p+2,
+ -0x1.e5641882c12ffp+1,
+ -0x1.81fea712926f7p+1,
+ -0x1.203e240de64a3p+1,
+ -0x1.8029b86a78281p0,
+ -0x1.85d713190fb9p-1,
+ 0x0p+0,
+ 0x1.4c1cc07312997p0,
+ 0x1.5e1848ccec948p+1,
+ 0x1.04cfcb7f1196fp+2,
+ 0x1.582813d463c21p+2,
+ 0x1.a936fa68760ccp+2,
+ 0x1.f81bc31d6cc4ep+2,
+ 0x1.2279a09fae6b1p+3,
+ 0x1.47ec0b6df5526p+3,
+ 0x1.6c71762280f1p+3,
+ 0x1.90155070798dap+3,
+ 0x1.b2e23b1d3068cp+3,
+ 0x1.d4e21b0daa86ap+3,
+ 0x1.f61e2a2f67f3fp+3
+ },
+ .scale = { 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f,
+ 0x3fef9301d0125b51, 0x3fef72b83c7d517b, 0x3fef54873168b9aa,
+ 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 0x3fef06fe0a31b715,
+ 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+ 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429,
+ 0x3feea47eb03a5585, 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74,
+ 0x3feea11473eb0187, 0x3feea589994cce13, 0x3feeace5422aa0db,
+ 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+ 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c,
+ 0x3fef3720dcef9069, 0x3fef5818dcfba487, 0x3fef7c97337b9b5f,
+ 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+ },
+};
diff --git a/contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c b/contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c
new file mode 100644
index 000000000000..6fc014c120b8
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sincos_3u5.c
@@ -0,0 +1,57 @@
+/*
+ * Double-precision vector sincos function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Define _GNU_SOURCE in order to include sincos declaration. If building
+ pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to
+   be linked against the scalar sincos from math/. */
+#define _GNU_SOURCE
+#include <math.h>
+#undef _GNU_SOURCE
+
+#include "v_math.h"
+#include "pl_test.h"
+#include "v_sincos_common.h"
+
+static void VPCS_ATTR NOINLINE
+special_case (float64x2_t x, uint64x2_t special, double *out_sin,
+ double *out_cos)
+{
+ if (special[0])
+ sincos (x[0], out_sin, out_cos);
+ if (special[1])
+ sincos (x[1], out_sin + 1, out_cos + 1);
+}
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate polynomials.
+ Largest observed error is for sin, 3.22 ULP:
+ v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
+ want -0x1.ffe9537d5dbb4p-3. */
+VPCS_ATTR void
+_ZGVnN2vl8l8_sincos (float64x2_t x, double *out_sin, double *out_cos)
+{
+ const struct v_sincos_data *d = ptr_barrier (&v_sincos_data);
+ uint64x2_t special = check_ge_rangeval (x, d);
+
+ float64x2x2_t sc = v_sincos_inline (x, d);
+
+ vst1q_f64 (out_sin, sc.val[0]);
+ vst1q_f64 (out_cos, sc.val[1]);
+
+ if (unlikely (v_any_u64 (special)))
+ special_case (x, special, out_sin, out_cos);
+}
+
+PL_TEST_ULP (_ZGVnN2v_sincos_sin, 2.73)
+PL_TEST_ULP (_ZGVnN2v_sincos_cos, 2.73)
+#define V_SINCOS_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN2v_sincos_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN2v_sincos_cos, lo, hi, n)
+V_SINCOS_INTERVAL (0, 0x1p23, 500000)
+V_SINCOS_INTERVAL (-0, -0x1p23, 500000)
+V_SINCOS_INTERVAL (0x1p23, inf, 10000)
+V_SINCOS_INTERVAL (-0x1p23, -inf, 10000)
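A hypothetical caller of the entry point above (illustration only; the prototype is restated here with the vector PCS attribute rather than relying on a particular header):

#include <arm_neon.h>

__attribute__ ((aarch64_vector_pcs)) void
_ZGVnN2vl8l8_sincos (float64x2_t, double *, double *);

static void
sincos_two (const double in[2], double s[2], double c[2])
{
  /* Two sines land in s[0..1], two cosines in c[0..1]. */
  _ZGVnN2vl8l8_sincos (vld1q_f64 (in), s, c);
}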
diff --git a/contrib/arm-optimized-routines/pl/math/v_sincos_common.h b/contrib/arm-optimized-routines/pl/math/v_sincos_common.h
new file mode 100644
index 000000000000..ee7937e0785a
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sincos_common.h
@@ -0,0 +1,86 @@
+/*
+ * Core approximation for double-precision vector sincos
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+static const struct v_sincos_data
+{
+ float64x2_t sin_poly[7], cos_poly[6], pio2[3];
+ float64x2_t inv_pio2, shift, range_val;
+} v_sincos_data = {
+ .inv_pio2 = V2 (0x1.45f306dc9c882p-1),
+ .pio2 = { V2 (0x1.921fb50000000p+0), V2 (0x1.110b460000000p-26),
+ V2 (0x1.1a62633145c07p-54) },
+ .shift = V2 (0x1.8p52),
+ .sin_poly = { /* Computed using Remez in [-pi/2, pi/2]. */
+ V2 (-0x1.555555555547bp-3), V2 (0x1.1111111108a4dp-7),
+ V2 (-0x1.a01a019936f27p-13), V2 (0x1.71de37a97d93ep-19),
+ V2 (-0x1.ae633919987c6p-26), V2 (0x1.60e277ae07cecp-33),
+ V2 (-0x1.9e9540300a1p-41) },
+ .cos_poly = { /* Computed using Remez in [-pi/4, pi/4]. */
+ V2 (0x1.555555555554cp-5), V2 (-0x1.6c16c16c1521fp-10),
+ V2 (0x1.a01a019cbf62ap-16), V2 (-0x1.27e4f812b681ep-22),
+ V2 (0x1.1ee9f152a57cdp-29), V2 (-0x1.8fb131098404bp-37) },
+ .range_val = V2 (0x1p23), };
+
+static inline uint64x2_t
+check_ge_rangeval (float64x2_t x, const struct v_sincos_data *d)
+{
+ return vcagtq_f64 (x, d->range_val);
+}
+
+/* Double-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate polynomials.
+ Largest observed error is for sin, 3.22 ULP:
+ v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
+ want -0x1.ffe9537d5dbb4p-3. */
+static inline float64x2x2_t
+v_sincos_inline (float64x2_t x, const struct v_sincos_data *d)
+{
+ /* q = nearest integer to 2 * x / pi. */
+ float64x2_t q = vsubq_f64 (vfmaq_f64 (d->shift, x, d->inv_pio2), d->shift);
+ int64x2_t n = vcvtq_s64_f64 (q);
+
+ /* Use q to reduce x to r in [-pi/4, pi/4], by:
+ r = x - q * pi/2, in extended precision. */
+ float64x2_t r = x;
+ r = vfmsq_f64 (r, q, d->pio2[0]);
+ r = vfmsq_f64 (r, q, d->pio2[1]);
+ r = vfmsq_f64 (r, q, d->pio2[2]);
+
+ float64x2_t r2 = r * r, r3 = r2 * r, r4 = r2 * r2;
+
+ /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */
+ float64x2_t s = v_pw_horner_6_f64 (r2, r4, d->sin_poly);
+ s = vfmaq_f64 (r, r3, s);
+
+ /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */
+ float64x2_t c = v_pw_horner_5_f64 (r2, r4, d->cos_poly);
+ c = vfmaq_f64 (v_f64 (-0.5), r2, c);
+ c = vfmaq_f64 (v_f64 (1), r2, c);
+
+ /* If odd quadrant, swap cos and sin. */
+ uint64x2_t swap = vtstq_s64 (n, v_s64 (1));
+ float64x2_t ss = vbslq_f64 (swap, c, s);
+ float64x2_t cc = vbslq_f64 (swap, s, c);
+
+ /* Fix signs according to quadrant.
+ ss = asdouble(asuint64(ss) ^ ((n & 2) << 62))
+     cc = asdouble(asuint64(cc) ^ (((n + 1) & 2) << 62)). */
+ uint64x2_t sin_sign
+ = vshlq_n_u64 (vandq_u64 (vreinterpretq_u64_s64 (n), v_u64 (2)), 62);
+ uint64x2_t cos_sign = vshlq_n_u64 (
+ vandq_u64 (vreinterpretq_u64_s64 (vaddq_s64 (n, v_s64 (1))), v_u64 (2)),
+ 62);
+ ss = vreinterpretq_f64_u64 (
+ veorq_u64 (vreinterpretq_u64_f64 (ss), sin_sign));
+ cc = vreinterpretq_f64_u64 (
+ veorq_u64 (vreinterpretq_u64_f64 (cc), cos_sign));
+
+ return (float64x2x2_t){ ss, cc };
+}
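The swap-and-sign-flip step at the end of v_sincos_inline follows the usual quadrant identities; a hedged scalar sketch (using libm sin/cos as reference kernels) makes the bit manipulation explicit:

#include <math.h>
#include <stdint.h>

static void
sincos_from_reduced_sketch (double r, int64_t n, double *s_out, double *c_out)
{
  double s = sin (r), c = cos (r);           /* reference, not the polynomials. */
  if (n & 1) { double t = s; s = c; c = t; } /* odd quadrant: swap sin/cos. */
  if (n & 2) s = -s;                         /* quadrants 2,3: negate sin. */
  if ((n + 1) & 2) c = -c;                   /* quadrants 1,2: negate cos. */
  *s_out = s;
  *c_out = c;
}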
diff --git a/contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c b/contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c
new file mode 100644
index 000000000000..bf77afaa14db
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sincosf_1u8.c
@@ -0,0 +1,58 @@
+/*
+ * Single-precision vector sincos function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+/* Define _GNU_SOURCE in order to include sincosf declaration. If building
+ pre-GLIBC 2.1, or on a non-GNU conforming system, this routine will need to
+ be linked against the scalar sincosf from math/. */
+#define _GNU_SOURCE
+#include <math.h>
+#undef _GNU_SOURCE
+
+#include "v_sincosf_common.h"
+#include "v_math.h"
+#include "pl_test.h"
+
+static void VPCS_ATTR NOINLINE
+special_case (float32x4_t x, uint32x4_t special, float *out_sin,
+ float *out_cos)
+{
+ for (int i = 0; i < 4; i++)
+ if (special[i])
+ sincosf (x[i], out_sin + i, out_cos + i);
+}
+
+/* Single-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate low-order
+ polynomials.
+ Worst-case error for sin is 1.67 ULP:
+ v_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
+ Worst-case error for cos is 1.81 ULP:
+ v_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */
+VPCS_ATTR void
+_ZGVnN4vl4l4_sincosf (float32x4_t x, float *out_sin, float *out_cos)
+{
+ const struct v_sincosf_data *d = ptr_barrier (&v_sincosf_data);
+ uint32x4_t special = check_ge_rangeval (x, d);
+
+ float32x4x2_t sc = v_sincosf_inline (x, d);
+
+ vst1q_f32 (out_sin, sc.val[0]);
+ vst1q_f32 (out_cos, sc.val[1]);
+
+ if (unlikely (v_any_u32 (special)))
+ special_case (x, special, out_sin, out_cos);
+}
+
+PL_TEST_ULP (_ZGVnN4v_sincosf_sin, 1.17)
+PL_TEST_ULP (_ZGVnN4v_sincosf_cos, 1.31)
+#define V_SINCOSF_INTERVAL(lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN4v_sincosf_sin, lo, hi, n) \
+ PL_TEST_INTERVAL (_ZGVnN4v_sincosf_cos, lo, hi, n)
+V_SINCOSF_INTERVAL (0, 0x1p20, 500000)
+V_SINCOSF_INTERVAL (-0, -0x1p20, 500000)
+V_SINCOSF_INTERVAL (0x1p20, inf, 10000)
+V_SINCOSF_INTERVAL (-0x1p20, -inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_sincosf_common.h b/contrib/arm-optimized-routines/pl/math/v_sincosf_common.h
new file mode 100644
index 000000000000..8239bd9f0176
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sincosf_common.h
@@ -0,0 +1,84 @@
+/*
+ * Core approximation for single-precision vector sincos
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+
+static const struct v_sincosf_data
+{
+ float32x4_t poly_sin[3], poly_cos[3], pio2[3], inv_pio2, shift, range_val;
+} v_sincosf_data = {
+ .poly_sin = { /* Generated using Remez, odd coeffs only, in [-pi/4, pi/4]. */
+ V4 (-0x1.555546p-3), V4 (0x1.11076p-7), V4 (-0x1.994eb4p-13) },
+ .poly_cos = { /* Generated using Remez, even coeffs only, in [-pi/4, pi/4]. */
+ V4 (0x1.55554ap-5), V4 (-0x1.6c0c1ap-10), V4 (0x1.99e0eep-16) },
+ .pio2 = { V4 (0x1.921fb6p+0f), V4 (-0x1.777a5cp-25f), V4 (-0x1.ee59dap-50f) },
+ .inv_pio2 = V4 (0x1.45f306p-1f),
+ .shift = V4 (0x1.8p23),
+ .range_val = V4 (0x1p20),
+};
+
+static inline uint32x4_t
+check_ge_rangeval (float32x4_t x, const struct v_sincosf_data *d)
+{
+ return vcagtq_f32 (x, d->range_val);
+}
+
+/* Single-precision vector function allowing calculation of both sin and cos in
+ one function call, using shared argument reduction and separate low-order
+ polynomials.
+ Worst-case error for sin is 1.67 ULP:
+ v_sincosf_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
+ Worst-case error for cos is 1.81 ULP:
+ v_sincosf_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */
+static inline float32x4x2_t
+v_sincosf_inline (float32x4_t x, const struct v_sincosf_data *d)
+{
+ /* n = rint ( x / (pi/2) ). */
+ float32x4_t shift = d->shift;
+ float32x4_t q = vfmaq_f32 (shift, x, d->inv_pio2);
+ q = vsubq_f32 (q, shift);
+ int32x4_t n = vcvtq_s32_f32 (q);
+
+ /* Reduce x such that r is in [ -pi/4, pi/4 ]. */
+ float32x4_t r = x;
+ r = vfmsq_f32 (r, q, d->pio2[0]);
+ r = vfmsq_f32 (r, q, d->pio2[1]);
+ r = vfmsq_f32 (r, q, d->pio2[2]);
+
+ /* Approximate sin(r) ~= r + r^3 * poly_sin(r^2). */
+ float32x4_t r2 = vmulq_f32 (r, r), r3 = vmulq_f32 (r, r2);
+ float32x4_t s = vfmaq_f32 (d->poly_sin[1], r2, d->poly_sin[2]);
+ s = vfmaq_f32 (d->poly_sin[0], r2, s);
+ s = vfmaq_f32 (r, r3, s);
+
+ /* Approximate cos(r) ~= 1 - (r^2)/2 + r^4 * poly_cos(r^2). */
+ float32x4_t r4 = vmulq_f32 (r2, r2);
+ float32x4_t p = vfmaq_f32 (d->poly_cos[1], r2, d->poly_cos[2]);
+ float32x4_t c = vfmaq_f32 (v_f32 (-0.5), r2, d->poly_cos[0]);
+ c = vfmaq_f32 (c, r4, p);
+ c = vfmaq_f32 (v_f32 (1), c, r2);
+
+ /* If odd quadrant, swap cos and sin. */
+ uint32x4_t swap = vtstq_u32 (vreinterpretq_u32_s32 (n), v_u32 (1));
+ float32x4_t ss = vbslq_f32 (swap, c, s);
+ float32x4_t cc = vbslq_f32 (swap, s, c);
+
+ /* Fix signs according to quadrant.
+ ss = asfloat(asuint(ss) ^ ((n & 2) << 30))
+     cc = asfloat(asuint(cc) ^ (((n + 1) & 2) << 30)). */
+ uint32x4_t sin_sign
+ = vshlq_n_u32 (vandq_u32 (vreinterpretq_u32_s32 (n), v_u32 (2)), 30);
+ uint32x4_t cos_sign = vshlq_n_u32 (
+ vandq_u32 (vreinterpretq_u32_s32 (vaddq_s32 (n, v_s32 (1))), v_u32 (2)),
+ 30);
+ ss = vreinterpretq_f32_u32 (
+ veorq_u32 (vreinterpretq_u32_f32 (ss), sin_sign));
+ cc = vreinterpretq_f32_u32 (
+ veorq_u32 (vreinterpretq_u32_f32 (cc), cos_sign));
+
+ return (float32x4x2_t){ ss, cc };
+}
diff --git a/contrib/arm-optimized-routines/pl/math/v_sinh_3u.c b/contrib/arm-optimized-routines/pl/math/v_sinh_3u.c
new file mode 100644
index 000000000000..a644f54b4a0f
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sinh_3u.c
@@ -0,0 +1,118 @@
+/*
+ * Double-precision vector sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[11];
+ float64x2_t inv_ln2, m_ln2, shift;
+ uint64x2_t halff;
+ int64x2_t onef;
+#if WANT_SIMD_EXCEPT
+ uint64x2_t tiny_bound, thresh;
+#else
+ uint64x2_t large_bound;
+#endif
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
+ V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
+ V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
+ V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
+ V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
+
+ .inv_ln2 = V2 (0x1.71547652b82fep0),
+ .m_ln2 = (float64x2_t) {-0x1.62e42fefa39efp-1, -0x1.abc9e3b39803fp-56},
+ .shift = V2 (0x1.8p52),
+
+ .halff = V2 (0x3fe0000000000000),
+ .onef = V2 (0x3ff0000000000000),
+#if WANT_SIMD_EXCEPT
+ /* 2^-26, below which sinh(x) rounds to x. */
+ .tiny_bound = V2 (0x3e50000000000000),
+ /* asuint(large_bound) - asuint(tiny_bound). */
+ .thresh = V2 (0x0230000000000000),
+#else
+  /* 2^9. expm1 helper overflows for large input. */
+ .large_bound = V2 (0x4080000000000000),
+#endif
+};
+
+static inline float64x2_t
+expm1_inline (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ /* Reduce argument:
+ exp(x) - 1 = 2^i * (expm1(f) + 1) - 1
+ where i = round(x / ln2)
+ and f = x - i * ln2 (f in [-ln2/2, ln2/2]). */
+ float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
+ int64x2_t i = vcvtq_s64_f64 (j);
+ float64x2_t f = vfmaq_laneq_f64 (x, j, d->m_ln2, 0);
+ f = vfmaq_laneq_f64 (f, j, d->m_ln2, 1);
+ /* Approximate expm1(f) using polynomial. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t f4 = vmulq_f64 (f2, f2);
+ float64x2_t f8 = vmulq_f64 (f4, f4);
+ float64x2_t p = vfmaq_f64 (f, f2, v_estrin_10_f64 (f, f2, f4, f8, d->poly));
+ /* t = 2^i. */
+ float64x2_t t = vreinterpretq_f64_u64 (
+ vreinterpretq_u64_s64 (vaddq_s64 (vshlq_n_s64 (i, 52), d->onef)));
+ /* expm1(x) ~= p * t + (t - 1). */
+ return vfmaq_f64 (vsubq_f64 (t, v_f64 (1.0)), p, t);
+}
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x)
+{
+ return v_call_f64 (sinh, x, x, v_u64 (-1));
+}
+
+/* Approximation for vector double-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The greatest observed error is 2.57 ULP:
+ _ZGVnN2v_sinh (0x1.9fb1d49d1d58bp-2) got 0x1.ab34e59d678dcp-2
+ want 0x1.ab34e59d678d9p-2. */
+float64x2_t VPCS_ATTR V_NAME_D1 (sinh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ float64x2_t ax = vabsq_f64 (x);
+ uint64x2_t sign
+ = veorq_u64 (vreinterpretq_u64_f64 (x), vreinterpretq_u64_f64 (ax));
+ float64x2_t halfsign = vreinterpretq_f64_u64 (vorrq_u64 (sign, d->halff));
+
+#if WANT_SIMD_EXCEPT
+ uint64x2_t special = vcgeq_u64 (
+ vsubq_u64 (vreinterpretq_u64_f64 (ax), d->tiny_bound), d->thresh);
+#else
+ uint64x2_t special = vcgeq_u64 (vreinterpretq_u64_f64 (ax), d->large_bound);
+#endif
+
+ /* Fall back to scalar variant for all lanes if any of them are special. */
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x);
+
+ /* Up to the point that expm1 overflows, we can use it to calculate sinh
+ using a slight rearrangement of the definition of sinh. This allows us to
+ retain acceptable accuracy for very small inputs. */
+ float64x2_t t = expm1_inline (ax);
+ t = vaddq_f64 (t, vdivq_f64 (t, vaddq_f64 (t, v_f64 (1.0))));
+ return vmulq_f64 (t, halfsign);
+}
+
+PL_SIG (V, D, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_D1 (sinh), 2.08)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (sinh), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0, 0x1p-26, 1000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p-26, 0x1p9, 500000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinh), 0x1p9, inf, 1000)
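The expm1 rearrangement used above can be written out in scalar form (illustration only, with libm expm1 standing in for expm1_inline). With t = expm1(|x|) we have exp(-|x|) = 1/(t+1), so sinh(|x|) = (t + t/(t+1))/2, which avoids the cancellation of exp(x) - exp(-x) for small |x|:

#include <math.h>

static double
sinh_via_expm1_sketch (double x)
{
  double ax = fabs (x);
  double t = expm1 (ax);
  double res = 0.5 * (t + t / (t + 1.0));
  return copysign (res, x); /* restore the sign, as halfsign does above. */
}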
diff --git a/contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c b/contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c
new file mode 100644
index 000000000000..cd8c0f08f784
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sinhf_2u3.c
@@ -0,0 +1,84 @@
+/*
+ * Single-precision vector sinh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "v_expm1f_inline.h"
+
+static const struct data
+{
+ struct v_expm1f_data expm1f_consts;
+ uint32x4_t halff;
+#if WANT_SIMD_EXCEPT
+ uint32x4_t tiny_bound, thresh;
+#else
+ uint32x4_t oflow_bound;
+#endif
+} data = {
+ .expm1f_consts = V_EXPM1F_DATA,
+ .halff = V4 (0x3f000000),
+#if WANT_SIMD_EXCEPT
+ /* 0x1.6a09e8p-32, below which expm1f underflows. */
+ .tiny_bound = V4 (0x2fb504f4),
+ /* asuint(oflow_bound) - asuint(tiny_bound). */
+ .thresh = V4 (0x12fbbbb3),
+#else
+ /* 0x1.61814ep+6, above which expm1f helper overflows. */
+ .oflow_bound = V4 (0x42b0c0a7),
+#endif
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (sinhf, x, y, special);
+}
+
+/* Approximation for vector single-precision sinh(x) using expm1.
+ sinh(x) = (exp(x) - exp(-x)) / 2.
+ The maximum error is 2.26 ULP:
+ _ZGVnN4v_sinhf (0x1.e34a9ep-4) got 0x1.e469ep-4
+ want 0x1.e469e4p-4. */
+float32x4_t VPCS_ATTR V_NAME_F1 (sinh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t sign = veorq_u32 (ix, iax);
+ float32x4_t halfsign = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->halff));
+
+#if WANT_SIMD_EXCEPT
+ uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, d->tiny_bound), d->thresh);
+ ax = v_zerofy_f32 (ax, special);
+#else
+ uint32x4_t special = vcgeq_u32 (iax, d->oflow_bound);
+#endif
+
+ /* Up to the point that expm1f overflows, we can use it to calculate sinhf
+     using a slight rearrangement of the definition of sinh. This allows us
+ to retain acceptable accuracy for very small inputs. */
+ float32x4_t t = expm1f_inline (ax, &d->expm1f_consts);
+ t = vaddq_f32 (t, vdivq_f32 (t, vaddq_f32 (t, v_f32 (1.0))));
+
+ /* Fall back to the scalar variant for any lanes that should trigger an
+ exception. */
+ if (unlikely (v_any_u32 (special)))
+ return special_case (x, vmulq_f32 (t, halfsign), special);
+
+ return vmulq_f32 (t, halfsign);
+}
+
+PL_SIG (V, F, 1, sinh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (sinh), 1.76)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (sinh), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0, 0x2fb504f4, 1000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x2fb504f4, 0x42b0c0a7, 100000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinh), 0x42b0c0a7, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c b/contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c
new file mode 100644
index 000000000000..8d2917ff8ecd
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sinpi_3u1.c
@@ -0,0 +1,86 @@
+/*
+ * Double-precision vector sinpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[10];
+} data = {
+ /* Polynomial coefficients generated using Remez algorithm,
+ see sinpi.sollya for details. */
+ .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2),
+ V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1),
+ V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8),
+ V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16),
+ V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) },
+};
+
+#if WANT_SIMD_EXCEPT
+# define TinyBound v_u64 (0x3bf0000000000000) /* asuint64(0x1p-64). */
+/* asuint64(0x1p64) - TinyBound. */
+# define Thresh v_u64 (0x07f0000000000000)
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
+{
+ /* Fall back to scalar code. */
+ y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+ return v_call_f64 (sinpi, x, y, cmp);
+}
+#endif
+
+/* Approximation for vector double-precision sinpi(x).
+ Maximum Error 3.05 ULP:
+ _ZGVnN2v_sinpi(0x1.d32750db30b4ap-2) got 0x1.fb295878301c7p-1
+ want 0x1.fb295878301cap-1. */
+float64x2_t VPCS_ATTR V_NAME_D1 (sinpi) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ uint64x2_t ir = vreinterpretq_u64_f64 (vabsq_f64 (x));
+ uint64x2_t cmp = vcgeq_u64 (vsubq_u64 (ir, TinyBound), Thresh);
+
+ /* When WANT_SIMD_EXCEPT = 1, special lanes should be set to 0
+ to avoid them under/overflowing and throwing exceptions. */
+ float64x2_t r = v_zerofy_f64 (x, cmp);
+#else
+ float64x2_t r = x;
+#endif
+
+ /* If r is odd, the sign of the result should be inverted. */
+ uint64x2_t odd
+ = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63);
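+ /* With n the integer nearest to x, sin(pi * x) = (-1)^n * sin(pi * (x - n)),
+ so shifting the low bit of n into the sign position gives the sign flip
+ needed for odd n. */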
+
+ /* r = x - rint(x). Range reduction to -1/2 .. 1/2. */
+ r = vsubq_f64 (r, vrndaq_f64 (r));
+
+ /* y = sin(pi * r). */
+ float64x2_t r2 = vmulq_f64 (r, r);
+ float64x2_t r4 = vmulq_f64 (r2, r2);
+ float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r);
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u64 (cmp)))
+ return special_case (x, y, odd, cmp);
+#endif
+
+ return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+}
+
+PL_SIG (V, D, 1, sinpi, -0.9, 0.9)
+PL_TEST_ULP (V_NAME_D1 (sinpi), 3.06)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (sinpi), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0, 0x1p-63, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p-63, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0.5, 0x1p51, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (sinpi), 0x1p51, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c b/contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c
new file mode 100644
index 000000000000..3d6eeff333f7
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_sinpif_3u.c
@@ -0,0 +1,81 @@
+/*
+ * Single-precision vector sinpi function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "mathlib.h"
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t poly[6];
+} data = {
+ /* Taylor series coefficients for sin(pi * x). */
+ .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f),
+ V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) },
+};
+
+#if WANT_SIMD_EXCEPT
+# define TinyBound v_u32 (0x30000000) /* asuint32(0x1p-31f). */
+# define Thresh v_u32 (0x1f000000) /* asuint32(0x1p31f) - TinyBound. */
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
+{
+ /* Fall back to scalar code. */
+ y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+ return v_call_f32 (sinpif, x, y, cmp);
+}
+#endif
+
+/* Approximation for vector single-precision sinpi(x).
+ Maximum Error 3.03 ULP:
+ _ZGVnN4v_sinpif(0x1.c597ccp-2) got 0x1.f7cd56p-1
+ want 0x1.f7cd5p-1. */
+float32x4_t VPCS_ATTR V_NAME_F1 (sinpi) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+ uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
+ uint32x4_t cmp = vcgeq_u32 (vsubq_u32 (ir, TinyBound), Thresh);
+
+ /* When WANT_SIMD_EXCEPT = 1, special lanes should be set to 0
+ to avoid them under/overflowing and throwing exceptions. */
+ float32x4_t r = v_zerofy_f32 (x, cmp);
+#else
+ float32x4_t r = x;
+#endif
+
+ /* If r is odd, the sign of the result should be inverted. */
+ uint32x4_t odd
+ = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31);
+
+ /* r = x - rint(x). Range reduction to -1/2 .. 1/2. */
+ r = vsubq_f32 (r, vrndaq_f32 (r));
+
+ /* Pairwise Horner approximation for y = sin(r * pi). */
+ float32x4_t r2 = vmulq_f32 (r, r);
+ float32x4_t r4 = vmulq_f32 (r2, r2);
+ float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r);
+
+#if WANT_SIMD_EXCEPT
+ if (unlikely (v_any_u32 (cmp)))
+ return special_case (x, y, odd, cmp);
+#endif
+
+ return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+}
+
+PL_SIG (V, F, 1, sinpi, -0.9, 0.9)
+PL_TEST_ULP (V_NAME_F1 (sinpi), 2.54)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (sinpi), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p-31, 0.5, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0.5, 0x1p31f, 10000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (sinpi), 0x1p31f, inf, 10000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_tan_3u5.c b/contrib/arm-optimized-routines/pl/math/v_tan_3u5.c
new file mode 100644
index 000000000000..c431c8c4889e
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_tan_3u5.c
@@ -0,0 +1,120 @@
+/*
+ * Double-precision vector tan(x) function.
+ *
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[9];
+ float64x2_t half_pi, two_over_pi, shift;
+#if !WANT_SIMD_EXCEPT
+ float64x2_t range_val;
+#endif
+} data = {
+ /* Coefficients generated using FPMinimax. */
+ .poly = { V2 (0x1.5555555555556p-2), V2 (0x1.1111111110a63p-3),
+ V2 (0x1.ba1ba1bb46414p-5), V2 (0x1.664f47e5b5445p-6),
+ V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9),
+ V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11),
+ V2 (0x1.4e4fd14147622p-12) },
+ .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 },
+ .two_over_pi = V2 (0x1.45f306dc9c883p-1),
+ .shift = V2 (0x1.8p52),
+#if !WANT_SIMD_EXCEPT
+ .range_val = V2 (0x1p23),
+#endif
+};
+
+#define RangeVal 0x4160000000000000 /* asuint64(0x1p23). */
+#define TinyBound 0x3e50000000000000 /* asuint64(2^-26). */
+#define Thresh 0x310000000000000 /* RangeVal - TinyBound. */
+
+/* Special cases (fall back to scalar calls). */
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x)
+{
+ return v_call_f64 (tan, x, x, v_u64 (-1));
+}
+
+/* Vector approximation for double-precision tan.
+ Maximum measured error is 3.48 ULP:
+ _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37
+ want -0x1.f6ccd8ecf7deap+37. */
+float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x)
+{
+ const struct data *dat = ptr_barrier (&data);
+ /* Our argument reduction cannot calculate q with sufficient accuracy for
+ very large inputs. Fall back to scalar routine for all lanes if any are
+ too large, or Inf/NaN. If fenv exceptions are expected, also fall back for
+ tiny input to avoid underflow. */
+#if WANT_SIMD_EXCEPT
+ uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
+ /* iax - tiny_bound > range_val - tiny_bound. */
+ uint64x2_t special
+ = vcgtq_u64 (vsubq_u64 (iax, v_u64 (TinyBound)), v_u64 (Thresh));
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x);
+#endif
+
+ /* q = nearest integer to 2 * x / pi. */
+ float64x2_t q
+ = vsubq_f64 (vfmaq_f64 (dat->shift, x, dat->two_over_pi), dat->shift);
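+ /* Adding and then subtracting the shift constant 0x1.8p52 rounds x * 2/pi
+ to the nearest integer without a separate rint call, assuming the default
+ round-to-nearest mode. */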
+ int64x2_t qi = vcvtq_s64_f64 (q);
+
+ /* Use q to reduce x to r in [-pi/4, pi/4], by:
+ r = x - q * pi/2, in extended precision. */
+ float64x2_t r = x;
+ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0);
+ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1);
+ /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle
+ formula. */
+ r = vmulq_n_f64 (r, 0.5);
+
+ /* Approximate tan(r) using order 8 polynomial.
+ tan(x) is odd, so polynomial has the form:
+ tan(x) ~= x + C0 * x^3 + C1 * x^5 + C2 * x^7 + ...
+ Hence we first approximate P(r) = C1 + C2 * r^2 + C3 * r^4 + ...
+ Then compute the approximation by:
+ tan(r) ~= r + r^3 * (C0 + r^2 * P(r)). */
+ float64x2_t r2 = vmulq_f64 (r, r), r4 = vmulq_f64 (r2, r2),
+ r8 = vmulq_f64 (r4, r4);
+ /* Offset coefficients to evaluate from C1 onwards. */
+ float64x2_t p = v_estrin_7_f64 (r2, r4, r8, dat->poly + 1);
+ p = vfmaq_f64 (dat->poly[0], p, r2);
+ p = vfmaq_f64 (r, r2, vmulq_f64 (p, r));
+
+ /* Recombination uses double-angle formula:
+ tan(2x) = 2 * tan(x) / (1 - (tan(x))^2)
+ and reciprocity around pi/2:
+ tan(x) = 1 / (tan(pi/2 - x))
+ to assemble result using change-of-sign and conditional selection of
+ numerator/denominator, dependent on odd/even-ness of q (hence quadrant).
+ */
+ float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p);
+ float64x2_t d = vaddq_f64 (p, p);
+
+ uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1));
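+ /* With p ~= tan(r) and x ~= q * pi/2 + 2r:
+ q even: tan(x) = tan(2r) = 2p / (1 - p^2) = -d / n,
+ q odd: tan(x) = tan(pi/2 + 2r) = -cot(2r) = (p^2 - 1) / (2p) = n / d. */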
+
+#if !WANT_SIMD_EXCEPT
+ uint64x2_t special = vcageq_f64 (x, dat->range_val);
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x);
+#endif
+
+ return vdivq_f64 (vbslq_f64 (no_recip, n, vnegq_f64 (d)),
+ vbslq_f64 (no_recip, d, n));
+}
+
+PL_SIG (V, D, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (V_NAME_D1 (tan), 2.99)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (tan), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), 0, TinyBound, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), TinyBound, RangeVal, 100000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (tan), RangeVal, inf, 5000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c b/contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c
new file mode 100644
index 000000000000..98948b0a9ecf
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_tanf_3u5.c
@@ -0,0 +1,127 @@
+/*
+ * Single-precision vector tan(x) function.
+ *
+ * Copyright (c) 2021-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float32x4_t poly[6];
+ float32x4_t pi_consts;
+ float32x4_t shift;
+#if !WANT_SIMD_EXCEPT
+ float32x4_t range_val;
+#endif
+} data = {
+ /* Coefficients generated using FPMinimax. */
+ .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f),
+ V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) },
+ /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi. */
+ .pi_consts
+ = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f },
+ .shift = V4 (0x1.8p+23f),
+#if !WANT_SIMD_EXCEPT
+ .range_val = V4 (0x1p15f),
+#endif
+};
+
+#define RangeVal v_u32 (0x47000000) /* asuint32(0x1p15f). */
+#define TinyBound v_u32 (0x30000000) /* asuint32 (0x1p-31f). */
+#define Thresh v_u32 (0x17000000) /* asuint32(RangeVal) - TinyBound. */
+
+/* Special cases (fall back to scalar calls). */
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
+{
+ return v_call_f32 (tanf, x, y, cmp);
+}
+
+/* Use a full Estrin scheme to evaluate polynomial. */
+static inline float32x4_t
+eval_poly (float32x4_t z, const struct data *d)
+{
+ float32x4_t z2 = vmulq_f32 (z, z);
+#if WANT_SIMD_EXCEPT
+ /* Tiny z (<= 0x1p-31) will underflow when calculating z^4.
+ If fp exceptions are to be triggered correctly,
+ sidestep this by fixing such lanes to 0. */
+ uint32x4_t will_uflow
+ = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound);
+ if (unlikely (v_any_u32 (will_uflow)))
+ z2 = vbslq_f32 (will_uflow, v_f32 (0), z2);
+#endif
+ float32x4_t z4 = vmulq_f32 (z2, z2);
+ return v_estrin_5_f32 (z, z2, z4, d->poly);
+}
+
+/* Fast implementation of AdvSIMD tanf.
+ Maximum error is 3.45 ULP:
+ __v_tanf(-0x1.e5f0cap+13) got 0x1.ff9856p-1
+ want 0x1.ff9850p-1. */
+float32x4_t VPCS_ATTR V_NAME_F1 (tan) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+ float32x4_t special_arg = x;
+
+ /* iax >= RangeVal means x, if not inf or NaN, is too large to perform fast
+ argument reduction. */
+#if WANT_SIMD_EXCEPT
+ uint32x4_t iax = vreinterpretq_u32_f32 (vabsq_f32 (x));
+ /* If fp exceptions are to be triggered correctly, also special-case tiny
+ input, as this will lead to overflow later. Fix any special lanes to 1 to
+ prevent any exceptions being triggered. */
+ uint32x4_t special = vcgeq_u32 (vsubq_u32 (iax, TinyBound), Thresh);
+ if (unlikely (v_any_u32 (special)))
+ x = vbslq_f32 (special, v_f32 (1.0f), x);
+#else
+ /* Otherwise, special-case large and special values. */
+ uint32x4_t special = vcageq_f32 (x, d->range_val);
+#endif
+
+ /* n = rint(x/(pi/2)). */
+ float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3);
+ float32x4_t n = vsubq_f32 (q, d->shift);
+ /* Determine if x lives in an interval where |tan(x)| grows to infinity. */
+ uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1));
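+ /* Since the shift constant is 1.5 * 2^23, n ends up in the low mantissa
+ bits of q, so bit 0 of q's representation gives the parity of n and hence
+ the quadrant. */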
+
+ /* r = x - n * (pi/2) (range reduction into -pi/4 .. pi/4). */
+ float32x4_t r;
+ r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0);
+ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1);
+ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2);
+
+ /* If x lives in an interval where |tan(x)|
+ - is finite, then use a polynomial approximation of the form
+ tan(r) ~ r + r^3 * P(r^2) = r + r * r^2 * P(r^2).
+ - grows to infinity, then use symmetries of tangent and the identity
+ tan(r) = cotan(pi/2 - r) to express tan(x) as 1/tan(-r). Finally, use
+ the same polynomial approximation of tan as above. */
+
+ /* Invert sign of r if odd quadrant. */
+ float32x4_t z = vmulq_f32 (r, vbslq_f32 (pred_alt, v_f32 (-1), v_f32 (1)));
+
+ /* Evaluate polynomial approximation of tangent on [-pi/4, pi/4]. */
+ float32x4_t z2 = vmulq_f32 (r, r);
+ float32x4_t p = eval_poly (z2, d);
+ float32x4_t y = vfmaq_f32 (z, vmulq_f32 (z, z2), p);
+
+ /* Compute reciprocal and apply if required. */
+ float32x4_t inv_y = vdivq_f32 (v_f32 (1.0f), y);
+
+ if (unlikely (v_any_u32 (special)))
+ return special_case (special_arg, vbslq_f32 (pred_alt, inv_y, y), special);
+ return vbslq_f32 (pred_alt, inv_y, y);
+}
+
+PL_SIG (V, F, 1, tan, -3.1, 3.1)
+PL_TEST_ULP (V_NAME_F1 (tan), 2.96)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (tan), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0, 0x1p-31, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p-31, 0x1p15, 500000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (tan), 0x1p15, inf, 5000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_tanh_3u.c b/contrib/arm-optimized-routines/pl/math/v_tanh_3u.c
new file mode 100644
index 000000000000..5de85c68da2c
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_tanh_3u.c
@@ -0,0 +1,106 @@
+/*
+ * Double-precision vector tanh(x) function.
+ * Copyright (c) 2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+#include "mathlib.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+static const struct data
+{
+ float64x2_t poly[11];
+ float64x2_t inv_ln2, ln2_hi, ln2_lo, shift;
+ uint64x2_t onef;
+ uint64x2_t thresh, tiny_bound;
+} data = {
+ /* Generated using Remez, deg=12 in [-log(2)/2, log(2)/2]. */
+ .poly = { V2 (0x1p-1), V2 (0x1.5555555555559p-3), V2 (0x1.555555555554bp-5),
+ V2 (0x1.111111110f663p-7), V2 (0x1.6c16c16c1b5f3p-10),
+ V2 (0x1.a01a01affa35dp-13), V2 (0x1.a01a018b4ecbbp-16),
+ V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22),
+ V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29), },
+
+ .inv_ln2 = V2 (0x1.71547652b82fep0),
+ .ln2_hi = V2 (-0x1.62e42fefa39efp-1),
+ .ln2_lo = V2 (-0x1.abc9e3b39803fp-56),
+ .shift = V2 (0x1.8p52),
+
+ .onef = V2 (0x3ff0000000000000),
+ .tiny_bound = V2 (0x3e40000000000000), /* asuint64 (0x1p-27). */
+ /* asuint64(0x1.241bf835f9d5fp+4) - asuint64(tiny_bound). */
+ .thresh = V2 (0x01f241bf835f9d5f),
+};
+
+static inline float64x2_t
+expm1_inline (float64x2_t x, const struct data *d)
+{
+ /* Helper routine for calculating exp(x) - 1. Vector port of the helper from
+ the scalar variant of tanh. */
+
+ /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */
+ float64x2_t j = vsubq_f64 (vfmaq_f64 (d->shift, d->inv_ln2, x), d->shift);
+ int64x2_t i = vcvtq_s64_f64 (j);
+ float64x2_t f = vfmaq_f64 (x, j, d->ln2_hi);
+ f = vfmaq_f64 (f, j, d->ln2_lo);
+
+ /* Approximate expm1(f) using polynomial. */
+ float64x2_t f2 = vmulq_f64 (f, f);
+ float64x2_t f4 = vmulq_f64 (f2, f2);
+ float64x2_t p = vfmaq_f64 (
+ f, f2, v_estrin_10_f64 (f, f2, f4, vmulq_f64 (f4, f4), d->poly));
+
+ /* t = 2 ^ i. */
+ float64x2_t t = vreinterpretq_f64_u64 (
+ vaddq_u64 (vreinterpretq_u64_s64 (i << 52), d->onef));
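+ /* Adding i to the exponent field of 1.0 scales it to t = 2^i. Writing
+ x = i * ln2 + f gives e^x = 2^i * e^f, hence
+ expm1(x) = t * expm1(f) + (t - 1). */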
+ /* expm1(x) = p * t + (t - 1). */
+ return vfmaq_f64 (vsubq_f64 (t, v_f64 (1)), p, t);
+}
+
+static float64x2_t NOINLINE VPCS_ATTR
+special_case (float64x2_t x, float64x2_t y, uint64x2_t special)
+{
+ return v_call_f64 (tanh, x, y, special);
+}
+
+/* Vector approximation for double-precision tanh(x), using a simplified
+ version of expm1. The greatest observed error is 2.77 ULP:
+ _ZGVnN2v_tanh(-0x1.c4a4ca0f9f3b7p-3) got -0x1.bd6a21a163627p-3
+ want -0x1.bd6a21a163624p-3. */
+float64x2_t VPCS_ATTR V_NAME_D1 (tanh) (float64x2_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
+
+ float64x2_t u = x;
+
+ /* Trigger special-cases for tiny, boring and infinity/NaN. */
+ uint64x2_t special = vcgtq_u64 (vsubq_u64 (ia, d->tiny_bound), d->thresh);
+#if WANT_SIMD_EXCEPT
+ /* To trigger fp exceptions correctly, set special lanes to a neutral value.
+ They will be fixed up later by the special-case handler. */
+ if (unlikely (v_any_u64 (special)))
+ u = v_zerofy_f64 (u, special);
+#endif
+
+ u = vaddq_f64 (u, u);
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ float64x2_t q = expm1_inline (u, d);
+ float64x2_t qp2 = vaddq_f64 (q, v_f64 (2));
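+ /* With q = expm1(2x), the numerator e^2x - 1 is q and the denominator
+ e^2x + 1 is q + 2, so tanh(x) = q / (q + 2). */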
+
+ if (unlikely (v_any_u64 (special)))
+ return special_case (x, vdivq_f64 (q, qp2), special);
+ return vdivq_f64 (q, qp2);
+}
+
+PL_SIG (V, D, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_D1 (tanh), 2.27)
+PL_TEST_EXPECT_FENV (V_NAME_D1 (tanh), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0, 0x1p-27, 5000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1p-27, 0x1.241bf835f9d5fp+4, 50000)
+PL_TEST_SYM_INTERVAL (V_NAME_D1 (tanh), 0x1.241bf835f9d5fp+4, inf, 1000)
diff --git a/contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c b/contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c
new file mode 100644
index 000000000000..d1cb9fb6eeb3
--- /dev/null
+++ b/contrib/arm-optimized-routines/pl/math/v_tanhf_2u6.c
@@ -0,0 +1,73 @@
+/*
+ * Single-precision vector tanh(x) function.
+ *
+ * Copyright (c) 2022-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "v_math.h"
+#include "pl_sig.h"
+#include "pl_test.h"
+
+#include "v_expm1f_inline.h"
+
+static const struct data
+{
+ struct v_expm1f_data expm1f_consts;
+ uint32x4_t boring_bound, large_bound, onef;
+} data = {
+ .expm1f_consts = V_EXPM1F_DATA,
+ /* 0x1.205966p+3, above which tanhf rounds to 1 (or -1 for negative). */
+ .boring_bound = V4 (0x41102cb3),
+ .large_bound = V4 (0x7f800000),
+ .onef = V4 (0x3f800000),
+};
+
+static float32x4_t NOINLINE VPCS_ATTR
+special_case (float32x4_t x, float32x4_t y, uint32x4_t special)
+{
+ return v_call_f32 (tanhf, x, y, special);
+}
+
+/* Approximation for single-precision vector tanh(x), using a simplified
+ version of expm1f. The maximum error is 2.58 ULP:
+ _ZGVnN4v_tanhf (0x1.fa5eep-5) got 0x1.f9ba02p-5
+ want 0x1.f9ba08p-5. */
+float32x4_t VPCS_ATTR V_NAME_F1 (tanh) (float32x4_t x)
+{
+ const struct data *d = ptr_barrier (&data);
+
+ uint32x4_t ix = vreinterpretq_u32_f32 (x);
+ float32x4_t ax = vabsq_f32 (x);
+ uint32x4_t iax = vreinterpretq_u32_f32 (ax);
+ uint32x4_t sign = veorq_u32 (ix, iax);
+ uint32x4_t is_boring = vcgtq_u32 (iax, d->boring_bound);
+ float32x4_t boring = vreinterpretq_f32_u32 (vorrq_u32 (sign, d->onef));
+
+#if WANT_SIMD_EXCEPT
+ /* If fp exceptions are to be triggered properly, set all special and boring
+ lanes to 0, which will trigger no exceptions, and fix them up later. */
+ uint32x4_t special = vorrq_u32 (vcgtq_u32 (iax, d->large_bound),
+ vcltq_u32 (iax, v_u32 (0x34000000)));
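+ /* 0x34000000 is asuint32 (0x1p-23): tiny lanes are also deferred to the
+ scalar fallback so that any fp exceptions they should raise come from
+ there. */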
+ x = v_zerofy_f32 (x, is_boring);
+ if (unlikely (v_any_u32 (special)))
+ x = v_zerofy_f32 (x, special);
+#else
+ uint32x4_t special = vcgtq_u32 (iax, d->large_bound);
+#endif
+
+ /* tanh(x) = (e^2x - 1) / (e^2x + 1). */
+ float32x4_t q = expm1f_inline (vmulq_n_f32 (x, 2), &d->expm1f_consts);
+ float32x4_t y = vdivq_f32 (q, vaddq_f32 (q, v_f32 (2.0)));
+ if (unlikely (v_any_u32 (special)))
+ return special_case (vreinterpretq_f32_u32 (ix),
+ vbslq_f32 (is_boring, boring, y), special);
+ return vbslq_f32 (is_boring, boring, y);
+}
+
+PL_SIG (V, F, 1, tanh, -10.0, 10.0)
+PL_TEST_ULP (V_NAME_F1 (tanh), 2.09)
+PL_TEST_EXPECT_FENV (V_NAME_F1 (tanh), WANT_SIMD_EXCEPT)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0, 0x1p-23, 1000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1p-23, 0x1.205966p+3, 100000)
+PL_TEST_SYM_INTERVAL (V_NAME_F1 (tanh), 0x1.205966p+3, inf, 100)