|
5 | 5 |
|
6 | 6 | #![allow(clippy::excessive_precision)]
|
7 | 7 |
|
| 8 | +use std::f32::consts::{PI, SQRT_2}; |
| 9 | + |
8 | 10 | use super::eval_rational_poly;
|
9 | 11 |
|
10 | 12 | const POW2F_NUMER_COEFFS: [f32; 3] = [1.01749063e1, 4.88687798e1, 9.85506591e1];
|
11 | 13 | const POW2F_DENOM_COEFFS: [f32; 4] = [2.10242958e-1, -2.22328856e-2, -1.94414990e1, 9.85506633e1];
|
12 | 14 |
|
| 15 | +#[inline] |
| 16 | +pub fn fast_cos(x: f32) -> f32 { |
| 17 | + // Step 1: range reduction to [0, 2pi) |
| 18 | + let pi2 = PI * 2.0; |
| 19 | + let pi2_inv = 0.5 / PI; |
| 20 | + let npi2 = (x * pi2_inv).floor() * pi2; |
| 21 | + let xmodpi2 = x - npi2; |
| 22 | + // Step 2: range reduction to [0, pi] |
| 23 | + let x_pi = xmodpi2.min(pi2 - xmodpi2); |
| 24 | + // Step 3: range reduction to [0, pi/2] |
| 25 | + let above_pihalf = x_pi >= PI / 2.0; |
| 26 | + let x_pihalf = if above_pihalf { PI - x_pi } else { x_pi }; |
| 27 | + // Step 4: Taylor-like approximation, scaled by 2**0.75 to make angle |
| 28 | + // duplication steps faster, on x/4. |
| 29 | + let xs = x_pihalf * 0.25; |
| 30 | + let x2 = xs * xs; |
| 31 | + let x4 = x2 * x2; |
| 32 | + let cosx_prescaling = x4 * 0.06960438 + (x2 * -0.84087373 + 1.68179268); |
| 33 | + // Step 5: angle duplication. |
| 34 | + let cosx_scale1 = cosx_prescaling * cosx_prescaling - SQRT_2; |
| 35 | + let cosx_scale2 = cosx_scale1 * cosx_scale1 - 1.0; |
| 36 | + // Step 6: change sign if needed. |
| 37 | + if above_pihalf { |
| 38 | + -cosx_scale2 |
| 39 | + } else { |
| 40 | + cosx_scale2 |
| 41 | + } |
| 42 | +} |
| 43 | + |
| 44 | +#[inline] |
| 45 | +pub fn fast_erff(x: f32) -> f32 { |
| 46 | + // Formula from |
| 47 | + // https://en.wikipedia.org/wiki/Error_function#Numerical_approximations |
| 48 | + // but constants have been recomputed. |
| 49 | + let absx = x.abs(); |
| 50 | + // Compute 1 - 1 / ((((x * a + b) * x + c) * x + d) * x + 1)**4 |
| 51 | + let denom1 = absx * 7.77394369e-02 + 2.05260015e-04; |
| 52 | + let denom2 = denom1 * absx + 2.32120216e-01; |
| 53 | + let denom3 = denom2 * absx + 2.77820801e-01; |
| 54 | + let denom4 = denom3 * absx + 1.0; |
| 55 | + let denom5 = denom4 * denom4; |
| 56 | + let inv_denom5 = 1.0 / denom5; |
| 57 | + let result = -inv_denom5 * inv_denom5 + 1.0; |
| 58 | + result.copysign(x) |
| 59 | +} |
| 60 | + |
13 | 61 | #[inline]
|
14 | 62 | pub fn fast_pow2f(x: f32) -> f32 {
|
15 | 63 | let x_floor = x.floor();
|
@@ -61,8 +109,61 @@ pub fn fast_powf(base: f32, exp: f32) -> f32 {
|
61 | 109 | mod test {
|
62 | 110 | use test_log::test;
|
63 | 111 |
|
| 112 | + use crate::util::test::assert_almost_eq; |
| 113 | + |
64 | 114 | use super::*;
|
65 | 115 |
|
| 116 | + #[test] |
| 117 | + fn test_fast_erff() { |
| 118 | + // Golden data copied from https://en.wikipedia.org/wiki/Error_function#Table_of_values. |
| 119 | + let golden = [ |
| 120 | + (0.0, 0.0), |
| 121 | + (0.02, 0.022564575), |
| 122 | + (0.04, 0.045111106), |
| 123 | + (0.06, 0.067621594), |
| 124 | + (0.08, 0.090078126), |
| 125 | + (0.1, 0.112462916), |
| 126 | + (0.2, 0.222702589), |
| 127 | + (0.3, 0.328626759), |
| 128 | + (0.4, 0.428392355), |
| 129 | + (0.5, 0.520499878), |
| 130 | + (0.6, 0.603856091), |
| 131 | + (0.7, 0.677801194), |
| 132 | + (0.8, 0.742100965), |
| 133 | + (0.9, 0.796908212), |
| 134 | + (1.0, 0.842700793), |
| 135 | + (1.1, 0.880205070), |
| 136 | + (1.2, 0.910313978), |
| 137 | + (1.3, 0.934007945), |
| 138 | + (1.4, 0.952285120), |
| 139 | + (1.5, 0.966105146), |
| 140 | + (1.6, 0.976348383), |
| 141 | + (1.7, 0.983790459), |
| 142 | + (1.8, 0.989090502), |
| 143 | + (1.9, 0.992790429), |
| 144 | + (2.0, 0.995322265), |
| 145 | + (2.1, 0.997020533), |
| 146 | + (2.2, 0.998137154), |
| 147 | + (2.3, 0.998856823), |
| 148 | + (2.4, 0.999311486), |
| 149 | + (2.5, 0.999593048), |
| 150 | + (3.0, 0.999977910), |
| 151 | + (3.5, 0.999999257), |
| 152 | + ]; |
| 153 | + for (x, erf_x) in golden { |
| 154 | + assert_almost_eq!(fast_erff(x), erf_x, 6e-4); |
| 155 | + assert_almost_eq!(fast_erff(-x), -erf_x, 6e-4); |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + #[test] |
| 160 | + fn test_fast_cos() { |
| 161 | + for i in 0..100 { |
| 162 | + let x = i as f32 / 100.0 * (5.0 * PI) - (2.5 * PI); |
| 163 | + assert_almost_eq!(fast_cos(x), x.cos(), 1e-4); |
| 164 | + } |
| 165 | + } |
| 166 | + |
66 | 167 | #[test]
|
67 | 168 | fn fast_powf_arb() {
|
68 | 169 | arbtest::arbtest(|u| {
|
|
0 commit comments