Skip to content

Commit 8bacfef

Browse files
authored
Merge branch 'main' into pfm
2 parents d49e0ec + 2a39744 commit 8bacfef

File tree

1 file changed

+101
-0
lines changed

1 file changed

+101
-0
lines changed

jxl/src/util/fast_math.rs

+101
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,59 @@
55

66
#![allow(clippy::excessive_precision)]
77

8+
use std::f32::consts::{PI, SQRT_2};
9+
810
use super::eval_rational_poly;
911

1012
const POW2F_NUMER_COEFFS: [f32; 3] = [1.01749063e1, 4.88687798e1, 9.85506591e1];
1113
const POW2F_DENOM_COEFFS: [f32; 4] = [2.10242958e-1, -2.22328856e-2, -1.94414990e1, 9.85506633e1];
1214

15+
#[inline]
16+
pub fn fast_cos(x: f32) -> f32 {
17+
// Step 1: range reduction to [0, 2pi)
18+
let pi2 = PI * 2.0;
19+
let pi2_inv = 0.5 / PI;
20+
let npi2 = (x * pi2_inv).floor() * pi2;
21+
let xmodpi2 = x - npi2;
22+
// Step 2: range reduction to [0, pi]
23+
let x_pi = xmodpi2.min(pi2 - xmodpi2);
24+
// Step 3: range reduction to [0, pi/2]
25+
let above_pihalf = x_pi >= PI / 2.0;
26+
let x_pihalf = if above_pihalf { PI - x_pi } else { x_pi };
27+
// Step 4: Taylor-like approximation, scaled by 2**0.75 to make angle
28+
// duplication steps faster, on x/4.
29+
let xs = x_pihalf * 0.25;
30+
let x2 = xs * xs;
31+
let x4 = x2 * x2;
32+
let cosx_prescaling = x4 * 0.06960438 + (x2 * -0.84087373 + 1.68179268);
33+
// Step 5: angle duplication.
34+
let cosx_scale1 = cosx_prescaling * cosx_prescaling - SQRT_2;
35+
let cosx_scale2 = cosx_scale1 * cosx_scale1 - 1.0;
36+
// Step 6: change sign if needed.
37+
if above_pihalf {
38+
-cosx_scale2
39+
} else {
40+
cosx_scale2
41+
}
42+
}
43+
44+
#[inline]
45+
pub fn fast_erff(x: f32) -> f32 {
46+
// Formula from
47+
// https://en.wikipedia.org/wiki/Error_function#Numerical_approximations
48+
// but constants have been recomputed.
49+
let absx = x.abs();
50+
// Compute 1 - 1 / ((((x * a + b) * x + c) * x + d) * x + 1)**4
51+
let denom1 = absx * 7.77394369e-02 + 2.05260015e-04;
52+
let denom2 = denom1 * absx + 2.32120216e-01;
53+
let denom3 = denom2 * absx + 2.77820801e-01;
54+
let denom4 = denom3 * absx + 1.0;
55+
let denom5 = denom4 * denom4;
56+
let inv_denom5 = 1.0 / denom5;
57+
let result = -inv_denom5 * inv_denom5 + 1.0;
58+
result.copysign(x)
59+
}
60+
1361
#[inline]
1462
pub fn fast_pow2f(x: f32) -> f32 {
1563
let x_floor = x.floor();
@@ -61,8 +109,61 @@ pub fn fast_powf(base: f32, exp: f32) -> f32 {
61109
mod test {
62110
use test_log::test;
63111

112+
use crate::util::test::assert_almost_eq;
113+
64114
use super::*;
65115

116+
#[test]
117+
fn test_fast_erff() {
118+
// Golden data copied from https://en.wikipedia.org/wiki/Error_function#Table_of_values.
119+
let golden = [
120+
(0.0, 0.0),
121+
(0.02, 0.022564575),
122+
(0.04, 0.045111106),
123+
(0.06, 0.067621594),
124+
(0.08, 0.090078126),
125+
(0.1, 0.112462916),
126+
(0.2, 0.222702589),
127+
(0.3, 0.328626759),
128+
(0.4, 0.428392355),
129+
(0.5, 0.520499878),
130+
(0.6, 0.603856091),
131+
(0.7, 0.677801194),
132+
(0.8, 0.742100965),
133+
(0.9, 0.796908212),
134+
(1.0, 0.842700793),
135+
(1.1, 0.880205070),
136+
(1.2, 0.910313978),
137+
(1.3, 0.934007945),
138+
(1.4, 0.952285120),
139+
(1.5, 0.966105146),
140+
(1.6, 0.976348383),
141+
(1.7, 0.983790459),
142+
(1.8, 0.989090502),
143+
(1.9, 0.992790429),
144+
(2.0, 0.995322265),
145+
(2.1, 0.997020533),
146+
(2.2, 0.998137154),
147+
(2.3, 0.998856823),
148+
(2.4, 0.999311486),
149+
(2.5, 0.999593048),
150+
(3.0, 0.999977910),
151+
(3.5, 0.999999257),
152+
];
153+
for (x, erf_x) in golden {
154+
assert_almost_eq!(fast_erff(x), erf_x, 6e-4);
155+
assert_almost_eq!(fast_erff(-x), -erf_x, 6e-4);
156+
}
157+
}
158+
159+
#[test]
160+
fn test_fast_cos() {
161+
for i in 0..100 {
162+
let x = i as f32 / 100.0 * (5.0 * PI) - (2.5 * PI);
163+
assert_almost_eq!(fast_cos(x), x.cos(), 1e-4);
164+
}
165+
}
166+
66167
#[test]
67168
fn fast_powf_arb() {
68169
arbtest::arbtest(|u| {

0 commit comments

Comments
 (0)