Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 0a4caf2

Browse files
committed
cleanup
1 parent 57c0dec commit 0a4caf2

File tree

2 files changed

+86
-23
lines changed

2 files changed

+86
-23
lines changed

src/math/cbrt.rs

Lines changed: 85 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,19 @@ pub fn cbrt(x: f64) -> f64 {
1313
cbrt_round(x, Round::Nearest).val
1414
}
1515

16+
// /// Compute the cube root of the argument.
17+
// #[cfg(f128_enabled)]
18+
// #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
19+
// pub fn cbrtf128(x: f128) -> f128 {
20+
// cbrt_round(x, Round::Nearest).val
21+
// }
22+
23+
/// Correctly rounded cube root.
24+
///
25+
/// Algorithm:
26+
/// - Minimax initial approximation
27+
/// - `F`-sized newton iteration
28+
/// - `2xF`-sized newton iteration
1629
pub fn cbrt_round<F: Float + CbrtHelper>(x: F, round: Round) -> FpResult<F>
1730
where
1831
F::Int: CastFrom<u64>,
@@ -25,69 +38,67 @@ where
2538
let u1: F = F::U1;
2639
let off = F::OFF;
2740

28-
/* rm=0 for rounding to nearest, and other values for directed roundings */
2941
let hx = x.to_bits();
3042
let mut mant: F::Int = hx & F::SIG_MASK;
3143
let sign: F::Int = hx & F::SIGN_MASK;
3244
let neg = x.is_sign_negative();
3345

3446
let mut e: u32 = x.exp();
3547

48+
// Handle 0, infinity, NaN, and subnormals
3649
if ((e + 1) & F::EXP_SAT) < 2 {
3750
cold_path();
3851

3952
let ix = hx & !F::SIGN_MASK;
4053

41-
/* 0, inf, nan: we return x + x instead of simply x,
42-
to that for x a signaling NaN, it correctly triggers
43-
the invalid exception. */
4454
if e == F::EXP_SAT || ix == zero {
55+
// 0, infinity, NaN; use x + x to trigger exceptions
4556
return FpResult::ok(x + x);
4657
}
4758

48-
let nz = ix.leading_zeros() - 11; /* subnormal */
59+
// Normalize subnormals
60+
let nz = ix.leading_zeros() - F::EXP_BITS;
4961
mant <<= nz;
5062
mant &= F::SIG_MASK;
5163
e = e.wrapping_sub(nz - 1);
5264
}
5365

5466
e = e.wrapping_add(3072);
55-
let cvt1 = mant | (F::Int::cast_from(F::EXP_BIAS) << F::SIG_BITS);
56-
let mut cvt5 = cvt1;
67+
// Set the exponent to 0, z is now [1, 2)
68+
let iz = mant | (F::Int::cast_from(F::EXP_BIAS) << F::SIG_BITS);
5769

5870
let et: u32 = e / 3;
5971
let it: u32 = e % 3;
6072

61-
/* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */
62-
cvt5 += F::Int::cast_from(it) << F::SIG_BITS;
63-
cvt5 |= sign;
64-
let zz: F = F::from_bits(cvt5);
73+
// 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3
74+
// `zz` is `x` reduced to [1, 8)
75+
let izz = (iz + (F::Int::cast_from(it) << F::SIG_BITS)) | sign;
76+
let zz: F = F::from_bits(izz);
6577

6678
/* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
67-
let mut isc = F::ESCALE[it as usize].to_bits(); // todo: index
68-
isc |= sign;
69-
let cvt2 = isc;
70-
let z: F = F::from_bits(cvt1);
79+
let isc = F::ESCALE[it as usize].to_bits() | sign;
80+
let z: F = F::from_bits(iz);
7181

7282
/* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
7383
and 1 <= z < 2 */
7484
let r: F = F::ONE / z;
75-
let rr: F = r * F::RSC[((it as usize) << 1) | neg as usize];
85+
let rr: F = r * F::RSCALE[((it as usize) << 1) | neg as usize];
7686
let z2: F = z * z;
7787
let c0: F = F::C[0] + z * F::C[1];
7888
let c2: F = F::C[2] + z * F::C[3];
79-
let mut y: F = c0 + z2 * c2;
80-
let mut y2: F = y * y;
8189

8290
/* y is an approximation of z^(1/3) */
83-
let mut h: F = y2 * (y * r) - F::ONE;
91+
let mut y: F = c0 + z2 * c2;
92+
let mut y2: F = y * y;
8493

8594
/* h determines the error between y and z^(1/3) */
86-
y -= (h * y) * (u0 - u1 * h);
95+
let mut h: F = y2 * (y * r) - F::ONE;
8796

8897
/* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
8998
of Newton's method, with the function f(y) = 1-z/y^3. */
90-
y *= F::from_bits(cvt2);
99+
y -= (h * y) * (u0 - u1 * h);
100+
101+
y *= F::from_bits(isc);
91102

92103
/* Now y is an approximation of zz^(1/3),
93104
* and rr an approximation of 1/zz. We now perform another iteration of
@@ -194,7 +205,7 @@ pub trait CbrtHelper: Float {
194205
const C: [Self; 4];
195206
const U0: Self;
196207
const U1: Self;
197-
const RSC: [Self; 6];
208+
const RSCALE: [Self; 6];
198209
const OFF: [Self; 4];
199210
const WLIST: [(Self, Self); 7];
200211
const AZMAGIC1: Self;
@@ -212,17 +223,23 @@ impl CbrtHelper for f64 {
212223
];
213224

214225
const C: [Self; 4] = [
226+
// hf64!("0x1.1b850259b99ddp-1"),
227+
// hf64!("0x1.2b9762efeffecp-1"),
228+
// hf64!("-0x1.4af8eb64ea1ecp-3"),
229+
// hf64!("0x1.7590cccfad50bp-6"),
215230
hf64!("0x1.1b0babccfef9cp-1"),
216231
hf64!("0x1.2c9a3e94d1da5p-1"),
217232
hf64!("-0x1.4dc30b1a1ddbap-3"),
218233
hf64!("0x1.7a8d3e4ec9b07p-6"),
219234
];
220235

236+
// 0.33333333...
221237
const U0: Self = hf64!("0x1.5555555555555p-2");
222238

239+
// 0.22222222...
223240
const U1: Self = hf64!("0x1.c71c71c71c71cp-3");
224241

225-
const RSC: [Self; 6] = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
242+
const RSCALE: [Self; 6] = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
226243

227244
const OFF: [Self; 4] = [hf64!("0x1p-53"), 0.0, 0.0, 0.0];
228245

@@ -254,6 +271,51 @@ impl CbrtHelper for f64 {
254271
}
255272
}
256273

274+
#[cfg(f128_enabled)]
275+
impl CbrtHelper for f128 {
276+
const ESCALE: [Self; 3] = [
277+
1.0,
278+
hf128!("0x1.428a2f98d728acf826cc8664b665p+0"), /* 2^(1/3) */
279+
hf128!("0x1.965fea53d6e3c53be1ca3482bf3ap+0"), /* 2^(2/3) */
280+
];
281+
282+
const C: [Self; 4] = [
283+
hf128!("0x1.1b850223b8bf644fcef50feeced1p-1"),
284+
hf128!("0x1.2b97635e9e17d5240965cb56dc73p-1"),
285+
hf128!("-0x1.4af8ec964bbc3767a6cf8ac456cbp-3"),
286+
hf128!("0x1.7590ceecbb8c4c40d8c5e8b64d6bp-6"),
287+
];
288+
289+
const U0: Self = 0.3333333333333333333333333333333333333333;
290+
291+
const U1: Self = 0.2222222222222222222222222222222222222222;
292+
293+
const RSCALE: [Self; 6] = [1.0, -1.0, 0.5, -0.5, 0.25, -0.25];
294+
295+
const OFF: [Self; 4] = [hf128!("0x1p-53"), 0.0, 0.0, 0.0];
296+
297+
// Other rounding modes unsupported for f128
298+
const WLIST: [(Self, Self); 7] =
299+
[(0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0), (0.0, 0.0)];
300+
301+
const AZMAGIC1: Self = hf128!("0x1.9b78223aa307cp+1");
302+
const AZMAGIC2: Self = hf128!("0x1.79d15d0e8d59cp+0");
303+
const AZMAGIC3: Self = hf128!("0x1.a202bfc89ddffp+2");
304+
const AZMAGIC4: Self = hf128!("0x1.de87aa837820fp+0");
305+
306+
fn fma(self, y: Self, z: Self) -> Self {
307+
#[cfg(intrinsics_enabled)]
308+
{
309+
return unsafe { core::intrinsics::fmaf128(self, y, z) };
310+
}
311+
312+
#[cfg(not(intrinsics_enabled))]
313+
{
314+
return super::fmaf128(self, y, z);
315+
}
316+
}
317+
}
318+
257319
#[cfg(test)]
258320
mod tests {
259321
use super::*;

src/math/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ cfg_if! {
390390
mod truncf128;
391391
// verify-sorted-end
392392

393+
// pub use self::cbrt::cbrtf128;
393394
// verify-sorted-start
394395
pub use self::ceilf128::ceilf128;
395396
pub use self::copysignf128::copysignf128;

0 commit comments

Comments
 (0)