@@ -13,6 +13,19 @@ pub fn cbrt(x: f64) -> f64 {
13
13
cbrt_round ( x, Round :: Nearest ) . val
14
14
}
15
15
16
+ // /// Compute the cube root of the argument.
17
+ // #[cfg(f128_enabled)]
18
+ // #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
19
+ // pub fn cbrtf128(x: f128) -> f128 {
20
+ // cbrt_round(x, Round::Nearest).val
21
+ // }
22
+
23
+ /// Correctly rounded cube root.
24
+ ///
25
+ /// Algorithm:
26
+ /// - Minimax initial approximation
27
+ /// - `F`-sized newton iteration
28
+ /// - `2xF`-sized newton iteration
16
29
pub fn cbrt_round < F : Float + CbrtHelper > ( x : F , round : Round ) -> FpResult < F >
17
30
where
18
31
F :: Int : CastFrom < u64 > ,
@@ -25,69 +38,67 @@ where
25
38
let u1: F = F :: U1 ;
26
39
let off = F :: OFF ;
27
40
28
- /* rm=0 for rounding to nearest, and other values for directed roundings */
29
41
let hx = x. to_bits ( ) ;
30
42
let mut mant: F :: Int = hx & F :: SIG_MASK ;
31
43
let sign: F :: Int = hx & F :: SIGN_MASK ;
32
44
let neg = x. is_sign_negative ( ) ;
33
45
34
46
let mut e: u32 = x. exp ( ) ;
35
47
48
+ // Handle 0, infinity, NaN, and subnormals
36
49
if ( ( e + 1 ) & F :: EXP_SAT ) < 2 {
37
50
cold_path ( ) ;
38
51
39
52
let ix = hx & !F :: SIGN_MASK ;
40
53
41
- /* 0, inf, nan: we return x + x instead of simply x,
42
- to that for x a signaling NaN, it correctly triggers
43
- the invalid exception. */
44
54
if e == F :: EXP_SAT || ix == zero {
55
+ // 0, infinity, NaN; use x + x to trigger exceptions
45
56
return FpResult :: ok ( x + x) ;
46
57
}
47
58
48
- let nz = ix. leading_zeros ( ) - 11 ; /* subnormal */
59
+ // Normalize subnormals
60
+ let nz = ix. leading_zeros ( ) - F :: EXP_BITS ;
49
61
mant <<= nz;
50
62
mant &= F :: SIG_MASK ;
51
63
e = e. wrapping_sub ( nz - 1 ) ;
52
64
}
53
65
54
66
e = e. wrapping_add ( 3072 ) ;
55
- let cvt1 = mant | ( F :: Int :: cast_from ( F :: EXP_BIAS ) << F :: SIG_BITS ) ;
56
- let mut cvt5 = cvt1 ;
67
+ // Set the exponent to 0, z is now [1, 2)
68
+ let iz = mant | ( F :: Int :: cast_from ( F :: EXP_BIAS ) << F :: SIG_BITS ) ;
57
69
58
70
let et: u32 = e / 3 ;
59
71
let it: u32 = e % 3 ;
60
72
61
- /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */
62
- cvt5 += F :: Int :: cast_from ( it ) << F :: SIG_BITS ;
63
- cvt5 |= sign;
64
- let zz: F = F :: from_bits ( cvt5 ) ;
73
+ // 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3
74
+ // `zz` is `x` reduced to [1, 8)
75
+ let izz = ( iz + ( F :: Int :: cast_from ( it ) << F :: SIG_BITS ) ) | sign;
76
+ let zz: F = F :: from_bits ( izz ) ;
65
77
66
78
/* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
67
- let mut isc = F :: ESCALE [ it as usize ] . to_bits ( ) ; // todo: index
68
- isc |= sign;
69
- let cvt2 = isc;
70
- let z: F = F :: from_bits ( cvt1) ;
79
+ let isc = F :: ESCALE [ it as usize ] . to_bits ( ) | sign;
80
+ let z: F = F :: from_bits ( iz) ;
71
81
72
82
/* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
73
83
and 1 <= z < 2 */
74
84
let r: F = F :: ONE / z;
75
- let rr: F = r * F :: RSC [ ( ( it as usize ) << 1 ) | neg as usize ] ;
85
+ let rr: F = r * F :: RSCALE [ ( ( it as usize ) << 1 ) | neg as usize ] ;
76
86
let z2: F = z * z;
77
87
let c0: F = F :: C [ 0 ] + z * F :: C [ 1 ] ;
78
88
let c2: F = F :: C [ 2 ] + z * F :: C [ 3 ] ;
79
- let mut y: F = c0 + z2 * c2;
80
- let mut y2: F = y * y;
81
89
82
90
/* y is an approximation of z^(1/3) */
83
- let mut h: F = y2 * ( y * r) - F :: ONE ;
91
+ let mut y: F = c0 + z2 * c2;
92
+ let mut y2: F = y * y;
84
93
85
94
/* h determines the error between y and z^(1/3) */
86
- y -= ( h * y ) * ( u0 - u1 * h ) ;
95
+ let mut h : F = y2 * ( y * r ) - F :: ONE ;
87
96
88
97
/* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
89
98
of Newton's method, with the function f(y) = 1-z/y^3. */
90
- y *= F :: from_bits ( cvt2) ;
99
+ y -= ( h * y) * ( u0 - u1 * h) ;
100
+
101
+ y *= F :: from_bits ( isc) ;
91
102
92
103
/* Now y is an approximation of zz^(1/3),
93
104
* and rr an approximation of 1/zz. We now perform another iteration of
@@ -194,7 +205,7 @@ pub trait CbrtHelper: Float {
194
205
const C : [ Self ; 4 ] ;
195
206
const U0 : Self ;
196
207
const U1 : Self ;
197
- const RSC : [ Self ; 6 ] ;
208
+ const RSCALE : [ Self ; 6 ] ;
198
209
const OFF : [ Self ; 4 ] ;
199
210
const WLIST : [ ( Self , Self ) ; 7 ] ;
200
211
const AZMAGIC1 : Self ;
@@ -212,17 +223,23 @@ impl CbrtHelper for f64 {
212
223
] ;
213
224
214
225
const C : [ Self ; 4 ] = [
226
+ // hf64!("0x1.1b850259b99ddp-1"),
227
+ // hf64!("0x1.2b9762efeffecp-1"),
228
+ // hf64!("-0x1.4af8eb64ea1ecp-3"),
229
+ // hf64!("0x1.7590cccfad50bp-6"),
215
230
hf64 ! ( "0x1.1b0babccfef9cp-1" ) ,
216
231
hf64 ! ( "0x1.2c9a3e94d1da5p-1" ) ,
217
232
hf64 ! ( "-0x1.4dc30b1a1ddbap-3" ) ,
218
233
hf64 ! ( "0x1.7a8d3e4ec9b07p-6" ) ,
219
234
] ;
220
235
236
+ // 0.33333333...
221
237
const U0 : Self = hf64 ! ( "0x1.5555555555555p-2" ) ;
222
238
239
+ // 0.22222222...
223
240
const U1 : Self = hf64 ! ( "0x1.c71c71c71c71cp-3" ) ;
224
241
225
- const RSC : [ Self ; 6 ] = [ 1.0 , -1.0 , 0.5 , -0.5 , 0.25 , -0.25 ] ;
242
+ const RSCALE : [ Self ; 6 ] = [ 1.0 , -1.0 , 0.5 , -0.5 , 0.25 , -0.25 ] ;
226
243
227
244
const OFF : [ Self ; 4 ] = [ hf64 ! ( "0x1p-53" ) , 0.0 , 0.0 , 0.0 ] ;
228
245
@@ -254,6 +271,51 @@ impl CbrtHelper for f64 {
254
271
}
255
272
}
256
273
274
+ #[ cfg( f128_enabled) ]
275
+ impl CbrtHelper for f128 {
276
+ const ESCALE : [ Self ; 3 ] = [
277
+ 1.0 ,
278
+ hf128 ! ( "0x1.428a2f98d728acf826cc8664b665p+0" ) , /* 2^(1/3) */
279
+ hf128 ! ( "0x1.965fea53d6e3c53be1ca3482bf3ap+0" ) , /* 2^(2/3) */
280
+ ] ;
281
+
282
+ const C : [ Self ; 4 ] = [
283
+ hf128 ! ( "0x1.1b850223b8bf644fcef50feeced1p-1" ) ,
284
+ hf128 ! ( "0x1.2b97635e9e17d5240965cb56dc73p-1" ) ,
285
+ hf128 ! ( "-0x1.4af8ec964bbc3767a6cf8ac456cbp-3" ) ,
286
+ hf128 ! ( "0x1.7590ceecbb8c4c40d8c5e8b64d6bp-6" ) ,
287
+ ] ;
288
+
289
+ const U0 : Self = 0.3333333333333333333333333333333333333333 ;
290
+
291
+ const U1 : Self = 0.2222222222222222222222222222222222222222 ;
292
+
293
+ const RSCALE : [ Self ; 6 ] = [ 1.0 , -1.0 , 0.5 , -0.5 , 0.25 , -0.25 ] ;
294
+
295
+ const OFF : [ Self ; 4 ] = [ hf128 ! ( "0x1p-53" ) , 0.0 , 0.0 , 0.0 ] ;
296
+
297
+ // Other rounding modes unsupported for f128
298
+ const WLIST : [ ( Self , Self ) ; 7 ] =
299
+ [ ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) ] ;
300
+
301
+ const AZMAGIC1 : Self = hf128 ! ( "0x1.9b78223aa307cp+1" ) ;
302
+ const AZMAGIC2 : Self = hf128 ! ( "0x1.79d15d0e8d59cp+0" ) ;
303
+ const AZMAGIC3 : Self = hf128 ! ( "0x1.a202bfc89ddffp+2" ) ;
304
+ const AZMAGIC4 : Self = hf128 ! ( "0x1.de87aa837820fp+0" ) ;
305
+
306
+ fn fma ( self , y : Self , z : Self ) -> Self {
307
+ #[ cfg( intrinsics_enabled) ]
308
+ {
309
+ return unsafe { core:: intrinsics:: fmaf128 ( self , y, z) } ;
310
+ }
311
+
312
+ #[ cfg( not( intrinsics_enabled) ) ]
313
+ {
314
+ return super :: fmaf128 ( self , y, z) ;
315
+ }
316
+ }
317
+ }
318
+
257
319
#[ cfg( test) ]
258
320
mod tests {
259
321
use super :: * ;
0 commit comments