@@ -45,7 +45,7 @@ macro_rules! int {
45
45
}
46
46
}
47
47
48
- int ! ( u32 , u64 ) ;
48
+ int ! ( u16 , u32 , u64 ) ;
49
49
50
50
/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
51
51
///
@@ -189,9 +189,14 @@ pub trait RawFloat:
189
189
190
190
/// Returns the mantissa, exponent and sign as integers.
191
191
///
192
- /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
193
- /// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
194
- /// minimum subnormal power.
192
+ /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
193
+ /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
194
+ /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
195
+ ///
196
+ /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
197
+ /// with the explicit bit set but otherwise unshifted.
198
+ ///
199
+ /// `s` is only ever +/-1.
195
200
fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
196
201
let bits = self . to_bits ( ) ;
197
202
let sign: i8 = if bits >> ( Self :: BITS - 1 ) == Self :: Int :: ZERO { 1 } else { -1 } ;
@@ -213,6 +218,49 @@ const fn pow2_to_pow10(a: i64) -> i64 {
213
218
res as i64
214
219
}
215
220
221
+ impl RawFloat for f16 {
222
+ type Int = u16 ;
223
+
224
+ const INFINITY : Self = Self :: INFINITY ;
225
+ const NEG_INFINITY : Self = Self :: NEG_INFINITY ;
226
+ const NAN : Self = Self :: NAN ;
227
+ const NEG_NAN : Self = -Self :: NAN ;
228
+
229
+ const BITS : u32 = 16 ;
230
+ const SIG_TOTAL_BITS : u32 = Self :: MANTISSA_DIGITS ;
231
+ const EXP_MASK : Self :: Int = Self :: EXP_MASK ;
232
+ const SIG_MASK : Self :: Int = Self :: MAN_MASK ;
233
+
234
+ const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -22 ;
235
+ const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 5 ;
236
+ const SMALLEST_POWER_OF_TEN : i32 = -27 ;
237
+
238
+ #[ inline]
239
+ fn from_u64 ( v : u64 ) -> Self {
240
+ debug_assert ! ( v <= Self :: MAX_MANTISSA_FAST_PATH ) ;
241
+ v as _
242
+ }
243
+
244
+ #[ inline]
245
+ fn from_u64_bits ( v : u64 ) -> Self {
246
+ Self :: from_bits ( ( v & 0xFFFF ) as u16 )
247
+ }
248
+
249
+ fn pow10_fast_path ( exponent : usize ) -> Self {
250
+ #[ allow( clippy:: use_self) ]
251
+ const TABLE : [ f16 ; 8 ] = [ 1e0 , 1e1 , 1e2 , 1e3 , 1e4 , 0.0 , 0.0 , 0. ] ;
252
+ TABLE [ exponent & 7 ]
253
+ }
254
+
255
+ fn to_bits ( self ) -> Self :: Int {
256
+ self . to_bits ( )
257
+ }
258
+
259
+ fn classify ( self ) -> FpCategory {
260
+ self . classify ( )
261
+ }
262
+ }
263
+
216
264
impl RawFloat for f32 {
217
265
type Int = u32 ;
218
266
0 commit comments