float: Add f16 parsing and printing

tgross35 · tgross35 · commit 4a6ba24193fb · 2025-04-29T05:43:54.000Z
Use the existing Lemire (decimal -> float) and Dragon / Grisu algorithms
(float -> decimal) to add support for `f16`. This allows updating the
implementation for `Display` to the expected behavior for `Display`
(currently it prints a hex bitwise representation) and adds a
`FromStr` implementation.
diff --git a/library/core/src/fmt/float.rs b/library/core/src/fmt/float.rs
@@ -20,6 +20,7 @@ macro_rules! impl_general_format {
     }
 }
 
+impl_general_format! { f16 }
 impl_general_format! { f32 f64 }
 
 // Don't inline this so callers don't use the stack space this function
@@ -229,15 +230,7 @@ macro_rules! floating {
     };
 }
 
-floating! { f32 f64 }
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl Debug for f16 {
-    #[inline]
-    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-        write!(f, "{:#06x}", self.to_bits())
-    }
-}
+floating! { f16 f32 f64 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl Debug for f128 {
diff --git a/library/core/src/num/dec2flt/float.rs b/library/core/src/num/dec2flt/float.rs
@@ -45,7 +45,7 @@ macro_rules! int {
     }
 }
 
-int!(u32, u64);
+int!(u16, u32, u64);
 
 /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
 ///
@@ -189,9 +189,14 @@ pub trait RawFloat:
 
     /// Returns the mantissa, exponent and sign as integers.
     ///
-    /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
-    /// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
-    /// minimum subnormal power.
+    /// This returns `(m, p, s)` such that `s * m * 2^p` represents the original float. For 0, the
+    /// exponent will be `-(EXP_BIAS + SIG_BITS)`, which is the minimum subnormal power. For
+    /// infinity or NaN, the exponent will be `EXP_SAT - EXP_BIAS - SIG_BITS`.
+    ///
+    /// If subnormal, the mantissa will be shifted one bit to the left. Otherwise, it is returned
+    /// with the explicit bit set but otherwise unshifted.
+    ///
+    /// `s` is only ever +/-1.
     fn integer_decode(self) -> (u64, i16, i8) {
         let bits = self.to_bits();
         let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
@@ -213,6 +218,49 @@ const fn pow2_to_pow10(a: i64) -> i64 {
     res as i64
 }
 
+impl RawFloat for f16 {
+    type Int = u16;
+
+    const INFINITY: Self = Self::INFINITY;
+    const NEG_INFINITY: Self = Self::NEG_INFINITY;
+    const NAN: Self = Self::NAN;
+    const NEG_NAN: Self = -Self::NAN;
+
+    const BITS: u32 = 16;
+    const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
+    const EXP_MASK: Self::Int = Self::EXP_MASK;
+    const SIG_MASK: Self::Int = Self::MAN_MASK;
+
+    const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -22;
+    const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 5;
+    const SMALLEST_POWER_OF_TEN: i32 = -27;
+
+    #[inline]
+    fn from_u64(v: u64) -> Self {
+        debug_assert!(v <= Self::MAX_MANTISSA_FAST_PATH);
+        v as _
+    }
+
+    #[inline]
+    fn from_u64_bits(v: u64) -> Self {
+        Self::from_bits((v & 0xFFFF) as u16)
+    }
+
+    fn pow10_fast_path(exponent: usize) -> Self {
+        #[allow(clippy::use_self)]
+        const TABLE: [f16; 8] = [1e0, 1e1, 1e2, 1e3, 1e4, 0.0, 0.0, 0.];
+        TABLE[exponent & 7]
+    }
+
+    fn to_bits(self) -> Self::Int {
+        self.to_bits()
+    }
+
+    fn classify(self) -> FpCategory {
+        self.classify()
+    }
+}
+
 impl RawFloat for f32 {
     type Int = u32;
 
diff --git a/library/core/src/num/dec2flt/mod.rs b/library/core/src/num/dec2flt/mod.rs
@@ -171,6 +171,8 @@ macro_rules! from_str_float_impl {
         }
     };
 }
+
+from_str_float_impl!(f16);
 from_str_float_impl!(f32);
 from_str_float_impl!(f64);
 
diff --git a/library/core/src/num/flt2dec/decoder.rs b/library/core/src/num/flt2dec/decoder.rs
@@ -45,6 +45,12 @@ pub trait DecodableFloat: RawFloat + Copy {
     fn min_pos_norm_value() -> Self;
 }
 
+impl DecodableFloat for f16 {
+    fn min_pos_norm_value() -> Self {
+        f16::MIN_POSITIVE
+    }
+}
+
 impl DecodableFloat for f32 {
     fn min_pos_norm_value() -> Self {
         f32::MIN_POSITIVE

Original file line number	Diff line number	Diff line change
`@@ -171,6 +171,8 @@ macro_rules! from_str_float_impl {`
`171`	`171`	`}`
`172`	`172`	`};`
`173`	`173`	`}`
	`174`	`+`
	`175`	`+from_str_float_impl!(f16);`
`174`	`176`	`from_str_float_impl!(f32);`
`175`	`177`	`from_str_float_impl!(f64);`
`176`	`178`