Add NaN-in-negative-zero formats

eduardosm · eduardosm · commit 38e61cd0ad07 · 2023-10-14T19:30:50.000+02:00
Upstream LLVM commit: 6109e70c72fc5171d25c4467fc3cfe6eb2029f50

Adds Float8E5M2FNUZ and Float8E4M3FNUZ formats, where NaN is represented as negative zero
diff --git a/Cargo.toml b/Cargo.toml
@@ -2,7 +2,7 @@
 members = ["fuzz"]
 
 [workspace.package]
-version = "0.2.0+llvm-038f7debfda0"
+version = "0.2.0+llvm-6109e70c72fc"
 edition = "2021"
 license = "Apache-2.0 WITH LLVM-exception"
 
diff --git a/src/ieee.rs b/src/ieee.rs
@@ -81,13 +81,40 @@ pub enum NonfiniteBehavior {
     /// significand bits are all zero, and NaN otherwise
     IEEE754,
 
-    /// Only the Float8E5M2 has this behavior. There is no Inf representation. A
-    /// value is NaN if the exponent field and the mantissa field are all 1s.
+    /// This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
+    /// Float8E5M2FNUZ, and Float8E4M3FNUZ). There is no representation for Inf,
+    /// and operations that would ordinarily produce Inf produce NaN instead.
+    /// The details of the NaN representation(s) in this form are determined by the
+    /// `NanEncoding` enum. We treat all NaNs as quiet, as the available
+    /// encodings do not distinguish between signalling and quiet NaN.
+    NanOnly,
+}
+
+/// How NaN values are represented.
+///
+/// This is currently only used in combination with `NonfiniteBehavior::NanOnly`,
+/// and using a variant other than IEEE while having IEEE non-finite behavior is
+/// liable to lead to unexpected results.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum NanEncoding {
+    /// Represents the standard IEEE behavior where a value is NaN if its
+    /// exponent is all 1s and the significand is non-zero.
+    IEEE,
+
+    /// Represents the behavior in the Float8E4M3 floating point type where NaN is
+    /// represented by having the exponent and mantissa set to all 1s.
     /// This behavior matches the FP8 E4M3 type described in
     /// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
     /// as non-signalling, although the paper does not state whether the NaN
     /// values are signalling or not.
-    NanOnly,
+    AllOnes,
+
+    /// Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
+    /// where NaN is represented by a sign bit of 1 and all 0s in the exponent
+    /// and mantissa (i.e. the negative zero encoding in an IEEE float). Since
+    /// there is only one NaN value, it is treated as quiet NaN. This matches the
+    /// behavior described in https://arxiv.org/abs/2206.02915 .
+    NegativeZero,
 }
 
 // HACK(eddyb) extension method flipping/changing the sign based on `bool`s.
@@ -123,6 +150,9 @@ pub trait Semantics: Sized {
     /// How the nonfinite values Inf and NaN are represented.
     const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754;
 
+    /// How NaN values are represented.
+    const NAN_ENCODING: NanEncoding = NanEncoding::IEEE;
+
     /// The largest E such that 2^E is representable; this matches the
     /// definition of IEEE 754.
     const MAX_EXP: ExpInt = {
@@ -144,9 +174,10 @@ pub trait Semantics: Sized {
     /// The base significand bitpattern of NaNs, i.e. the bits that must always
     /// be set in all NaNs, with other significand bits being either used for
     /// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset.
-    const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR {
-        NonfiniteBehavior::IEEE754 => 0,
-        NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1,
+    const NAN_SIGNIFICAND_BASE: Limb = match Self::NAN_ENCODING {
+        NanEncoding::IEEE => 0,
+        NanEncoding::AllOnes => (1 << (Self::PRECISION - 1)) - 1,
+        NanEncoding::NegativeZero => 0,
     };
 
     /// The significand bitmask for the payload of a NaN (if supported),
@@ -303,12 +334,41 @@ ieee_semantics! {
     // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
     Float8E5M2 = Float8E5M2S(8:5),
 
+    // 8-bit floating point number mostly following IEEE-754 conventions
+    // and bit layout S1E5M2 described in https://arxiv.org/abs/2206.02915,
+    // with expanded range and with no infinity or signed zero.
+    // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
+    // This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1)
+    // that IEEE precedent would imply.
+    Float8E5M2FNUZ = Float8E5M2FNUZS(8:5) {
+        const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
+        const NAN_ENCODING: NanEncoding = NanEncoding::NegativeZero;
+
+        const MAX_EXP: ExpInt = 15;
+        const MIN_EXP: ExpInt = -15;
+    },
+
     // 8-bit floating point number mostly following IEEE-754 conventions with
     // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
     // Unlike IEEE-754 types, there are no infinity values, and NaN is
     // represented with the exponent and mantissa bits set to all 1s.
     Float8E4M3FN = Float8E4M3FNS(8:4) {
         const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
+        const NAN_ENCODING: NanEncoding = NanEncoding::AllOnes;
+    },
+
+    // 8-bit floating point number mostly following IEEE-754 conventions
+    // and bit layout S1E4M3 described in https://arxiv.org/abs/2206.02915,
+    // with expanded range and with no infinity or signed zero.
+    // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
+    // This format's exponent bias is 8, instead of the 7 (2 ** (4 - 1) - 1)
+    // that IEEE precedent would imply.
+    Float8E4M3FNUZ = Float8E4M3FNUZS(8:4) {
+        const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
+        const NAN_ENCODING: NanEncoding = NanEncoding::NegativeZero;
+
+        const MAX_EXP: ExpInt = 7;
+        const MIN_EXP: ExpInt = -7;
     },
 }
 
@@ -454,7 +514,10 @@ impl<S: Semantics> PartialOrd for IeeeFloat<S> {
 impl<S: Semantics> Neg for IeeeFloat<S> {
     type Output = Self;
     fn neg(mut self) -> Self {
-        self.read_only_sign_do_not_mutate = !self.is_negative();
+        if S::NAN_ENCODING != NanEncoding::NegativeZero || (!self.is_nan() && !self.is_zero()) {
+            // If NaN is encoded as negative zero, avoid converting NaN to zero or vice versa.
+            self.read_only_sign_do_not_mutate = !self.is_negative();
+        }
         self
     }
 }
@@ -854,16 +917,17 @@ impl<S: Semantics> IeeeFloat<S> {
                 None => 0,
             }];
 
-        let exp = match S::NONFINITE_BEHAVIOR {
-            NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1,
-            NonfiniteBehavior::NanOnly => S::MAX_EXP,
+        let (exp, sign) = match S::NAN_ENCODING {
+            NanEncoding::IEEE => (S::MAX_EXP + 1, false),
+            NanEncoding::AllOnes => (S::MAX_EXP, false),
+            NanEncoding::NegativeZero => (S::MIN_EXP - 1, true),
         };
 
         IeeeFloat {
             sig,
             exp,
             read_only_category_do_not_mutate: Category::NaN,
-            read_only_sign_do_not_mutate: false,
+            read_only_sign_do_not_mutate: sign,
             marker: PhantomData,
         }
     }
@@ -928,15 +992,15 @@ impl<S: Semantics> Float for IeeeFloat<S> {
         //   significand = 1..1
         IeeeFloat {
             sig: [((1 << S::PRECISION) - 1)
-                & match S::NONFINITE_BEHAVIOR {
+                & match S::NAN_ENCODING {
                     // The largest number by magnitude in our format will be the floating point
                     // number with maximum exponent and with significand that is all ones.
-                    NonfiniteBehavior::IEEE754 => !0,
+                    NanEncoding::IEEE | NanEncoding::NegativeZero => !0,
 
                     // The largest number by magnitude in our format will be the floating point
                     // number with maximum exponent and with significand that is all ones except
                     // the LSB.
-                    NonfiniteBehavior::NanOnly => !1,
+                    NanEncoding::AllOnes => !1,
                 }],
             exp: S::MAX_EXP,
             read_only_category_do_not_mutate: Category::Normal,
@@ -2053,10 +2117,12 @@ impl<S: Semantics> IeeeFloat<S> {
             }
         }
 
-        // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
+        // The all-ones value is an overflow if NaN is all ones. If NaN is
+        // represented by negative zero, then it is a valid finite value.
+        // NOTE(eddyb) for `NanEncoding::AllOnes`, the unique `NAN` takes up
         // the largest significand of `MAX_EXP` (which also has normals), though
         // comparing significands needs to ignore the integer bit `NAN` lacks.
-        if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
+        if S::NAN_ENCODING == NanEncoding::AllOnes
             && self.exp == Self::NAN.exp
             && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
         {
@@ -2101,10 +2167,12 @@ impl<S: Semantics> IeeeFloat<S> {
                 return Status::INEXACT.and(self);
             }
 
-            // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
+            // The all-ones value is an overflow if NaN is all ones. If NaN is
+            // represented by negative zero, then it is a valid finite value.
+            // NOTE(eddyb) for `NanEncoding::AllOnes`, the unique `NAN` takes up
             // the largest significand of `MAX_EXP` (which also has normals), though
             // comparing significands needs to ignore the integer bit `NAN` lacks.
-            if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
+            if S::NAN_ENCODING == NanEncoding::AllOnes
                 && self.exp == Self::NAN.exp
                 && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
             {
diff --git a/src/lib.rs b/src/lib.rs
@@ -1,6 +1,6 @@
 //! Port of LLVM's APFloat software floating-point implementation from the
 //! following C++ sources (please update commit hash when backporting):
-//! https://github.com/llvm/llvm-project/commit/038f7debfda01471ce0d4eb1fed20da61e5c8b32
+//! https://github.com/llvm/llvm-project/commit/6109e70c72fc5171d25c4467fc3cfe6eb2029f50
 //! * `llvm/include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits
 //! * `llvm/lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules
 //! * `llvm/unittests/ADT/APFloatTest.cpp` -> `tests` directory
diff --git a/tests/ieee.rs b/tests/ieee.rs