Skip to content

Commit 38e61cd

Browse files
committed
Add NaN-in-negative-zero formats
Upstream LLVM commit: 6109e70c72fc5171d25c4467fc3cfe6eb2029f50 Adds Float8E5M2FNUZ and Float8E4M3FNUZ formats, where NaN is represented as negative zero
1 parent 2b347e7 commit 38e61cd

File tree

4 files changed

+822
-76
lines changed

4 files changed

+822
-76
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
members = ["fuzz"]
33

44
[workspace.package]
5-
version = "0.2.0+llvm-038f7debfda0"
5+
version = "0.2.0+llvm-6109e70c72fc"
66
edition = "2021"
77
license = "Apache-2.0 WITH LLVM-exception"
88

src/ieee.rs

+86-18
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,40 @@ pub enum NonfiniteBehavior {
8181
/// significand bits are all zero, and NaN otherwise
8282
IEEE754,
8383

84-
/// Only the Float8E5M2 has this behavior. There is no Inf representation. A
85-
/// value is NaN if the exponent field and the mantissa field are all 1s.
84+
/// This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
85+
/// Float8E5M2FNUZ, and Float8E4M3FNUZ). There is no representation for Inf,
86+
/// and operations that would ordinarily produce Inf produce NaN instead.
87+
/// The details of the NaN representation(s) in this form are determined by the
88+
/// `NanEncoding` enum. We treat all NaNs as quiet, as the available
89+
/// encodings do not distinguish between signalling and quiet NaN.
90+
NanOnly,
91+
}
92+
93+
/// How NaN values are represented.
94+
///
95+
/// This is currently only used in combination with `NonfiniteBehavior::NanOnly`,
96+
/// and using a variant other than IEEE while having IEEE non-finite behavior is
97+
/// liable to lead to unexpected results.
98+
#[derive(Copy, Clone, PartialEq, Eq)]
99+
pub enum NanEncoding {
100+
/// Represents the standard IEEE behavior where a value is NaN if its
101+
/// exponent is all 1s and the significand is non-zero.
102+
IEEE,
103+
104+
/// Represents the behavior in the Float8E4M3 floating point type where NaN is
105+
/// represented by having the exponent and mantissa set to all 1s.
86106
/// This behavior matches the FP8 E4M3 type described in
87107
/// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
88108
/// as non-signalling, although the paper does not state whether the NaN
89109
/// values are signalling or not.
90-
NanOnly,
110+
AllOnes,
111+
112+
/// Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
113+
/// where NaN is represented by a sign bit of 1 and all 0s in the exponent
114+
/// and mantissa (i.e. the negative zero encoding in an IEEE float). Since
115+
/// there is only one NaN value, it is treated as quiet NaN. This matches the
116+
/// behavior described in https://arxiv.org/abs/2206.02915 .
117+
NegativeZero,
91118
}
92119

93120
// HACK(eddyb) extension method flipping/changing the sign based on `bool`s.
@@ -123,6 +150,9 @@ pub trait Semantics: Sized {
123150
/// How the nonfinite values Inf and NaN are represented.
124151
const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754;
125152

153+
/// How NaN values are represented.
154+
const NAN_ENCODING: NanEncoding = NanEncoding::IEEE;
155+
126156
/// The largest E such that 2^E is representable; this matches the
127157
/// definition of IEEE 754.
128158
const MAX_EXP: ExpInt = {
@@ -144,9 +174,10 @@ pub trait Semantics: Sized {
144174
/// The base significand bitpattern of NaNs, i.e. the bits that must always
145175
/// be set in all NaNs, with other significand bits being either used for
146176
/// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset.
147-
const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR {
148-
NonfiniteBehavior::IEEE754 => 0,
149-
NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1,
177+
const NAN_SIGNIFICAND_BASE: Limb = match Self::NAN_ENCODING {
178+
NanEncoding::IEEE => 0,
179+
NanEncoding::AllOnes => (1 << (Self::PRECISION - 1)) - 1,
180+
NanEncoding::NegativeZero => 0,
150181
};
151182

152183
/// The significand bitmask for the payload of a NaN (if supported),
@@ -303,12 +334,41 @@ ieee_semantics! {
303334
// layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
304335
Float8E5M2 = Float8E5M2S(8:5),
305336

337+
// 8-bit floating point number mostly following IEEE-754 conventions
338+
// and bit layout S1E5M2 described in https://arxiv.org/abs/2206.02915,
339+
// with expanded range and with no infinity or signed zero.
340+
// NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
341+
// This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1)
342+
// that IEEE precedent would imply.
343+
Float8E5M2FNUZ = Float8E5M2FNUZS(8:5) {
344+
const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
345+
const NAN_ENCODING: NanEncoding = NanEncoding::NegativeZero;
346+
347+
const MAX_EXP: ExpInt = 15;
348+
const MIN_EXP: ExpInt = -15;
349+
},
350+
306351
// 8-bit floating point number mostly following IEEE-754 conventions with
307352
// bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
308353
// Unlike IEEE-754 types, there are no infinity values, and NaN is
309354
// represented with the exponent and mantissa bits set to all 1s.
310355
Float8E4M3FN = Float8E4M3FNS(8:4) {
311356
const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
357+
const NAN_ENCODING: NanEncoding = NanEncoding::AllOnes;
358+
},
359+
360+
// 8-bit floating point number mostly following IEEE-754 conventions
361+
// and bit layout S1E4M3 described in https://arxiv.org/abs/2206.02915,
362+
// with expanded range and with no infinity or signed zero.
363+
// NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
364+
// This format's exponent bias is 8, instead of the 7 (2 ** (4 - 1) - 1)
365+
// that IEEE precedent would imply.
366+
Float8E4M3FNUZ = Float8E4M3FNUZS(8:4) {
367+
const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
368+
const NAN_ENCODING: NanEncoding = NanEncoding::NegativeZero;
369+
370+
const MAX_EXP: ExpInt = 7;
371+
const MIN_EXP: ExpInt = -7;
312372
},
313373
}
314374

@@ -454,7 +514,10 @@ impl<S: Semantics> PartialOrd for IeeeFloat<S> {
454514
impl<S: Semantics> Neg for IeeeFloat<S> {
455515
type Output = Self;
456516
fn neg(mut self) -> Self {
457-
self.read_only_sign_do_not_mutate = !self.is_negative();
517+
if S::NAN_ENCODING != NanEncoding::NegativeZero || (!self.is_nan() && !self.is_zero()) {
518+
// If NaN is encoded as negative zero, avoid converting NaN to zero or vice versa.
519+
self.read_only_sign_do_not_mutate = !self.is_negative();
520+
}
458521
self
459522
}
460523
}
@@ -854,16 +917,17 @@ impl<S: Semantics> IeeeFloat<S> {
854917
None => 0,
855918
}];
856919

857-
let exp = match S::NONFINITE_BEHAVIOR {
858-
NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1,
859-
NonfiniteBehavior::NanOnly => S::MAX_EXP,
920+
let (exp, sign) = match S::NAN_ENCODING {
921+
NanEncoding::IEEE => (S::MAX_EXP + 1, false),
922+
NanEncoding::AllOnes => (S::MAX_EXP, false),
923+
NanEncoding::NegativeZero => (S::MIN_EXP - 1, true),
860924
};
861925

862926
IeeeFloat {
863927
sig,
864928
exp,
865929
read_only_category_do_not_mutate: Category::NaN,
866-
read_only_sign_do_not_mutate: false,
930+
read_only_sign_do_not_mutate: sign,
867931
marker: PhantomData,
868932
}
869933
}
@@ -928,15 +992,15 @@ impl<S: Semantics> Float for IeeeFloat<S> {
928992
// significand = 1..1
929993
IeeeFloat {
930994
sig: [((1 << S::PRECISION) - 1)
931-
& match S::NONFINITE_BEHAVIOR {
995+
& match S::NAN_ENCODING {
932996
// The largest number by magnitude in our format will be the floating point
933997
// number with maximum exponent and with significand that is all ones.
934-
NonfiniteBehavior::IEEE754 => !0,
998+
NanEncoding::IEEE | NanEncoding::NegativeZero => !0,
935999

9361000
// The largest number by magnitude in our format will be the floating point
9371001
// number with maximum exponent and with significand that is all ones except
9381002
// the LSB.
939-
NonfiniteBehavior::NanOnly => !1,
1003+
NanEncoding::AllOnes => !1,
9401004
}],
9411005
exp: S::MAX_EXP,
9421006
read_only_category_do_not_mutate: Category::Normal,
@@ -2053,10 +2117,12 @@ impl<S: Semantics> IeeeFloat<S> {
20532117
}
20542118
}
20552119

2056-
// NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
2120+
// The all-ones value is an overflow if NaN is all ones. If NaN is
2121+
// represented by negative zero, then it is a valid finite value.
2122+
// NOTE(eddyb) for `NanEncoding::AllOnes`, the unique `NAN` takes up
20572123
// the largest significand of `MAX_EXP` (which also has normals), though
20582124
// comparing significands needs to ignore the integer bit `NAN` lacks.
2059-
if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
2125+
if S::NAN_ENCODING == NanEncoding::AllOnes
20602126
&& self.exp == Self::NAN.exp
20612127
&& [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
20622128
{
@@ -2101,10 +2167,12 @@ impl<S: Semantics> IeeeFloat<S> {
21012167
return Status::INEXACT.and(self);
21022168
}
21032169

2104-
// NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
2170+
// The all-ones value is an overflow if NaN is all ones. If NaN is
2171+
// represented by negative zero, then it is a valid finite value.
2172+
// NOTE(eddyb) for `NanEncoding::AllOnes`, the unique `NAN` takes up
21052173
// the largest significand of `MAX_EXP` (which also has normals), though
21062174
// comparing significands needs to ignore the integer bit `NAN` lacks.
2107-
if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
2175+
if S::NAN_ENCODING == NanEncoding::AllOnes
21082176
&& self.exp == Self::NAN.exp
21092177
&& [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
21102178
{

src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Port of LLVM's APFloat software floating-point implementation from the
22
//! following C++ sources (please update commit hash when backporting):
3-
//! https://github.com/llvm/llvm-project/commit/038f7debfda01471ce0d4eb1fed20da61e5c8b32
3+
//! https://github.com/llvm/llvm-project/commit/6109e70c72fc5171d25c4467fc3cfe6eb2029f50
44
//! * `llvm/include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits
55
//! * `llvm/lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules
66
//! * `llvm/unittests/ADT/APFloatTest.cpp` -> `tests` directory

0 commit comments

Comments
 (0)