@@ -81,13 +81,40 @@ pub enum NonfiniteBehavior {
81
81
/// significand bits are all zero, and NaN otherwise
82
82
IEEE754 ,
83
83
84
- /// Only the Float8E5M2 has this behavior. There is no Inf representation. A
85
- /// value is NaN if the exponent field and the mantissa field are all 1s.
84
+ /// This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
85
+ /// Float8E5M2FNUZ, and Float8E4M3FNUZ). There is no representation for Inf,
86
+ /// and operations that would ordinarily produce Inf produce NaN instead.
87
+ /// The details of the NaN representation(s) in this form are determined by the
88
+ /// `NanEncoding` enum. We treat all NaNs as quiet, as the available
89
+ /// encodings do not distinguish between signalling and quiet NaN.
90
+ NanOnly ,
91
+ }
92
+
93
+ /// How NaN values are represented.
94
+ ///
95
+ /// This is currently only used in combination with `NonfiniteBehavior::NanOnly`,
96
+ /// and using a variant other than IEEE while having IEEE non-finite behavior is
97
+ /// liable to lead to unexpected results.
98
+ #[ derive( Copy , Clone , PartialEq , Eq ) ]
99
+ pub enum NanEncoding {
100
+ /// Represents the standard IEEE behavior where a value is NaN if its
101
+ /// exponent is all 1s and the significand is non-zero.
102
+ IEEE ,
103
+
104
+ /// Represents the behavior in the Float8E4M3FN floating point type where NaN is
105
+ /// represented by having the exponent and mantissa set to all 1s.
86
106
/// This behavior matches the FP8 E4M3 type described in
87
107
/// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
88
108
/// as non-signalling, although the paper does not state whether the NaN
89
109
/// values are signalling or not.
90
- NanOnly ,
110
+ AllOnes ,
111
+
112
+ /// Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
113
+ /// where NaN is represented by a sign bit of 1 and all 0s in the exponent
114
+ /// and mantissa (i.e. the negative zero encoding in an IEEE float). Since
115
+ /// there is only one NaN value, it is treated as quiet NaN. This matches the
116
+ /// behavior described in https://arxiv.org/abs/2206.02915 .
117
+ NegativeZero ,
91
118
}
92
119
93
120
// HACK(eddyb) extension method flipping/changing the sign based on `bool`s.
@@ -123,6 +150,9 @@ pub trait Semantics: Sized {
123
150
/// How the nonfinite values Inf and NaN are represented.
124
151
const NONFINITE_BEHAVIOR : NonfiniteBehavior = NonfiniteBehavior :: IEEE754 ;
125
152
153
+ /// How NaN values are represented.
154
+ const NAN_ENCODING : NanEncoding = NanEncoding :: IEEE ;
155
+
126
156
/// The largest E such that 2^E is representable; this matches the
127
157
/// definition of IEEE 754.
128
158
const MAX_EXP : ExpInt = {
@@ -144,9 +174,10 @@ pub trait Semantics: Sized {
144
174
/// The base significand bitpattern of NaNs, i.e. the bits that must always
145
175
/// be set in all NaNs, with other significand bits being either used for
146
176
/// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset.
147
- const NAN_SIGNIFICAND_BASE : Limb = match Self :: NONFINITE_BEHAVIOR {
148
- NonfiniteBehavior :: IEEE754 => 0 ,
149
- NonfiniteBehavior :: NanOnly => ( 1 << ( Self :: PRECISION - 1 ) ) - 1 ,
177
+ const NAN_SIGNIFICAND_BASE : Limb = match Self :: NAN_ENCODING {
178
+ NanEncoding :: IEEE => 0 ,
179
+ NanEncoding :: AllOnes => ( 1 << ( Self :: PRECISION - 1 ) ) - 1 ,
180
+ NanEncoding :: NegativeZero => 0 ,
150
181
} ;
151
182
152
183
/// The significand bitmask for the payload of a NaN (if supported),
@@ -303,12 +334,41 @@ ieee_semantics! {
303
334
// layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
304
335
Float8E5M2 = Float8E5M2S ( 8 : 5 ) ,
305
336
337
+ // 8-bit floating point number mostly following IEEE-754 conventions
338
+ // and bit layout S1E5M2 described in https://arxiv.org/abs/2206.02915,
339
+ // with expanded range and with no infinity or signed zero.
340
+ // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
341
+ // This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1)
342
+ // that IEEE precedent would imply.
343
+ Float8E5M2FNUZ = Float8E5M2FNUZS ( 8 : 5 ) {
344
+ const NONFINITE_BEHAVIOR : NonfiniteBehavior = NonfiniteBehavior :: NanOnly ;
345
+ const NAN_ENCODING : NanEncoding = NanEncoding :: NegativeZero ;
346
+
347
+ const MAX_EXP : ExpInt = 15 ;
348
+ const MIN_EXP : ExpInt = -15 ;
349
+ } ,
350
+
306
351
// 8-bit floating point number mostly following IEEE-754 conventions with
307
352
// bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
308
353
// Unlike IEEE-754 types, there are no infinity values, and NaN is
309
354
// represented with the exponent and mantissa bits set to all 1s.
310
355
Float8E4M3FN = Float8E4M3FNS ( 8 : 4 ) {
311
356
const NONFINITE_BEHAVIOR : NonfiniteBehavior = NonfiniteBehavior :: NanOnly ;
357
+ const NAN_ENCODING : NanEncoding = NanEncoding :: AllOnes ;
358
+ } ,
359
+
360
+ // 8-bit floating point number mostly following IEEE-754 conventions
361
+ // and bit layout S1E4M3 described in https://arxiv.org/abs/2206.02915,
362
+ // with expanded range and with no infinity or signed zero.
363
+ // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
364
+ // This format's exponent bias is 8, instead of the 7 (2 ** (4 - 1) - 1)
365
+ // that IEEE precedent would imply.
366
+ Float8E4M3FNUZ = Float8E4M3FNUZS ( 8 : 4 ) {
367
+ const NONFINITE_BEHAVIOR : NonfiniteBehavior = NonfiniteBehavior :: NanOnly ;
368
+ const NAN_ENCODING : NanEncoding = NanEncoding :: NegativeZero ;
369
+
370
+ const MAX_EXP : ExpInt = 7 ;
371
+ const MIN_EXP : ExpInt = -7 ;
312
372
} ,
313
373
}
314
374
@@ -454,7 +514,10 @@ impl<S: Semantics> PartialOrd for IeeeFloat<S> {
454
514
impl < S : Semantics > Neg for IeeeFloat < S > {
455
515
type Output = Self ;
456
516
fn neg ( mut self ) -> Self {
457
- self . read_only_sign_do_not_mutate = !self . is_negative ( ) ;
517
+ if S :: NAN_ENCODING != NanEncoding :: NegativeZero || ( !self . is_nan ( ) && !self . is_zero ( ) ) {
518
+ // If NaN is encoded as negative zero, avoid converting NaN to zero or vice versa.
519
+ self . read_only_sign_do_not_mutate = !self . is_negative ( ) ;
520
+ }
458
521
self
459
522
}
460
523
}
@@ -854,16 +917,17 @@ impl<S: Semantics> IeeeFloat<S> {
854
917
None => 0 ,
855
918
} ] ;
856
919
857
- let exp = match S :: NONFINITE_BEHAVIOR {
858
- NonfiniteBehavior :: IEEE754 => S :: MAX_EXP + 1 ,
859
- NonfiniteBehavior :: NanOnly => S :: MAX_EXP ,
920
+ let ( exp, sign) = match S :: NAN_ENCODING {
921
+ NanEncoding :: IEEE => ( S :: MAX_EXP + 1 , false ) ,
922
+ NanEncoding :: AllOnes => ( S :: MAX_EXP , false ) ,
923
+ NanEncoding :: NegativeZero => ( S :: MIN_EXP - 1 , true ) ,
860
924
} ;
861
925
862
926
IeeeFloat {
863
927
sig,
864
928
exp,
865
929
read_only_category_do_not_mutate : Category :: NaN ,
866
- read_only_sign_do_not_mutate : false ,
930
+ read_only_sign_do_not_mutate : sign ,
867
931
marker : PhantomData ,
868
932
}
869
933
}
@@ -928,15 +992,15 @@ impl<S: Semantics> Float for IeeeFloat<S> {
928
992
// significand = 1..1
929
993
IeeeFloat {
930
994
sig : [ ( ( 1 << S :: PRECISION ) - 1 )
931
- & match S :: NONFINITE_BEHAVIOR {
995
+ & match S :: NAN_ENCODING {
932
996
// The largest number by magnitude in our format will be the floating point
933
997
// number with maximum exponent and with significand that is all ones.
934
- NonfiniteBehavior :: IEEE754 => !0 ,
998
+ NanEncoding :: IEEE | NanEncoding :: NegativeZero => !0 ,
935
999
936
1000
// The largest number by magnitude in our format will be the floating point
937
1001
// number with maximum exponent and with significand that is all ones except
938
1002
// the LSB.
939
- NonfiniteBehavior :: NanOnly => !1 ,
1003
+ NanEncoding :: AllOnes => !1 ,
940
1004
} ] ,
941
1005
exp : S :: MAX_EXP ,
942
1006
read_only_category_do_not_mutate : Category :: Normal ,
@@ -2053,10 +2117,12 @@ impl<S: Semantics> IeeeFloat<S> {
2053
2117
}
2054
2118
}
2055
2119
2056
- // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
2120
+ // The all-ones value is an overflow if NaN is all ones. If NaN is
2121
+ // represented by negative zero, then it is a valid finite value.
2122
+ // NOTE(eddyb) for `NanEncoding::AllOnes`, the unique `NAN` takes up
2057
2123
// the largest significand of `MAX_EXP` (which also has normals), though
2058
2124
// comparing significands needs to ignore the integer bit `NAN` lacks.
2059
- if S :: NONFINITE_BEHAVIOR == NonfiniteBehavior :: NanOnly
2125
+ if S :: NAN_ENCODING == NanEncoding :: AllOnes
2060
2126
&& self . exp == Self :: NAN . exp
2061
2127
&& [ self . sig [ 0 ] & S :: NAN_SIGNIFICAND_BASE ] == Self :: NAN . sig
2062
2128
{
@@ -2101,10 +2167,12 @@ impl<S: Semantics> IeeeFloat<S> {
2101
2167
return Status :: INEXACT . and ( self ) ;
2102
2168
}
2103
2169
2104
- // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
2170
+ // The all-ones value is an overflow if NaN is all ones. If NaN is
2171
+ // represented by negative zero, then it is a valid finite value.
2172
+ // NOTE(eddyb) for `NanEncoding::AllOnes`, the unique `NAN` takes up
2105
2173
// the largest significand of `MAX_EXP` (which also has normals), though
2106
2174
// comparing significands needs to ignore the integer bit `NAN` lacks.
2107
- if S :: NONFINITE_BEHAVIOR == NonfiniteBehavior :: NanOnly
2175
+ if S :: NAN_ENCODING == NanEncoding :: AllOnes
2108
2176
&& self . exp == Self :: NAN . exp
2109
2177
&& [ self . sig [ 0 ] & S :: NAN_SIGNIFICAND_BASE ] == Self :: NAN . sig
2110
2178
{
0 commit comments