@@ -849,12 +849,14 @@ fn cast_float_to_int(bcx: &Builder,
849
849
x : ValueRef ,
850
850
float_ty : Type ,
851
851
int_ty : Type ) -> ValueRef {
852
+ let fptosui_result = if signed {
853
+ bcx. fptosi ( x, int_ty)
854
+ } else {
855
+ bcx. fptoui ( x, int_ty)
856
+ } ;
857
+
852
858
if !bcx. sess ( ) . opts . debugging_opts . saturating_float_casts {
853
- if signed {
854
- return bcx. fptosi ( x, int_ty) ;
855
- } else {
856
- return bcx. fptoui ( x, int_ty) ;
857
- }
859
+ return fptosui_result;
858
860
}
859
861
// LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
860
862
// destination integer type after rounding towards zero. This `undef` value can cause UB in
@@ -875,10 +877,9 @@ fn cast_float_to_int(bcx: &Builder,
875
877
// On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
876
878
// we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
877
879
// This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
878
- fn compute_clamp_bounds < F : Float > ( signed : bool , int_ty : Type ) -> ( u128 , u128 , Status ) {
880
+ fn compute_clamp_bounds < F : Float > ( signed : bool , int_ty : Type ) -> ( u128 , u128 ) {
879
881
let f_min = if signed {
880
- let int_min = i128:: MIN >> ( 128 - int_ty. int_width ( ) ) ;
881
- let rounded_min = F :: from_i128_r ( int_min, Round :: TowardZero ) ;
882
+ let rounded_min = F :: from_i128_r ( int_min ( signed, int_ty) , Round :: TowardZero ) ;
882
883
assert_eq ! ( rounded_min. status, Status :: OK ) ;
883
884
rounded_min. value
884
885
} else {
@@ -888,7 +889,7 @@ fn cast_float_to_int(bcx: &Builder,
888
889
let rounded_max = F :: from_u128_r ( int_max ( signed, int_ty) , Round :: TowardZero ) ;
889
890
assert ! ( rounded_max. value. is_finite( ) ) ;
890
891
891
- ( f_min. to_bits ( ) , rounded_max. value . to_bits ( ) , rounded_max . status )
892
+ ( f_min. to_bits ( ) , rounded_max. value . to_bits ( ) )
892
893
}
893
894
fn int_max ( signed : bool , int_ty : Type ) -> u128 {
894
895
let shift_amount = 128 - int_ty. int_width ( ) ;
@@ -898,7 +899,14 @@ fn cast_float_to_int(bcx: &Builder,
898
899
u128:: MAX >> shift_amount
899
900
}
900
901
}
901
- let ( f_min, f_max, f_max_status) = match float_ty. float_width ( ) {
902
+ fn int_min ( signed : bool , int_ty : Type ) -> i128 {
903
+ if signed {
904
+ i128:: MIN >> ( 128 - int_ty. int_width ( ) )
905
+ } else {
906
+ 0
907
+ }
908
+ }
909
+ let ( f_min, f_max) = match float_ty. float_width ( ) {
902
910
32 => compute_clamp_bounds :: < ieee:: Single > ( signed, int_ty) ,
903
911
64 => compute_clamp_bounds :: < ieee:: Double > ( signed, int_ty) ,
904
912
n => bug ! ( "unsupported float width {}" , n) ,
@@ -913,76 +921,60 @@ fn cast_float_to_int(bcx: &Builder,
913
921
} ;
914
922
let f_min = float_bits_to_llval ( f_min) ;
915
923
let f_max = float_bits_to_llval ( f_max) ;
916
- // To implement saturation, we perform the following steps (not all steps are necessary for
917
- // all combinations of int_ty and float_ty, but we'll deal with that below):
924
+ // To implement saturation, we perform the following steps:
918
925
//
919
- // 1. Clamp x into the range [f_min, f_max] in such a way that NaN becomes f_min.
920
- // 2. If x is NaN, replace the result of the clamping with 0.0, otherwise
921
- // keep the clamping result.
922
- // 3. Now cast the result of step 2 with fpto[su]i.
923
- // 4. If x > f_max, return int_ty::MAX, otherwise return the result of step 3.
926
+ // 1. Cast x to an integer with fpto[su]i. This may result in undef.
927
+ // 2. Compare x to f_min and f_max, and use the comparison results to select:
928
+ // a) int_ty::MIN if x < f_min or x is NaN
929
+ // b) int_ty::MAX if x > f_max
930
+ // c) the result of fpto[su]i otherwise
931
+ // 3. If x is NaN, return 0, otherwise return the result of step 2.
924
932
//
925
- // This avoids undef because values in range [f_min, f_max] by definition fit into the
926
- // destination type. More importantly, it correctly implements saturating conversion.
933
+ // This avoids returning undef because values in range [f_min, f_max] by definition fit into the
934
+ // destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
935
+ // undef does not introduce any non-determinism either.
936
+ // More importantly, the above procedure correctly implements saturating conversion.
927
937
// Proof (sketch):
928
- // If x is NaN, step 2 yields 0.0, which is converted to 0 in step 3, and NaN > f_max does
929
- // not hold in step 4, therefore 0 is returned, as desired.
938
+ // If x is NaN, 0 is trivially returned.
930
939
// Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
931
940
// This yields three cases to consider:
932
- // (1) if x in [f_min, f_max], steps 1, 2, and 4 do nothing and the result of fpto[su]i
933
- // is returned, which agrees with saturating conversion for inputs in that range.
934
- // (2) if x > f_max, then x is larger than int_ty::MAX and step 4 correctly returns
935
- // int_ty::MAX. This holds even if f_max is rounded (i.e., if f_max < int_ty::MAX)
936
- // because in those cases, nextUp(f_max) is already larger than int_ty::MAX.
937
- // (3) if x < f_min, then x is smaller than int_ty::MIN and is clamped to f_min. As shown
938
- // earlier, f_min exactly equals int_ty::MIN and therefore no fixup analogous to step 4
939
- // is needed. Instead, step 3 casts f_min to int_ty::MIN and step 4 returns this cast
940
- // result, as desired.
941
+ // (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
942
+ // saturating conversion for inputs in that range.
943
+ // (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
944
+ // (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
945
+ // than int_ty::MAX. Because x is larger than int_ty::MAX, the return value is correct.
946
+ // (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
947
+ // int_ty::MIN and therefore the return value of int_ty::MIN is immediately correct.
941
948
// QED.
942
949
943
- // Step 1: Clamping. Computed as:
944
- // clamped_to_min = if f_min < x { x } else { f_min };
945
- // clamped_x = if f_max < clamped_to_min { f_max } else { clamped_to_min };
946
- // Note that for x = NaN, both of the above variables become f_min.
947
- let clamped_to_min = bcx. select ( bcx. fcmp ( llvm:: RealOLT , f_min, x) , x, f_min) ;
948
- let clamped_x = bcx. select (
949
- bcx. fcmp ( llvm:: RealOLT , f_max, clamped_to_min) ,
950
- f_max,
951
- clamped_to_min
952
- ) ;
953
-
954
- // Step 2: NaN replacement.
955
- // For unsigned types, f_min == 0.0 and therefore clamped_x is already zero.
950
+ // Step 1 was already performed above.
951
+
952
+ // Step 2: We use two comparisons and two selects, with s1 being the result:
953
+ // %less = fcmp ult %x, %f_min
954
+ // %greater = fcmp ogt %x, %f_max
955
+ // %s0 = select %less, int_ty::MIN, %fptosui_result
956
+ // %s1 = select %greater, int_ty::MAX, %s0
957
+ // Note that %less uses an *unordered* comparison. This comparison is true if the operands are
958
+ // not comparable (i.e., if x is NaN). The unordered comparison ensures that s1 becomes
959
+ // int_ty::MIN if x is NaN.
960
+ // Performance note: It can be lowered to a flipped comparison and a negation (and the negation
961
+ // can be merged into the select), so it is not necessarily any more expensive than an ordered
962
+ // ("normal") comparison. Whether these optimizations will be performed is ultimately up to the
963
+ // backend but at least x86 does that.
964
+ let less = bcx. fcmp ( llvm:: RealULT , x, f_min) ;
965
+ let greater = bcx. fcmp ( llvm:: RealOGT , x, f_max) ;
966
+ let int_max = C_big_integral ( int_ty, int_max ( signed, int_ty) as u128 ) ;
967
+ let int_min = C_big_integral ( int_ty, int_min ( signed, int_ty) as u128 ) ;
968
+ let s0 = bcx. select ( less, int_min, fptosui_result) ;
969
+ let s1 = bcx. select ( greater, int_max, s0) ;
970
+
971
+ // Step 3: NaN replacement.
972
+ // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
956
973
// Therefore we only need to execute this step for signed integer types.
957
- let clamped_x = if signed {
958
- let zero = match float_ty. float_width ( ) {
959
- 32 => float_bits_to_llval ( ieee:: Single :: ZERO . to_bits ( ) ) ,
960
- 64 => float_bits_to_llval ( ieee:: Double :: ZERO . to_bits ( ) ) ,
961
- n => bug ! ( "unsupported float width {}" , n) ,
962
- } ;
974
+ if signed {
963
975
// LLVM has no isNaN predicate, so we use (x == x) instead
964
- bcx. select ( bcx. fcmp ( llvm:: RealOEQ , x, x) , clamped_x, zero)
965
- } else {
966
- clamped_x
967
- } ;
968
-
969
- // Step 3: fpto[su]i cast
970
- let cast_result = if signed {
971
- bcx. fptosi ( clamped_x, int_ty)
972
- } else {
973
- bcx. fptoui ( clamped_x, int_ty)
974
- } ;
975
-
976
- // Step 4: f_max fixup.
977
- // Note that x > f_max implies that x was clamped to f_max in step 1, and therefore the
978
- // cast result is the integer equal to f_max. If the conversion from int_ty::MAX to f_max
979
- // was exact, then the result of casting f_max is again int_ty::MAX, so we'd return the same
980
- // value whether or not x > f_max holds. Therefore, we only need to execute this step
981
- // if f_max is inexact.
982
- if f_max_status. contains ( Status :: INEXACT ) {
983
- let int_max = C_big_integral ( int_ty, int_max ( signed, int_ty) ) ;
984
- bcx. select ( bcx. fcmp ( llvm:: RealOGT , x, f_max) , int_max, cast_result)
976
+ bcx. select ( bcx. fcmp ( llvm:: RealOEQ , x, x) , s1, C_big_integral ( int_ty, 0 ) )
985
977
} else {
986
- cast_result
978
+ s1
987
979
}
988
980
}
0 commit comments