Skip to content

Commit 0a843df

Browse files
author
Robin Kruppe
committed
Implement more efficient saturation
1 parent 354a5cb commit 0a843df

File tree

1 file changed

+63
-71
lines changed

1 file changed

+63
-71
lines changed

src/librustc_trans/mir/rvalue.rs

+63-71
Original file line numberDiff line numberDiff line change
@@ -849,12 +849,14 @@ fn cast_float_to_int(bcx: &Builder,
849849
x: ValueRef,
850850
float_ty: Type,
851851
int_ty: Type) -> ValueRef {
852+
let fptosui_result = if signed {
853+
bcx.fptosi(x, int_ty)
854+
} else {
855+
bcx.fptoui(x, int_ty)
856+
};
857+
852858
if !bcx.sess().opts.debugging_opts.saturating_float_casts {
853-
if signed {
854-
return bcx.fptosi(x, int_ty);
855-
} else {
856-
return bcx.fptoui(x, int_ty);
857-
}
859+
return fptosui_result;
858860
}
859861
// LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
860862
// destination integer type after rounding towards zero. This `undef` value can cause UB in
@@ -875,10 +877,9 @@ fn cast_float_to_int(bcx: &Builder,
875877
// On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
876878
// we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
877879
// This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
878-
fn compute_clamp_bounds<F: Float>(signed: bool, int_ty: Type) -> (u128, u128, Status) {
880+
fn compute_clamp_bounds<F: Float>(signed: bool, int_ty: Type) -> (u128, u128) {
879881
let f_min = if signed {
880-
let int_min = i128::MIN >> (128 - int_ty.int_width());
881-
let rounded_min = F::from_i128_r(int_min, Round::TowardZero);
882+
let rounded_min = F::from_i128_r(int_min(signed, int_ty), Round::TowardZero);
882883
assert_eq!(rounded_min.status, Status::OK);
883884
rounded_min.value
884885
} else {
@@ -888,7 +889,7 @@ fn cast_float_to_int(bcx: &Builder,
888889
let rounded_max = F::from_u128_r(int_max(signed, int_ty), Round::TowardZero);
889890
assert!(rounded_max.value.is_finite());
890891

891-
(f_min.to_bits(), rounded_max.value.to_bits(), rounded_max.status)
892+
(f_min.to_bits(), rounded_max.value.to_bits())
892893
}
893894
fn int_max(signed: bool, int_ty: Type) -> u128 {
894895
let shift_amount = 128 - int_ty.int_width();
@@ -898,7 +899,14 @@ fn cast_float_to_int(bcx: &Builder,
898899
u128::MAX >> shift_amount
899900
}
900901
}
901-
let (f_min, f_max, f_max_status) = match float_ty.float_width() {
902+
fn int_min(signed: bool, int_ty: Type) -> i128 {
903+
if signed {
904+
i128::MIN >> (128 - int_ty.int_width())
905+
} else {
906+
0
907+
}
908+
}
909+
let (f_min, f_max) = match float_ty.float_width() {
902910
32 => compute_clamp_bounds::<ieee::Single>(signed, int_ty),
903911
64 => compute_clamp_bounds::<ieee::Double>(signed, int_ty),
904912
n => bug!("unsupported float width {}", n),
@@ -913,76 +921,60 @@ fn cast_float_to_int(bcx: &Builder,
913921
};
914922
let f_min = float_bits_to_llval(f_min);
915923
let f_max = float_bits_to_llval(f_max);
916-
// To implement saturation, we perform the following steps (not all steps are necessary for
917-
// all combinations of int_ty and float_ty, but we'll deal with that below):
924+
// To implement saturation, we perform the following steps:
918925
//
919-
// 1. Clamp x into the range [f_min, f_max] in such a way that NaN becomes f_min.
920-
// 2. If x is NaN, replace the result of the clamping with 0.0, otherwise
921-
// keep the clamping result.
922-
// 3. Now cast the result of step 2 with fpto[su]i.
923-
// 4. If x > f_max, return int_ty::MAX, otherwise return the result of step 3.
926+
// 1. Cast x to an integer with fpto[su]i. This may result in undef.
927+
// 2. Compare x to f_min and f_max, and use the comparison results to select:
928+
// a) int_ty::MIN if x < f_min or x is NaN
929+
// b) int_ty::MAX if x > f_max
930+
// c) the result of fpto[su]i otherwise
931+
// 3. If x is NaN, return 0, otherwise return the result of step 2.
924932
//
925-
// This avoids undef because values in range [f_min, f_max] by definition fit into the
926-
// destination type. More importantly, it correctly implements saturating conversion.
933+
// This avoids an undef result because values in range [f_min, f_max] by definition fit into the
934+
// destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
935+
// undef does not introduce any non-determinism either.
936+
// More importantly, the above procedure correctly implements saturating conversion.
927937
// Proof (sketch):
928-
// If x is NaN, step 2 yields 0.0, which is converted to 0 in step 3, and NaN > f_max does
929-
// not hold in step 4, therefore 0 is returned, as desired.
938+
// If x is NaN, 0 is trivially returned.
930939
// Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
931940
// This yields three cases to consider:
932-
// (1) if x in [f_min, f_max], steps 1, 2, and 4 do nothing and the result of fpto[su]i
933-
// is returned, which agrees with saturating conversion for inputs in that range.
934-
// (2) if x > f_max, then x is larger than int_ty::MAX and step 4 correctly returns
935-
// int_ty::MAX. This holds even if f_max is rounded (i.e., if f_max < int_ty::MAX)
936-
// because in those cases, nextUp(f_max) is already larger than int_ty::MAX.
937-
// (3) if x < f_min, then x is smaller than int_ty::MIN and is clamped to f_min. As shown
938-
// earlier, f_min exactly equals int_ty::MIN and therefore no fixup analogous to step 4
939-
// is needed. Instead, step 3 casts f_min to int_ty::MIN and step 4 returns this cast
940-
// result, as desired.
941+
// (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
942+
// saturating conversion for inputs in that range.
943+
// (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
944+
// (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
945+
// than int_ty::MAX. Because x is larger than int_ty::MAX, the return value is correct.
946+
// (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
947+
// int_ty::MIN and therefore the return value of int_ty::MIN is immediately correct.
941948
// QED.
942949

943-
// Step 1: Clamping. Computed as:
944-
// clamped_to_min = if f_min < x { x } else { f_min };
945-
// clamped_x = if f_max < clamped_to_min { f_max } else { clamped_to_min };
946-
// Note that for x = NaN, both of the above variables become f_min.
947-
let clamped_to_min = bcx.select(bcx.fcmp(llvm::RealOLT, f_min, x), x, f_min);
948-
let clamped_x = bcx.select(
949-
bcx.fcmp(llvm::RealOLT, f_max, clamped_to_min),
950-
f_max,
951-
clamped_to_min
952-
);
953-
954-
// Step 2: NaN replacement.
955-
// For unsigned types, f_min == 0.0 and therefore clamped_x is already zero.
950+
// Step 1 was already performed above.
951+
952+
// Step 2: We use two comparisons and two selects, with s1 being the result:
953+
// %less = fcmp ult %x, %f_min
954+
// %greater = fcmp ogt %x, %f_max
955+
// %s0 = select %less, int_ty::MIN, %fptosui_result
956+
// %s1 = select %greater, int_ty::MAX, %s0
957+
// Note that %less uses an *unordered* comparison. This comparison is true if the operands are
958+
// not comparable (i.e., if x is NaN). The unordered comparison ensures that s1 becomes
959+
// int_ty::MIN if x is NaN.
960+
// Performance note: The unordered comparison can be lowered to a flipped comparison and a negation (and the negation
961+
// can be merged into the select), so it is not necessarily any more expensive than an ordered
962+
// ("normal") comparison. Whether these optimizations will be performed is ultimately up to the
963+
// backend but at least x86 does that.
964+
let less = bcx.fcmp(llvm::RealULT, x, f_min);
965+
let greater = bcx.fcmp(llvm::RealOGT, x, f_max);
966+
let int_max = C_big_integral(int_ty, int_max(signed, int_ty) as u128);
967+
let int_min = C_big_integral(int_ty, int_min(signed, int_ty) as u128);
968+
let s0 = bcx.select(less, int_min, fptosui_result);
969+
let s1 = bcx.select(greater, int_max, s0);
970+
971+
// Step 3: NaN replacement.
972+
// For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
956973
// Therefore we only need to execute this step for signed integer types.
957-
let clamped_x = if signed {
958-
let zero = match float_ty.float_width() {
959-
32 => float_bits_to_llval(ieee::Single::ZERO.to_bits()),
960-
64 => float_bits_to_llval(ieee::Double::ZERO.to_bits()),
961-
n => bug!("unsupported float width {}", n),
962-
};
974+
if signed {
963975
// LLVM has no isNaN predicate, so we use (x == x) instead
964-
bcx.select(bcx.fcmp(llvm::RealOEQ, x, x), clamped_x, zero)
965-
} else {
966-
clamped_x
967-
};
968-
969-
// Step 3: fpto[su]i cast
970-
let cast_result = if signed {
971-
bcx.fptosi(clamped_x, int_ty)
972-
} else {
973-
bcx.fptoui(clamped_x, int_ty)
974-
};
975-
976-
// Step 4: f_max fixup.
977-
// Note that x > f_max implies that x was clamped to f_max in step 1, and therefore the
978-
// cast result is the integer equal to f_max. If the conversion from int_ty::MAX to f_max
979-
// was exact, then the result of casting f_max is again int_ty::MAX, so we'd return the same
980-
// value whether or not x > f_max holds. Therefore, we only need to execute this step
981-
// if f_max is inexact.
982-
if f_max_status.contains(Status::INEXACT) {
983-
let int_max = C_big_integral(int_ty, int_max(signed, int_ty));
984-
bcx.select(bcx.fcmp(llvm::RealOGT, x, f_max), int_max, cast_result)
976+
bcx.select(bcx.fcmp(llvm::RealOEQ, x, x), s1, C_big_integral(int_ty, 0))
985977
} else {
986-
cast_result
978+
s1
987979
}
988980
}

0 commit comments

Comments
 (0)