|
1 | 1 | use super::abi::AbiBuilderMethods;
|
2 | 2 | use super::asm::AsmBuilderMethods;
|
| 3 | +use super::consts::ConstMethods; |
3 | 4 | use super::coverageinfo::CoverageInfoBuilderMethods;
|
4 | 5 | use super::debuginfo::DebugInfoBuilderMethods;
|
5 | 6 | use super::intrinsic::IntrinsicCallMethods;
|
6 |
| -use super::type_::ArgAbiMethods; |
| 7 | +use super::misc::MiscMethods; |
| 8 | +use super::type_::{ArgAbiMethods, BaseTypeMethods}; |
7 | 9 | use super::{HasCodegen, StaticBuilderMethods};
|
8 | 10 |
|
9 | 11 | use crate::common::{
|
10 |
| - AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, |
| 12 | + AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind, |
11 | 13 | };
|
12 | 14 | use crate::mir::operand::OperandRef;
|
13 | 15 | use crate::mir::place::PlaceRef;
|
14 | 16 | use crate::MemFlags;
|
15 | 17 |
|
| 18 | +use rustc_apfloat::{ieee, Float, Round, Status}; |
16 | 19 | use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
|
17 | 20 | use rustc_middle::ty::Ty;
|
18 | 21 | use rustc_span::Span;
|
@@ -202,6 +205,179 @@ pub trait BuilderMethods<'a, 'tcx>:
|
    /// Casts `val` to the integer type `dest_ty`. `is_signed` selects how a
    /// widening cast extends the value (presumably sign- vs. zero-extension —
    /// NOTE(review): exact trunc/ext behavior is backend-defined; confirm in impls).
    fn intcast(&mut self, val: Self::Value, dest_ty: Self::Type, is_signed: bool) -> Self::Value;
    /// Casts the pointer value `val` to the pointer type `dest_ty` without
    /// changing the address it refers to.
    fn pointercast(&mut self, val: Self::Value, dest_ty: Self::Type) -> Self::Value;
|
204 | 207 |
|
    /// Converts the float value `x` to the integer type `dest_ty` (both may be
    /// vectors of the respective scalar kinds), saturating out-of-range inputs
    /// to `int_ty::MIN`/`int_ty::MAX` and mapping NaN to 0, unless
    /// `-Z saturating-float-casts=false` is set, in which case the raw
    /// (possibly `undef`-producing) `fpto[su]i` is emitted instead.
    fn cast_float_to_int(
        &mut self,
        signed: bool,
        x: Self::Value,
        dest_ty: Self::Type,
    ) -> Self::Value {
        let in_ty = self.cx().val_ty(x);
        // For vector casts, the saturation bounds are computed on the scalar
        // element types; otherwise use the types directly.
        let (float_ty, int_ty) = if self.cx().type_kind(dest_ty) == TypeKind::Vector
            && self.cx().type_kind(in_ty) == TypeKind::Vector
        {
            (self.cx().element_type(in_ty), self.cx().element_type(dest_ty))
        } else {
            (in_ty, dest_ty)
        };
        assert!(matches!(self.cx().type_kind(float_ty), TypeKind::Float | TypeKind::Double));
        assert_eq!(self.cx().type_kind(int_ty), TypeKind::Integer);

        // Opt-out: with `-Z saturating-float-casts=false`, emit the plain cast.
        if let Some(false) = self.cx().sess().opts.debugging_opts.saturating_float_casts {
            return if signed { self.fptosi(x, dest_ty) } else { self.fptoui(x, dest_ty) };
        }

        // Prefer a native saturating-cast intrinsic if the backend offers one.
        let try_sat_result =
            if signed { self.fptosi_sat(x, dest_ty) } else { self.fptoui_sat(x, dest_ty) };
        if let Some(try_sat_result) = try_sat_result {
            return try_sat_result;
        }

        let int_width = self.cx().int_width(int_ty);
        let float_width = self.cx().float_width(float_ty);
        // LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
        // destination integer type after rounding towards zero. This `undef` value can cause UB in
        // safe code (see issue #10184), so we implement a saturating conversion on top of it:
        // Semantically, the mathematical value of the input is rounded towards zero to the next
        // mathematical integer, and then the result is clamped into the range of the destination
        // integer type. Positive and negative infinity are mapped to the maximum and minimum value of
        // the destination integer type. NaN is mapped to 0.
        //
        // Define f_min and f_max as the largest and smallest (finite) floats that are exactly equal to
        // a value representable in int_ty.
        // They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
        // Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
        // int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
        // representable. Note that this only works if float_ty's exponent range is sufficiently large.
        // f16 or 256 bit integers would break this property. Right now the smallest float type is f32
        // with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
        // On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
        // we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
        // This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
        let int_max = |signed: bool, int_width: u64| -> u128 {
            let shift_amount = 128 - int_width;
            if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
        };
        let int_min = |signed: bool, int_width: u64| -> i128 {
            if signed { i128::MIN >> (128 - int_width) } else { 0 }
        };

        // Compute (f_min, f_max) as raw IEEE bit patterns for an f32 destination.
        let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
            let rounded_min =
                ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
            // int_ty::MIN is exactly representable (see the comment block above),
            // so the conversion must be exact.
            assert_eq!(rounded_min.status, Status::OK);
            let rounded_max =
                ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
            assert!(rounded_max.value.is_finite());
            (rounded_min.value.to_bits(), rounded_max.value.to_bits())
        };
        // Same as above, for an f64 destination.
        let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
            let rounded_min =
                ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
            assert_eq!(rounded_min.status, Status::OK);
            let rounded_max =
                ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
            assert!(rounded_max.value.is_finite());
            (rounded_min.value.to_bits(), rounded_max.value.to_bits())
        };
        // To implement saturation, we perform the following steps:
        //
        // 1. Cast x to an integer with fpto[su]i. This may result in undef.
        // 2. Compare x to f_min and f_max, and use the comparison results to select:
        //    a) int_ty::MIN if x < f_min or x is NaN
        //    b) int_ty::MAX if x > f_max
        //    c) the result of fpto[su]i otherwise
        // 3. If x is NaN, return 0, otherwise return the result of step 2.
        //
        // This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
        // destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
        // undef does not introduce any non-determinism either.
        // More importantly, the above procedure correctly implements saturating conversion.
        // Proof (sketch):
        // If x is NaN, 0 is returned by definition.
        // Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
        // This yields three cases to consider:
        // (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
        //     saturating conversion for inputs in that range.
        // (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
        //     (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
        //     than int_ty::MAX. Because x is larger than int_ty::MAX, the return value of int_ty::MAX
        //     is correct.
        // (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
        //     int_ty::MIN and therefore the return value of int_ty::MIN is correct.
        // QED.

        // Materialize a float constant of `float_ty` from raw IEEE bits via an
        // integer constant + bitcast.
        let float_bits_to_llval = |bx: &mut Self, bits| {
            let bits_llval = match float_width {
                32 => bx.cx().const_u32(bits as u32),
                64 => bx.cx().const_u64(bits as u64),
                n => bug!("unsupported float width {}", n),
            };
            bx.bitcast(bits_llval, float_ty)
        };
        let (f_min, f_max) = match float_width {
            32 => compute_clamp_bounds_single(signed, int_width),
            64 => compute_clamp_bounds_double(signed, int_width),
            n => bug!("unsupported float width {}", n),
        };
        let f_min = float_bits_to_llval(self, f_min);
        let f_max = float_bits_to_llval(self, f_max);
        let int_max = self.cx().const_uint_big(int_ty, int_max(signed, int_width));
        let int_min = self.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
        let zero = self.cx().const_uint(int_ty, 0);

        // If we're working with vectors, constants must be "splatted": the constant is duplicated
        // into each lane of the vector.  The algorithm stays the same, we are just using the
        // same constant across all lanes.
        let maybe_splat = |bx: &mut Self, val| {
            if bx.cx().type_kind(dest_ty) == TypeKind::Vector {
                bx.vector_splat(bx.vector_length(dest_ty), val)
            } else {
                val
            }
        };
        let f_min = maybe_splat(self, f_min);
        let f_max = maybe_splat(self, f_max);
        let int_max = maybe_splat(self, int_max);
        let int_min = maybe_splat(self, int_min);
        let zero = maybe_splat(self, zero);

        // Step 1 ...
        let fptosui_result = if signed { self.fptosi(x, dest_ty) } else { self.fptoui(x, dest_ty) };
        let less_or_nan = self.fcmp(RealPredicate::RealULT, x, f_min);
        let greater = self.fcmp(RealPredicate::RealOGT, x, f_max);

        // Step 2: We use two comparisons and two selects, with %s1 being the
        // result:
        //     %less_or_nan = fcmp ult %x, %f_min
        //     %greater = fcmp ogt %x, %f_max
        //     %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
        //     %s1 = select %greater, int_ty::MAX, %s0
        // Note that %less_or_nan uses an *unordered* comparison. This
        // comparison is true if the operands are not comparable (i.e., if x is
        // NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
        // x is NaN.
        //
        // Performance note: Unordered comparison can be lowered to a "flipped"
        // comparison and a negation, and the negation can be merged into the
        // select. Therefore, it is not necessarily any more expensive than an
        // ordered ("normal") comparison. Whether these optimizations will be
        // performed is ultimately up to the backend, but at least x86 does
        // perform them.
        let s0 = self.select(less_or_nan, int_min, fptosui_result);
        let s1 = self.select(greater, int_max, s0);

        // Step 3: NaN replacement.
        // For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
        // Therefore we only need to execute this step for signed integer types.
        if signed {
            // LLVM has no isNaN predicate, so we use (x == x) instead
            let cmp = self.fcmp(RealPredicate::RealOEQ, x, x);
            self.select(cmp, s1, zero)
        } else {
            s1
        }
    }
| 380 | + |
    /// Emits an integer comparison of `lhs` and `rhs` using predicate `op`,
    /// returning the (presumably boolean/i1-typed) comparison result.
    fn icmp(&mut self, op: IntPredicate, lhs: Self::Value, rhs: Self::Value) -> Self::Value;
    /// Emits a floating-point comparison of `lhs` and `rhs` using predicate
    /// `op` (ordered/unordered variants per `RealPredicate`).
    fn fcmp(&mut self, op: RealPredicate, lhs: Self::Value, rhs: Self::Value) -> Self::Value;
|
207 | 383 |
|
|
0 commit comments