Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 95e66ff

Browse files
authored Dec 27, 2024
Rollup merge of #133663 - scottmcm:carrying_mul_add, r=Amanieu
Add a compiler intrinsic to back `bigint_helper_methods` cc #85532 This adds a new `carrying_mul_add` intrinsic, to implement `wide_mul` and `carrying_mul`. It has fallback MIR for all types -- including `u128`, which isn't currently supported on nightly -- so that it'll continue to work on all backends, including CTFE. Then it's overridden in `cg_llvm` to use wider intermediate types, including `i256` for `u128::carrying_mul`.
2 parents 6d3db55 + 4669c0d commit 95e66ff

File tree

12 files changed

+501
-135
lines changed

12 files changed

+501
-135
lines changed
 

‎compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,37 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
340340
self.const_i32(cache_type),
341341
])
342342
}
343+
sym::carrying_mul_add => {
344+
let (size, signed) = fn_args.type_at(0).int_size_and_signed(self.tcx);
345+
346+
let wide_llty = self.type_ix(size.bits() * 2);
347+
let args = args.as_array().unwrap();
348+
let [a, b, c, d] = args.map(|a| self.intcast(a.immediate(), wide_llty, signed));
349+
350+
let wide = if signed {
351+
let prod = self.unchecked_smul(a, b);
352+
let acc = self.unchecked_sadd(prod, c);
353+
self.unchecked_sadd(acc, d)
354+
} else {
355+
let prod = self.unchecked_umul(a, b);
356+
let acc = self.unchecked_uadd(prod, c);
357+
self.unchecked_uadd(acc, d)
358+
};
359+
360+
let narrow_llty = self.type_ix(size.bits());
361+
let low = self.trunc(wide, narrow_llty);
362+
let bits_const = self.const_uint(wide_llty, size.bits());
363+
// No need for ashr when signed; LLVM changes it to lshr anyway.
364+
let high = self.lshr(wide, bits_const);
365+
// FIXME: could be `trunc nuw`, even for signed.
366+
let high = self.trunc(high, narrow_llty);
367+
368+
let pair_llty = self.type_struct(&[narrow_llty, narrow_llty], false);
369+
let pair = self.const_poison(pair_llty);
370+
let pair = self.insert_value(pair, low, 0);
371+
let pair = self.insert_value(pair, high, 1);
372+
pair
373+
}
343374
sym::ctlz
344375
| sym::ctlz_nonzero
345376
| sym::cttz

‎compiler/rustc_codegen_llvm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#![feature(iter_intersperse)]
1818
#![feature(let_chains)]
1919
#![feature(rustdoc_internals)]
20+
#![feature(slice_as_array)]
2021
#![feature(try_blocks)]
2122
#![warn(unreachable_pub)]
2223
// tidy-alphabetical-end

‎compiler/rustc_hir_analysis/src/check/intrinsic.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ pub fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -
9494
| sym::add_with_overflow
9595
| sym::sub_with_overflow
9696
| sym::mul_with_overflow
97+
| sym::carrying_mul_add
9798
| sym::wrapping_add
9899
| sym::wrapping_sub
99100
| sym::wrapping_mul
@@ -436,6 +437,10 @@ pub fn check_intrinsic_type(
436437
(1, 0, vec![param(0), param(0)], Ty::new_tup(tcx, &[param(0), tcx.types.bool]))
437438
}
438439

440+
sym::carrying_mul_add => {
441+
(2, 0, vec![param(0); 4], Ty::new_tup(tcx, &[param(1), param(0)]))
442+
}
443+
439444
sym::ptr_guaranteed_cmp => (
440445
1,
441446
0,

‎compiler/rustc_span/src/symbol.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ symbols! {
555555
call_ref_future,
556556
caller_location,
557557
capture_disjoint_fields,
558+
carrying_mul_add,
558559
catch_unwind,
559560
cause,
560561
cdylib,
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#![unstable(
2+
feature = "core_intrinsics_fallbacks",
3+
reason = "The fallbacks will never be stable, as they exist only to be called \
4+
by the fallback MIR, but they're exported so they can be tested on \
5+
platforms where the fallback MIR isn't actually used",
6+
issue = "none"
7+
)]
8+
#![allow(missing_docs)]
9+
10+
#[const_trait]
11+
pub trait CarryingMulAdd: Copy + 'static {
12+
type Unsigned: Copy + 'static;
13+
fn carrying_mul_add(
14+
self,
15+
multiplicand: Self,
16+
addend: Self,
17+
carry: Self,
18+
) -> (Self::Unsigned, Self);
19+
}
20+
21+
macro_rules! impl_carrying_mul_add_by_widening {
22+
($($t:ident $u:ident $w:ident,)+) => {$(
23+
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
24+
impl const CarryingMulAdd for $t {
25+
type Unsigned = $u;
26+
#[inline]
27+
fn carrying_mul_add(self, a: Self, b: Self, c: Self) -> ($u, $t) {
28+
let wide = (self as $w) * (a as $w) + (b as $w) + (c as $w);
29+
(wide as _, (wide >> Self::BITS) as _)
30+
}
31+
}
32+
)+};
33+
}
34+
impl_carrying_mul_add_by_widening! {
35+
u8 u8 u16,
36+
u16 u16 u32,
37+
u32 u32 u64,
38+
u64 u64 u128,
39+
usize usize UDoubleSize,
40+
i8 u8 i16,
41+
i16 u16 i32,
42+
i32 u32 i64,
43+
i64 u64 i128,
44+
isize usize UDoubleSize,
45+
}
46+
47+
#[cfg(target_pointer_width = "16")]
48+
type UDoubleSize = u32;
49+
#[cfg(target_pointer_width = "32")]
50+
type UDoubleSize = u64;
51+
#[cfg(target_pointer_width = "64")]
52+
type UDoubleSize = u128;
53+
54+
#[inline]
55+
const fn wide_mul_u128(a: u128, b: u128) -> (u128, u128) {
56+
#[inline]
57+
const fn to_low_high(x: u128) -> [u128; 2] {
58+
const MASK: u128 = u64::MAX as _;
59+
[x & MASK, x >> 64]
60+
}
61+
#[inline]
62+
const fn from_low_high(x: [u128; 2]) -> u128 {
63+
x[0] | (x[1] << 64)
64+
}
65+
#[inline]
66+
const fn scalar_mul(low_high: [u128; 2], k: u128) -> [u128; 3] {
67+
let [x, c] = to_low_high(k * low_high[0]);
68+
let [y, z] = to_low_high(k * low_high[1] + c);
69+
[x, y, z]
70+
}
71+
let a = to_low_high(a);
72+
let b = to_low_high(b);
73+
let low = scalar_mul(a, b[0]);
74+
let high = scalar_mul(a, b[1]);
75+
let r0 = low[0];
76+
let [r1, c] = to_low_high(low[1] + high[0]);
77+
let [r2, c] = to_low_high(low[2] + high[1] + c);
78+
let r3 = high[2] + c;
79+
(from_low_high([r0, r1]), from_low_high([r2, r3]))
80+
}
81+
82+
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
83+
impl const CarryingMulAdd for u128 {
84+
type Unsigned = u128;
85+
#[inline]
86+
fn carrying_mul_add(self, b: u128, c: u128, d: u128) -> (u128, u128) {
87+
let (low, mut high) = wide_mul_u128(self, b);
88+
let (low, carry) = u128::overflowing_add(low, c);
89+
high += carry as u128;
90+
let (low, carry) = u128::overflowing_add(low, d);
91+
high += carry as u128;
92+
(low, high)
93+
}
94+
}
95+
96+
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
97+
impl const CarryingMulAdd for i128 {
98+
type Unsigned = u128;
99+
#[inline]
100+
fn carrying_mul_add(self, b: i128, c: i128, d: i128) -> (u128, i128) {
101+
let (low, high) = wide_mul_u128(self as u128, b as u128);
102+
let mut high = high as i128;
103+
high = high.wrapping_add(i128::wrapping_mul(self >> 127, b));
104+
high = high.wrapping_add(i128::wrapping_mul(self, b >> 127));
105+
let (low, carry) = u128::overflowing_add(low, c as u128);
106+
high = high.wrapping_add((carry as i128) + (c >> 127));
107+
let (low, carry) = u128::overflowing_add(low, d as u128);
108+
high = high.wrapping_add((carry as i128) + (d >> 127));
109+
(low, high)
110+
}
111+
}

‎library/core/src/intrinsics/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ use crate::marker::{DiscriminantKind, Tuple};
6868
use crate::mem::SizedTypeProperties;
6969
use crate::{ptr, ub_checks};
7070

71+
pub mod fallback;
7172
pub mod mir;
7273
pub mod simd;
7374

@@ -3305,6 +3306,34 @@ pub const fn mul_with_overflow<T: Copy>(_x: T, _y: T) -> (T, bool) {
33053306
unimplemented!()
33063307
}
33073308

3309+
/// Performs full-width multiplication and addition with a carry:
3310+
/// `multiplier * multiplicand + addend + carry`.
3311+
///
3312+
/// This is possible without any overflow. For `uN`:
3313+
/// MAX * MAX + MAX + MAX
3314+
/// => (2ⁿ-1) × (2ⁿ-1) + (2ⁿ-1) + (2ⁿ-1)
3315+
/// => (2²ⁿ - 2ⁿ⁺¹ + 1) + (2ⁿ⁺¹ - 2)
3316+
/// => 2²ⁿ - 1
3317+
///
3318+
/// For `iN`, the upper bound is MIN * MIN + MAX + MAX => 2²ⁿ⁻² + 2ⁿ - 2,
3319+
/// and the lower bound is MAX * MIN + MIN + MIN => -2²ⁿ⁻² - 2ⁿ + 2ⁿ⁻¹.
3320+
///
3321+
/// This intrinsic supports signed integers as well (see the `iN` bounds above); only the stabilized helper methods are currently unsigned-only.
3322+
/// The stabilized versions of this intrinsic are available on integers.
3323+
#[unstable(feature = "core_intrinsics", issue = "none")]
3324+
#[rustc_const_unstable(feature = "const_carrying_mul_add", issue = "85532")]
3325+
#[rustc_nounwind]
3326+
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
3327+
#[cfg_attr(not(bootstrap), miri::intrinsic_fallback_is_spec)]
3328+
pub const fn carrying_mul_add<T: ~const fallback::CarryingMulAdd<Unsigned = U>, U>(
3329+
multiplier: T,
3330+
multiplicand: T,
3331+
addend: T,
3332+
carry: T,
3333+
) -> (U, T) {
3334+
multiplier.carrying_mul_add(multiplicand, addend, carry)
3335+
}
3336+
33083337
/// Performs an exact division, resulting in undefined behavior where
33093338
/// `x % y != 0` or `y == 0` or `x == T::MIN && y == -1`
33103339
///

‎library/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
#![cfg_attr(bootstrap, feature(do_not_recommend))]
111111
#![feature(array_ptr_get)]
112112
#![feature(asm_experimental_arch)]
113+
#![feature(const_carrying_mul_add)]
113114
#![feature(const_eval_select)]
114115
#![feature(const_typed_swap)]
115116
#![feature(core_intrinsics)]

‎library/core/src/num/mod.rs

Lines changed: 0 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -228,134 +228,6 @@ macro_rules! midpoint_impl {
228228
};
229229
}
230230

231-
macro_rules! widening_impl {
232-
($SelfT:ty, $WideT:ty, $BITS:literal, unsigned) => {
233-
/// Calculates the complete product `self * rhs` without the possibility to overflow.
234-
///
235-
/// This returns the low-order (wrapping) bits and the high-order (overflow) bits
236-
/// of the result as two separate values, in that order.
237-
///
238-
/// If you also need to add a carry to the wide result, then you want
239-
/// [`Self::carrying_mul`] instead.
240-
///
241-
/// # Examples
242-
///
243-
/// Basic usage:
244-
///
245-
/// Please note that this example is shared between integer types.
246-
/// Which explains why `u32` is used here.
247-
///
248-
/// ```
249-
/// #![feature(bigint_helper_methods)]
250-
/// assert_eq!(5u32.widening_mul(2), (10, 0));
251-
/// assert_eq!(1_000_000_000u32.widening_mul(10), (1410065408, 2));
252-
/// ```
253-
#[unstable(feature = "bigint_helper_methods", issue = "85532")]
254-
#[must_use = "this returns the result of the operation, \
255-
without modifying the original"]
256-
#[inline]
257-
pub const fn widening_mul(self, rhs: Self) -> (Self, Self) {
258-
// note: longer-term this should be done via an intrinsic,
259-
// but for now we can deal without an impl for u128/i128
260-
// SAFETY: overflow will be contained within the wider types
261-
let wide = unsafe { (self as $WideT).unchecked_mul(rhs as $WideT) };
262-
(wide as $SelfT, (wide >> $BITS) as $SelfT)
263-
}
264-
265-
/// Calculates the "full multiplication" `self * rhs + carry`
266-
/// without the possibility to overflow.
267-
///
268-
/// This returns the low-order (wrapping) bits and the high-order (overflow) bits
269-
/// of the result as two separate values, in that order.
270-
///
271-
/// Performs "long multiplication" which takes in an extra amount to add, and may return an
272-
/// additional amount of overflow. This allows for chaining together multiple
273-
/// multiplications to create "big integers" which represent larger values.
274-
///
275-
/// If you don't need the `carry`, then you can use [`Self::widening_mul`] instead.
276-
///
277-
/// # Examples
278-
///
279-
/// Basic usage:
280-
///
281-
/// Please note that this example is shared between integer types.
282-
/// Which explains why `u32` is used here.
283-
///
284-
/// ```
285-
/// #![feature(bigint_helper_methods)]
286-
/// assert_eq!(5u32.carrying_mul(2, 0), (10, 0));
287-
/// assert_eq!(5u32.carrying_mul(2, 10), (20, 0));
288-
/// assert_eq!(1_000_000_000u32.carrying_mul(10, 0), (1410065408, 2));
289-
/// assert_eq!(1_000_000_000u32.carrying_mul(10, 10), (1410065418, 2));
290-
#[doc = concat!("assert_eq!(",
291-
stringify!($SelfT), "::MAX.carrying_mul(", stringify!($SelfT), "::MAX, ", stringify!($SelfT), "::MAX), ",
292-
"(0, ", stringify!($SelfT), "::MAX));"
293-
)]
294-
/// ```
295-
///
296-
/// This is the core operation needed for scalar multiplication when
297-
/// implementing it for wider-than-native types.
298-
///
299-
/// ```
300-
/// #![feature(bigint_helper_methods)]
301-
/// fn scalar_mul_eq(little_endian_digits: &mut Vec<u16>, multiplicand: u16) {
302-
/// let mut carry = 0;
303-
/// for d in little_endian_digits.iter_mut() {
304-
/// (*d, carry) = d.carrying_mul(multiplicand, carry);
305-
/// }
306-
/// if carry != 0 {
307-
/// little_endian_digits.push(carry);
308-
/// }
309-
/// }
310-
///
311-
/// let mut v = vec![10, 20];
312-
/// scalar_mul_eq(&mut v, 3);
313-
/// assert_eq!(v, [30, 60]);
314-
///
315-
/// assert_eq!(0x87654321_u64 * 0xFEED, 0x86D3D159E38D);
316-
/// let mut v = vec![0x4321, 0x8765];
317-
/// scalar_mul_eq(&mut v, 0xFEED);
318-
/// assert_eq!(v, [0xE38D, 0xD159, 0x86D3]);
319-
/// ```
320-
///
321-
/// If `carry` is zero, this is similar to [`overflowing_mul`](Self::overflowing_mul),
322-
/// except that it gives the value of the overflow instead of just whether one happened:
323-
///
324-
/// ```
325-
/// #![feature(bigint_helper_methods)]
326-
/// let r = u8::carrying_mul(7, 13, 0);
327-
/// assert_eq!((r.0, r.1 != 0), u8::overflowing_mul(7, 13));
328-
/// let r = u8::carrying_mul(13, 42, 0);
329-
/// assert_eq!((r.0, r.1 != 0), u8::overflowing_mul(13, 42));
330-
/// ```
331-
///
332-
/// The value of the first field in the returned tuple matches what you'd get
333-
/// by combining the [`wrapping_mul`](Self::wrapping_mul) and
334-
/// [`wrapping_add`](Self::wrapping_add) methods:
335-
///
336-
/// ```
337-
/// #![feature(bigint_helper_methods)]
338-
/// assert_eq!(
339-
/// 789_u16.carrying_mul(456, 123).0,
340-
/// 789_u16.wrapping_mul(456).wrapping_add(123),
341-
/// );
342-
/// ```
343-
#[unstable(feature = "bigint_helper_methods", issue = "85532")]
344-
#[must_use = "this returns the result of the operation, \
345-
without modifying the original"]
346-
#[inline]
347-
pub const fn carrying_mul(self, rhs: Self, carry: Self) -> (Self, Self) {
348-
// note: longer-term this should be done via an intrinsic,
349-
// but for now we can deal without an impl for u128/i128
350-
// SAFETY: overflow will be contained within the wider types
351-
let wide = unsafe {
352-
(self as $WideT).unchecked_mul(rhs as $WideT).unchecked_add(carry as $WideT)
353-
};
354-
(wide as $SelfT, (wide >> $BITS) as $SelfT)
355-
}
356-
};
357-
}
358-
359231
impl i8 {
360232
int_impl! {
361233
Self = i8,
@@ -576,7 +448,6 @@ impl u8 {
576448
from_xe_bytes_doc = u8_xe_bytes_doc!(),
577449
bound_condition = "",
578450
}
579-
widening_impl! { u8, u16, 8, unsigned }
580451
midpoint_impl! { u8, u16, unsigned }
581452

582453
/// Checks if the value is within the ASCII range.
@@ -1192,7 +1063,6 @@ impl u16 {
11921063
from_xe_bytes_doc = "",
11931064
bound_condition = "",
11941065
}
1195-
widening_impl! { u16, u32, 16, unsigned }
11961066
midpoint_impl! { u16, u32, unsigned }
11971067

11981068
/// Checks if the value is a Unicode surrogate code point, which are disallowed values for [`char`].
@@ -1240,7 +1110,6 @@ impl u32 {
12401110
from_xe_bytes_doc = "",
12411111
bound_condition = "",
12421112
}
1243-
widening_impl! { u32, u64, 32, unsigned }
12441113
midpoint_impl! { u32, u64, unsigned }
12451114
}
12461115

@@ -1264,7 +1133,6 @@ impl u64 {
12641133
from_xe_bytes_doc = "",
12651134
bound_condition = "",
12661135
}
1267-
widening_impl! { u64, u128, 64, unsigned }
12681136
midpoint_impl! { u64, u128, unsigned }
12691137
}
12701138

@@ -1314,7 +1182,6 @@ impl usize {
13141182
from_xe_bytes_doc = usize_isize_from_xe_bytes_doc!(),
13151183
bound_condition = " on 16-bit targets",
13161184
}
1317-
widening_impl! { usize, u32, 16, unsigned }
13181185
midpoint_impl! { usize, u32, unsigned }
13191186
}
13201187

@@ -1339,7 +1206,6 @@ impl usize {
13391206
from_xe_bytes_doc = usize_isize_from_xe_bytes_doc!(),
13401207
bound_condition = " on 32-bit targets",
13411208
}
1342-
widening_impl! { usize, u64, 32, unsigned }
13431209
midpoint_impl! { usize, u64, unsigned }
13441210
}
13451211

@@ -1364,7 +1230,6 @@ impl usize {
13641230
from_xe_bytes_doc = usize_isize_from_xe_bytes_doc!(),
13651231
bound_condition = " on 64-bit targets",
13661232
}
1367-
widening_impl! { usize, u128, 64, unsigned }
13681233
midpoint_impl! { usize, u128, unsigned }
13691234
}
13701235

‎library/core/src/num/uint_macros.rs

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3347,6 +3347,122 @@ macro_rules! uint_impl {
33473347
unsafe { mem::transmute(bytes) }
33483348
}
33493349

3350+
/// Calculates the complete product `self * rhs` without the possibility to overflow.
3351+
///
3352+
/// This returns the low-order (wrapping) bits and the high-order (overflow) bits
3353+
/// of the result as two separate values, in that order.
3354+
///
3355+
/// If you also need to add a carry to the wide result, then you want
3356+
/// [`Self::carrying_mul`] instead.
3357+
///
3358+
/// # Examples
3359+
///
3360+
/// Basic usage:
3361+
///
3362+
/// Please note that this example is shared between integer types.
3363+
/// Which explains why `u32` is used here.
3364+
///
3365+
/// ```
3366+
/// #![feature(bigint_helper_methods)]
3367+
/// assert_eq!(5u32.widening_mul(2), (10, 0));
3368+
/// assert_eq!(1_000_000_000u32.widening_mul(10), (1410065408, 2));
3369+
/// ```
3370+
#[unstable(feature = "bigint_helper_methods", issue = "85532")]
3371+
#[rustc_const_unstable(feature = "bigint_helper_methods", issue = "85532")]
3372+
#[must_use = "this returns the result of the operation, \
3373+
without modifying the original"]
3374+
#[inline]
3375+
pub const fn widening_mul(self, rhs: Self) -> (Self, Self) {
3376+
Self::carrying_mul(self, rhs, 0)
3377+
}
3378+
3379+
/// Calculates the "full multiplication" `self * rhs + carry`
3380+
/// without the possibility to overflow.
3381+
///
3382+
/// This returns the low-order (wrapping) bits and the high-order (overflow) bits
3383+
/// of the result as two separate values, in that order.
3384+
///
3385+
/// Performs "long multiplication" which takes in an extra amount to add, and may return an
3386+
/// additional amount of overflow. This allows for chaining together multiple
3387+
/// multiplications to create "big integers" which represent larger values.
3388+
///
3389+
/// If you don't need the `carry`, then you can use [`Self::widening_mul`] instead.
3390+
///
3391+
/// # Examples
3392+
///
3393+
/// Basic usage:
3394+
///
3395+
/// Please note that this example is shared between integer types.
3396+
/// Which explains why `u32` is used here.
3397+
///
3398+
/// ```
3399+
/// #![feature(bigint_helper_methods)]
3400+
/// assert_eq!(5u32.carrying_mul(2, 0), (10, 0));
3401+
/// assert_eq!(5u32.carrying_mul(2, 10), (20, 0));
3402+
/// assert_eq!(1_000_000_000u32.carrying_mul(10, 0), (1410065408, 2));
3403+
/// assert_eq!(1_000_000_000u32.carrying_mul(10, 10), (1410065418, 2));
3404+
#[doc = concat!("assert_eq!(",
3405+
stringify!($SelfT), "::MAX.carrying_mul(", stringify!($SelfT), "::MAX, ", stringify!($SelfT), "::MAX), ",
3406+
"(0, ", stringify!($SelfT), "::MAX));"
3407+
)]
3408+
/// ```
3409+
///
3410+
/// This is the core operation needed for scalar multiplication when
3411+
/// implementing it for wider-than-native types.
3412+
///
3413+
/// ```
3414+
/// #![feature(bigint_helper_methods)]
3415+
/// fn scalar_mul_eq(little_endian_digits: &mut Vec<u16>, multiplicand: u16) {
3416+
/// let mut carry = 0;
3417+
/// for d in little_endian_digits.iter_mut() {
3418+
/// (*d, carry) = d.carrying_mul(multiplicand, carry);
3419+
/// }
3420+
/// if carry != 0 {
3421+
/// little_endian_digits.push(carry);
3422+
/// }
3423+
/// }
3424+
///
3425+
/// let mut v = vec![10, 20];
3426+
/// scalar_mul_eq(&mut v, 3);
3427+
/// assert_eq!(v, [30, 60]);
3428+
///
3429+
/// assert_eq!(0x87654321_u64 * 0xFEED, 0x86D3D159E38D);
3430+
/// let mut v = vec![0x4321, 0x8765];
3431+
/// scalar_mul_eq(&mut v, 0xFEED);
3432+
/// assert_eq!(v, [0xE38D, 0xD159, 0x86D3]);
3433+
/// ```
3434+
///
3435+
/// If `carry` is zero, this is similar to [`overflowing_mul`](Self::overflowing_mul),
3436+
/// except that it gives the value of the overflow instead of just whether one happened:
3437+
///
3438+
/// ```
3439+
/// #![feature(bigint_helper_methods)]
3440+
/// let r = u8::carrying_mul(7, 13, 0);
3441+
/// assert_eq!((r.0, r.1 != 0), u8::overflowing_mul(7, 13));
3442+
/// let r = u8::carrying_mul(13, 42, 0);
3443+
/// assert_eq!((r.0, r.1 != 0), u8::overflowing_mul(13, 42));
3444+
/// ```
3445+
///
3446+
/// The value of the first field in the returned tuple matches what you'd get
3447+
/// by combining the [`wrapping_mul`](Self::wrapping_mul) and
3448+
/// [`wrapping_add`](Self::wrapping_add) methods:
3449+
///
3450+
/// ```
3451+
/// #![feature(bigint_helper_methods)]
3452+
/// assert_eq!(
3453+
/// 789_u16.carrying_mul(456, 123).0,
3454+
/// 789_u16.wrapping_mul(456).wrapping_add(123),
3455+
/// );
3456+
/// ```
3457+
#[unstable(feature = "bigint_helper_methods", issue = "85532")]
3458+
#[rustc_const_unstable(feature = "bigint_helper_methods", issue = "85532")]
3459+
#[must_use = "this returns the result of the operation, \
3460+
without modifying the original"]
3461+
#[inline]
3462+
pub const fn carrying_mul(self, rhs: Self, carry: Self) -> (Self, Self) {
3463+
intrinsics::carrying_mul_add(self, rhs, 0, carry)
3464+
}
3465+
33503466
/// New code should prefer to use
33513467
#[doc = concat!("[`", stringify!($SelfT), "::MIN", "`] instead.")]
33523468
///

‎library/core/tests/intrinsics.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,71 @@ fn test_three_way_compare_in_const_contexts() {
125125
assert_eq!(SIGNED_EQUAL, Equal);
126126
assert_eq!(SIGNED_GREATER, Greater);
127127
}
128+
129+
fn fallback_cma<T: core::intrinsics::fallback::CarryingMulAdd>(
130+
a: T,
131+
b: T,
132+
c: T,
133+
d: T,
134+
) -> (T::Unsigned, T) {
135+
a.carrying_mul_add(b, c, d)
136+
}
137+
138+
#[test]
139+
fn carrying_mul_add_fallback_u32() {
140+
let r = fallback_cma::<u32>(0x9e37_79b9, 0x7f4a_7c15, 0xf39c_c060, 0x5ced_c834);
141+
assert_eq!(r, (0x2087_20c1, 0x4eab_8e1d));
142+
let r = fallback_cma::<u32>(0x1082_276b, 0xf3a2_7251, 0xf86c_6a11, 0xd0c1_8e95);
143+
assert_eq!(r, (0x7aa0_1781, 0x0fb6_0528));
144+
}
145+
146+
#[test]
147+
fn carrying_mul_add_fallback_i32() {
148+
let r = fallback_cma::<i32>(-1, -1, -1, -1);
149+
assert_eq!(r, (u32::MAX, -1));
150+
let r = fallback_cma::<i32>(1, -1, 1, 1);
151+
assert_eq!(r, (1, 0));
152+
}
153+
154+
#[test]
155+
fn carrying_mul_add_fallback_u128() {
156+
assert_eq!(fallback_cma::<u128>(u128::MAX, u128::MAX, 0, 0), (1, u128::MAX - 1));
157+
assert_eq!(fallback_cma::<u128>(1, 1, 1, 1), (3, 0));
158+
assert_eq!(fallback_cma::<u128>(0, 0, u128::MAX, u128::MAX), (u128::MAX - 1, 1));
159+
assert_eq!(
160+
fallback_cma::<u128>(u128::MAX, u128::MAX, u128::MAX, u128::MAX),
161+
(u128::MAX, u128::MAX),
162+
);
163+
164+
let r = fallback_cma::<u128>(
165+
0x243f6a8885a308d313198a2e03707344,
166+
0xa4093822299f31d0082efa98ec4e6c89,
167+
0x452821e638d01377be5466cf34e90c6c,
168+
0xc0ac29b7c97c50dd3f84d5b5b5470917,
169+
);
170+
assert_eq!(r, (0x8050ec20ed554e40338d277e00b674e7, 0x1739ee6cea07da409182d003859b59d8));
171+
let r = fallback_cma::<u128>(
172+
0x9216d5d98979fb1bd1310ba698dfb5ac,
173+
0x2ffd72dbd01adfb7b8e1afed6a267e96,
174+
0xba7c9045f12c7f9924a19947b3916cf7,
175+
0x0801f2e2858efc16636920d871574e69,
176+
);
177+
assert_eq!(r, (0x185525545fdb2fefb502a3a602efd628, 0x1b62d35fe3bff6b566f99667ef7ebfd6));
178+
}
179+
180+
#[test]
181+
fn carrying_mul_add_fallback_i128() {
182+
assert_eq!(fallback_cma::<i128>(-1, -1, 0, 0), (1, 0));
183+
let r = fallback_cma::<i128>(-1, -1, -1, -1);
184+
assert_eq!(r, (u128::MAX, -1));
185+
let r = fallback_cma::<i128>(1, -1, 1, 1);
186+
assert_eq!(r, (1, 0));
187+
assert_eq!(
188+
fallback_cma::<i128>(i128::MAX, i128::MAX, i128::MAX, i128::MAX),
189+
(u128::MAX, i128::MAX / 2),
190+
);
191+
assert_eq!(
192+
fallback_cma::<i128>(i128::MIN, i128::MIN, i128::MAX, i128::MAX),
193+
(u128::MAX - 1, -(i128::MIN / 2)),
194+
);
195+
}

‎library/core/tests/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#![feature(const_swap_nonoverlapping)]
2020
#![feature(const_trait_impl)]
2121
#![feature(core_intrinsics)]
22+
#![feature(core_intrinsics_fallbacks)]
2223
#![feature(core_io_borrowed_buf)]
2324
#![feature(core_private_bignum)]
2425
#![feature(core_private_diy_float)]
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//@ revisions: RAW OPT
2+
//@ compile-flags: -C opt-level=1
3+
//@[RAW] compile-flags: -C no-prepopulate-passes
4+
//@[OPT] min-llvm-version: 19
5+
6+
#![crate_type = "lib"]
7+
#![feature(core_intrinsics)]
8+
#![feature(core_intrinsics_fallbacks)]
9+
10+
// Note that LLVM seems to sometimes permute the order of arguments to mul and add,
11+
// so these tests don't check the arguments in the optimized revision.
12+
13+
use std::intrinsics::{carrying_mul_add, fallback};
14+
15+
// The fallbacks are emitted even when they're never used, but optimize out.
16+
17+
// RAW: wide_mul_u128
18+
// OPT-NOT: wide_mul_u128
19+
20+
// CHECK-LABEL: @cma_u8
21+
#[no_mangle]
22+
pub unsafe fn cma_u8(a: u8, b: u8, c: u8, d: u8) -> (u8, u8) {
23+
// CHECK: [[A:%.+]] = zext i8 %a to i16
24+
// CHECK: [[B:%.+]] = zext i8 %b to i16
25+
// CHECK: [[C:%.+]] = zext i8 %c to i16
26+
// CHECK: [[D:%.+]] = zext i8 %d to i16
27+
// CHECK: [[AB:%.+]] = mul nuw i16
28+
// RAW-SAME: [[A]], [[B]]
29+
// CHECK: [[ABC:%.+]] = add nuw i16
30+
// RAW-SAME: [[AB]], [[C]]
31+
// CHECK: [[ABCD:%.+]] = add nuw i16
32+
// RAW-SAME: [[ABC]], [[D]]
33+
// CHECK: [[LOW:%.+]] = trunc i16 [[ABCD]] to i8
34+
// CHECK: [[HIGHW:%.+]] = lshr i16 [[ABCD]], 8
35+
// RAW: [[HIGH:%.+]] = trunc i16 [[HIGHW]] to i8
36+
// OPT: [[HIGH:%.+]] = trunc nuw i16 [[HIGHW]] to i8
37+
// CHECK: [[PAIR0:%.+]] = insertvalue { i8, i8 } poison, i8 [[LOW]], 0
38+
// CHECK: [[PAIR1:%.+]] = insertvalue { i8, i8 } [[PAIR0]], i8 [[HIGH]], 1
39+
// OPT: ret { i8, i8 } [[PAIR1]]
40+
carrying_mul_add(a, b, c, d)
41+
}
42+
43+
// CHECK-LABEL: @cma_u32
44+
#[no_mangle]
45+
pub unsafe fn cma_u32(a: u32, b: u32, c: u32, d: u32) -> (u32, u32) {
46+
// CHECK: [[A:%.+]] = zext i32 %a to i64
47+
// CHECK: [[B:%.+]] = zext i32 %b to i64
48+
// CHECK: [[C:%.+]] = zext i32 %c to i64
49+
// CHECK: [[D:%.+]] = zext i32 %d to i64
50+
// CHECK: [[AB:%.+]] = mul nuw i64
51+
// RAW-SAME: [[A]], [[B]]
52+
// CHECK: [[ABC:%.+]] = add nuw i64
53+
// RAW-SAME: [[AB]], [[C]]
54+
// CHECK: [[ABCD:%.+]] = add nuw i64
55+
// RAW-SAME: [[ABC]], [[D]]
56+
// CHECK: [[LOW:%.+]] = trunc i64 [[ABCD]] to i32
57+
// CHECK: [[HIGHW:%.+]] = lshr i64 [[ABCD]], 32
58+
// RAW: [[HIGH:%.+]] = trunc i64 [[HIGHW]] to i32
59+
// OPT: [[HIGH:%.+]] = trunc nuw i64 [[HIGHW]] to i32
60+
// CHECK: [[PAIR0:%.+]] = insertvalue { i32, i32 } poison, i32 [[LOW]], 0
61+
// CHECK: [[PAIR1:%.+]] = insertvalue { i32, i32 } [[PAIR0]], i32 [[HIGH]], 1
62+
// OPT: ret { i32, i32 } [[PAIR1]]
63+
carrying_mul_add(a, b, c, d)
64+
}
65+
66+
// CHECK-LABEL: @cma_u128
67+
// CHECK-SAME: sret{{.+}}dereferenceable(32){{.+}}%_0,{{.+}}%a,{{.+}}%b,{{.+}}%c,{{.+}}%d
68+
#[no_mangle]
69+
pub unsafe fn cma_u128(a: u128, b: u128, c: u128, d: u128) -> (u128, u128) {
70+
// CHECK: [[A:%.+]] = zext i128 %a to i256
71+
// CHECK: [[B:%.+]] = zext i128 %b to i256
72+
// CHECK: [[C:%.+]] = zext i128 %c to i256
73+
// CHECK: [[D:%.+]] = zext i128 %d to i256
74+
// CHECK: [[AB:%.+]] = mul nuw i256
75+
// RAW-SAME: [[A]], [[B]]
76+
// CHECK: [[ABC:%.+]] = add nuw i256
77+
// RAW-SAME: [[AB]], [[C]]
78+
// CHECK: [[ABCD:%.+]] = add nuw i256
79+
// RAW-SAME: [[ABC]], [[D]]
80+
// CHECK: [[LOW:%.+]] = trunc i256 [[ABCD]] to i128
81+
// CHECK: [[HIGHW:%.+]] = lshr i256 [[ABCD]], 128
82+
// RAW: [[HIGH:%.+]] = trunc i256 [[HIGHW]] to i128
83+
// OPT: [[HIGH:%.+]] = trunc nuw i256 [[HIGHW]] to i128
84+
// RAW: [[PAIR0:%.+]] = insertvalue { i128, i128 } poison, i128 [[LOW]], 0
85+
// RAW: [[PAIR1:%.+]] = insertvalue { i128, i128 } [[PAIR0]], i128 [[HIGH]], 1
86+
// OPT: store i128 [[LOW]], ptr %_0
87+
// OPT: [[P1:%.+]] = getelementptr inbounds i8, ptr %_0, {{i32|i64}} 16
88+
// OPT: store i128 [[HIGH]], ptr [[P1]]
89+
// CHECK: ret void
90+
carrying_mul_add(a, b, c, d)
91+
}
92+
93+
// CHECK-LABEL: @cma_i128
94+
// CHECK-SAME: sret{{.+}}dereferenceable(32){{.+}}%_0,{{.+}}%a,{{.+}}%b,{{.+}}%c,{{.+}}%d
95+
#[no_mangle]
96+
pub unsafe fn cma_i128(a: i128, b: i128, c: i128, d: i128) -> (u128, i128) {
97+
// CHECK: [[A:%.+]] = sext i128 %a to i256
98+
// CHECK: [[B:%.+]] = sext i128 %b to i256
99+
// CHECK: [[C:%.+]] = sext i128 %c to i256
100+
// CHECK: [[D:%.+]] = sext i128 %d to i256
101+
// CHECK: [[AB:%.+]] = mul nsw i256
102+
// RAW-SAME: [[A]], [[B]]
103+
// CHECK: [[ABC:%.+]] = add nsw i256
104+
// RAW-SAME: [[AB]], [[C]]
105+
// CHECK: [[ABCD:%.+]] = add nsw i256
106+
// RAW-SAME: [[ABC]], [[D]]
107+
// CHECK: [[LOW:%.+]] = trunc i256 [[ABCD]] to i128
108+
// CHECK: [[HIGHW:%.+]] = lshr i256 [[ABCD]], 128
109+
// RAW: [[HIGH:%.+]] = trunc i256 [[HIGHW]] to i128
110+
// OPT: [[HIGH:%.+]] = trunc nuw i256 [[HIGHW]] to i128
111+
// RAW: [[PAIR0:%.+]] = insertvalue { i128, i128 } poison, i128 [[LOW]], 0
112+
// RAW: [[PAIR1:%.+]] = insertvalue { i128, i128 } [[PAIR0]], i128 [[HIGH]], 1
113+
// OPT: store i128 [[LOW]], ptr %_0
114+
// OPT: [[P1:%.+]] = getelementptr inbounds i8, ptr %_0, {{i32|i64}} 16
115+
// OPT: store i128 [[HIGH]], ptr [[P1]]
116+
// CHECK: ret void
117+
carrying_mul_add(a, b, c, d)
118+
}
119+
120+
// CHECK-LABEL: @fallback_cma_u32
121+
#[no_mangle]
122+
pub unsafe fn fallback_cma_u32(a: u32, b: u32, c: u32, d: u32) -> (u32, u32) {
123+
// OPT-DAG: [[A:%.+]] = zext i32 %a to i64
124+
// OPT-DAG: [[B:%.+]] = zext i32 %b to i64
125+
// OPT-DAG: [[AB:%.+]] = mul nuw i64
126+
// OPT-DAG: [[C:%.+]] = zext i32 %c to i64
127+
// OPT-DAG: [[ABC:%.+]] = add nuw i64{{.+}}[[C]]
128+
// OPT-DAG: [[D:%.+]] = zext i32 %d to i64
129+
// OPT-DAG: [[ABCD:%.+]] = add nuw i64{{.+}}[[D]]
130+
// OPT-DAG: [[LOW:%.+]] = trunc i64 [[ABCD]] to i32
131+
// OPT-DAG: [[HIGHW:%.+]] = lshr i64 [[ABCD]], 32
132+
// OPT-DAG: [[HIGH:%.+]] = trunc nuw i64 [[HIGHW]] to i32
133+
// OPT-DAG: [[PAIR0:%.+]] = insertvalue { i32, i32 } poison, i32 [[LOW]], 0
134+
// OPT-DAG: [[PAIR1:%.+]] = insertvalue { i32, i32 } [[PAIR0]], i32 [[HIGH]], 1
135+
// OPT-DAG: ret { i32, i32 } [[PAIR1]]
136+
fallback::CarryingMulAdd::carrying_mul_add(a, b, c, d)
137+
}

0 commit comments

Comments
 (0)
Please sign in to comment.