Skip to content

Commit a1ea210

Browse files
authored
uint: faster mul by u64 (#125)
* uint: convert benchmarks to criterion * rustfmt * add u256_rem_small * add u512_rem_large bench * uint: add mulmod comparison benches * uint: add comparison with gmp * uint: clean up benches * uint: fix windows ci * uint: fix gmp conversions * uint: make sure bytes are little endian for gmp conversion * uint: compare mul by u32 and u64 * uint: fast path for mul by u64 * uint: make free-standing functions static * uint: remove fast-path for mul by u64
1 parent 4310011 commit a1ea210

File tree

2 files changed

+128
-87
lines changed

2 files changed

+128
-87
lines changed

uint/benches/bigint.rs

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ criterion_group!(
6262
u512_mul,
6363
u512_div,
6464
u512_rem,
65+
u512_mul_u32_vs_u64,
6566
mulmod_u512_vs_biguint_vs_gmp,
6667
conversions,
6768
u512_bit_and,
@@ -141,7 +142,14 @@ fn u256_mul(c: &mut Criterion) {
141142
black_box(x.overflowing_mul(y).0)
142143
})
143144
},
144-
vec![(U256::max_value(), 1u64), (U256::from(3), u64::max_value())],
145+
vec![
146+
(U256::max_value(), 1u64),
147+
(U256::from(3), u64::max_value()),
148+
(
149+
U256::from_dec_str("21674844646682989462120101885968193938394323990565507610662749").unwrap(),
150+
173,
151+
),
152+
],
145153
),
146154
);
147155
}
@@ -330,6 +338,31 @@ fn bench_convert_to_gmp(b: &mut Bencher, i: u64) {
330338
});
331339
}
332340

341+
// Criterion benchmark group comparing `U512 * u32` against `U512 * u64`
// for the same set of multiplier values.
fn u512_mul_u32_vs_u64(c: &mut Criterion) {
342+
// Multipliers shared by both variants: 1, two intermediate values and u32::MAX.
let mods = vec![1u32, 42, 10_000_001, u32::max_value()];
343+
c.bench(
344+
"multiply u512 by u32 vs u64",
345+
// Baseline variant: the multiplier is passed through as a u32.
ParameterizedBenchmark::new("u32", |b, i| bench_u512_mul_u32(b, *i), mods)
346+
// Comparison variant: the same multiplier, losslessly widened to u64.
.with_function("u64", |b, i| bench_u512_mul_u64(b, u64::from(*i))),
347+
);
348+
}
349+
350+
// Benchmarks multiplying a fixed `U512` value by the `u32` multiplier `i`.
fn bench_u512_mul_u32(b: &mut Bencher, i: u32) {
351+
// Fixed left-hand operand parsed from an all-`F` literal (presumably hexadecimal —
// confirm against `U512::from_str`); `unwrap` panics if parsing fails.
let x =
352+
U512::from_str("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF").unwrap();
353+
b.iter(|| {
354+
// `black_box` keeps the optimizer from discarding the product.
black_box(x * i)
355+
});
356+
}
357+
358+
// Benchmarks multiplying a fixed `U512` value by the `u64` multiplier `i`.
// Uses the same left-hand operand literal as `bench_u512_mul_u32`.
fn bench_u512_mul_u64(b: &mut Bencher, i: u64) {
359+
// Fixed left-hand operand parsed from an all-`F` literal (presumably hexadecimal —
// confirm against `U512::from_str`); `unwrap` panics if parsing fails.
let x =
360+
U512::from_str("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF").unwrap();
361+
b.iter(|| {
362+
// `black_box` keeps the optimizer from discarding the product.
black_box(x * i)
363+
});
364+
}
365+
333366
fn mulmod_u512_vs_biguint_vs_gmp(c: &mut Criterion) {
334367
let mods = vec![1u64, 42, 10_000_001, u64::max_value()];
335368
c.bench(

uint/src/uint.rs

Lines changed: 94 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ macro_rules! uint_full_mul_reg {
173173
if $check(me[j], carry) {
174174
let a = me[j];
175175

176-
let (hi, low) = $crate::split_u128(a as u128 * b as u128);
176+
let (hi, low) = Self::split_u128(a as u128 * b as u128);
177177

178178
let overflow = {
179179
let existing_low = &mut ret[i + j];
@@ -303,13 +303,7 @@ macro_rules! impl_mul_from {
303303
result
304304
}
305305
}
306-
}
307-
}
308306

309-
#[macro_export]
310-
#[doc(hidden)]
311-
macro_rules! impl_mulassign_from {
312-
($name: ident, $other: ident) => {
313307
impl $crate::core_::ops::MulAssign<$other> for $name {
314308
fn mul_assign(&mut self, other: $other) {
315309
let result = *self * other;
@@ -319,29 +313,57 @@ macro_rules! impl_mulassign_from {
319313
}
320314
}
321315

322-
#[inline(always)]
316+
#[macro_export]
323317
#[doc(hidden)]
324-
pub fn mul_u32(a: (u64, u64), b: u64, carry: u64) -> (u64, u64) {
325-
let upper = b * a.0;
326-
let lower = b * a.1;
318+
// Generates `Mul` and `MulAssign` implementations between the big-integer type
// `$name` and the primitive integer type `$other`.
//
// Four `Mul` impls are emitted (value/reference on either side). Each casts the
// primitive to `u64`, calls `overflowing_mul_u64`, and passes `carry > 0` to
// `panic_on_overflow!`. `MulAssign` is expressed via `*self * (other as u64)`.
//
// NOTE(review): `other as u64` sign-extends negative values when `$other` is a
// signed type (the macro is invoked for i8, i16, i32, i64 and isize elsewhere in
// this diff), so e.g. `-1i8` becomes `u64::MAX` instead of being rejected.
// Confirm whether negative multipliers are meant to be accepted here.
macro_rules! impl_mul_for_primitive {
319+
($name: ty, $other: ident) => {
320+
// `$name * $other`
impl $crate::core_::ops::Mul<$other> for $name {
321+
type Output = $name;
327322

328-
let (res1, overflow1) = lower.overflowing_add(upper << 32);
329-
let (res2, overflow2) = res1.overflowing_add(carry);
323+
fn mul(self, other: $other) -> $name {
324+
let (result, carry) = self.overflowing_mul_u64(other as u64);
325+
// A non-zero carry means the product did not fit in `$name`.
panic_on_overflow!(carry > 0);
326+
result
327+
}
328+
}
330329

331-
let carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
332-
(res2, carry)
333-
}
330+
// `$name * &$other`
impl<'a> $crate::core_::ops::Mul<&'a $other> for $name {
331+
type Output = $name;
334332

335-
#[inline(always)]
336-
#[doc(hidden)]
337-
pub fn split(a: u64) -> (u64, u64) {
338-
(a >> 32, a & 0xFFFF_FFFF)
339-
}
333+
fn mul(self, other: &'a $other) -> $name {
334+
let (result, carry) = self.overflowing_mul_u64(*other as u64);
335+
panic_on_overflow!(carry > 0);
336+
result
337+
}
338+
}
340339

341-
#[inline(always)]
342-
#[doc(hidden)]
343-
pub fn split_u128(a: u128) -> (u64, u64) {
344-
((a >> 64) as _, (a & 0xFFFFFFFFFFFFFFFF) as _)
340+
// `&$name * &$other`
impl<'a> $crate::core_::ops::Mul<&'a $other> for &'a $name {
341+
type Output = $name;
342+
343+
fn mul(self, other: &'a $other) -> $name {
344+
let (result, carry) = self.overflowing_mul_u64(*other as u64);
345+
panic_on_overflow!(carry > 0);
346+
result
347+
}
348+
}
349+
350+
// `&$name * $other`
impl<'a> $crate::core_::ops::Mul<$other> for &'a $name {
351+
type Output = $name;
352+
353+
fn mul(self, other: $other) -> $name {
354+
let (result, carry) = self.overflowing_mul_u64(other as u64);
355+
panic_on_overflow!(carry > 0);
356+
result
357+
}
358+
}
359+
360+
// `$name *= $other`, delegating to the `u64` multiplication operator.
impl $crate::core_::ops::MulAssign<$other> for $name {
361+
fn mul_assign(&mut self, other: $other) {
362+
let result = *self * (other as u64);
363+
*self = result
364+
}
365+
}
366+
}
345367
}
346368

347369
#[macro_export]
@@ -428,8 +450,8 @@ macro_rules! construct_uint {
428450

429451
let mut res = Self::default();
430452
for b in value.bytes().map(|b| b - 48) {
431-
let (r, overflow) = res.overflowing_mul_u32(10);
432-
if overflow {
453+
let (r, overflow) = res.overflowing_mul_u64(10);
454+
if overflow > 0 {
433455
return Err($crate::FromDecStrErr::InvalidLength);
434456
}
435457
let (r, overflow) = r.overflowing_add(b.into());
@@ -512,6 +534,15 @@ macro_rules! construct_uint {
512534
return true;
513535
}
514536

537+
// Whether this value fits in a single u64 word.
// Returns `true` when words 1..$n_words are all zero, i.e. only word 0 may be non-zero.
538+
#[inline]
539+
fn fits_word(&self) -> bool {
540+
let &$name(ref arr) = self;
541+
// Word 0 is deliberately skipped; any non-zero word above it disqualifies the value.
for i in 1..$n_words { if arr[i] != 0 { return false; } }
542+
return true;
543+
}
544+
545+
515546
/// Return the least number of bits needed to represent the number
516547
#[inline]
517548
pub fn bits(&self) -> usize {
@@ -788,20 +819,35 @@ macro_rules! construct_uint {
788819
}
789820
}
790821

791-
/// Overflowing multiplication by u32.
792-
fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
793-
let $name(ref arr) = self;
794-
let mut ret = [0u64; $n_words];
795-
let mut carry = 0;
796-
let o = other as u64;
822+
// Computes `a * b + carry` in 128-bit arithmetic and returns it as
// `(low 64 bits, high 64 bits)`; the high half is the carry for the next word.
// The expression cannot overflow u128: (2^64 - 1)^2 + (2^64 - 1) = 2^128 - 2^64.
#[inline(always)]
823+
fn mul_u64(a: u64, b: u64, carry: u64) -> (u64, u64) {
824+
let (hi, lo) = Self::split_u128(u128::from(a) * u128::from(b) + u128::from(carry));
825+
// Note the swap: `split_u128` yields (hi, lo), this function returns (lo, hi).
(lo, hi)
826+
}
797827

798-
for i in 0..$n_words {
799-
let (res, carry2) = $crate::mul_u32($crate::split(arr[i]), o, carry);
800-
ret[i] = res;
801-
carry = carry2;
828+
// Splits a u64 into `(high 32 bits, low 32 bits)`, each returned as a u64.
// NOTE(review): no caller is visible in this diff now that `mul_u32` is removed —
// confirm whether this helper is still used.
#[inline(always)]
829+
fn split(a: u64) -> (u64, u64) {
830+
(a >> 32, a & 0xFFFF_FFFF)
831+
}
832+
833+
// Splits a u128 into `(high 64 bits, low 64 bits)`.
#[inline(always)]
834+
fn split_u128(a: u128) -> (u64, u64) {
835+
// `as _` narrows each half to u64; the mask keeps only the low 64 bits.
((a >> 64) as _, (a & 0xFFFFFFFFFFFFFFFF) as _)
836+
}
837+
838+
839+
/// Overflowing multiplication by u64.
840+
/// Returns the word-wise product and the carry out of the last word;
/// callers in this file treat a non-zero carry as overflow.
841+
fn overflowing_mul_u64(mut self, other: u64) -> (Self, u64) {
842+
let mut carry = 0u64;
843+
844+
// Multiply each word in place, starting at index 0, feeding the high half
// of every partial product into the next word as its carry.
for d in self.0.iter_mut() {
845+
let (res, c) = Self::mul_u64(*d, other, carry);
846+
*d = res;
847+
carry = c;
802848
}
803849

804-
($name(ret), carry > 0)
850+
(self, carry)
805851
}
806852

807853
/// Converts from big endian representation bytes in memory.
@@ -950,56 +996,18 @@ macro_rules! construct_uint {
950996
}
951997
}
952998

953-
// specialization for u32
954-
impl $crate::core_::ops::Mul<u32> for $name {
955-
type Output = $name;
956-
957-
fn mul(self, other: u32) -> $name {
958-
let (ret, overflow) = self.overflowing_mul_u32(other);
959-
panic_on_overflow!(overflow);
960-
ret
961-
}
962-
}
963-
964-
impl<'a> $crate::core_::ops::Mul<u32> for &'a $name {
965-
type Output = $name;
966-
967-
fn mul(self, other: u32) -> $name {
968-
*self * other
969-
}
970-
}
971-
972-
impl $crate::core_::ops::MulAssign<u32> for $name {
973-
fn mul_assign(&mut self, other: u32) {
974-
let result = *self * other;
975-
*self = result
976-
}
977-
}
978-
979999
// all other impls
980-
impl_mul_from!($name, u8);
981-
impl_mul_from!($name, u16);
982-
impl_mul_from!($name, u64);
983-
impl_mul_from!($name, usize);
984-
985-
impl_mul_from!($name, i8);
986-
impl_mul_from!($name, i16);
987-
impl_mul_from!($name, i64);
988-
impl_mul_from!($name, isize);
989-
9901000
impl_mul_from!($name, $name);
991-
992-
impl_mulassign_from!($name, u8);
993-
impl_mulassign_from!($name, u16);
994-
impl_mulassign_from!($name, u64);
995-
impl_mulassign_from!($name, usize);
996-
997-
impl_mulassign_from!($name, i8);
998-
impl_mulassign_from!($name, i16);
999-
impl_mulassign_from!($name, i64);
1000-
impl_mulassign_from!($name, isize);
1001-
1002-
impl_mulassign_from!($name, $name);
1001+
impl_mul_for_primitive!($name, u8);
1002+
impl_mul_for_primitive!($name, u16);
1003+
impl_mul_for_primitive!($name, u32);
1004+
impl_mul_for_primitive!($name, u64);
1005+
impl_mul_for_primitive!($name, usize);
1006+
impl_mul_for_primitive!($name, i8);
1007+
impl_mul_for_primitive!($name, i16);
1008+
impl_mul_for_primitive!($name, i32);
1009+
impl_mul_for_primitive!($name, i64);
1010+
impl_mul_for_primitive!($name, isize);
10031011

10041012
impl<T> $crate::core_::ops::Div<T> for $name where T: Into<$name> {
10051013
type Output = $name;

0 commit comments

Comments
 (0)