Skip to content

Commit d1f961c

Browse files
authored
Improve SmallRng initialization performance (#1482)
1 parent d2eb51b commit d1f961c

File tree

5 files changed

+123
-16
lines changed

5 files changed

+123
-16
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.
2424
- Add `UniformUsize` and use to make `Uniform` for `usize` portable (#1487)
2525
- Remove support for generating `isize` and `usize` values with `Standard`, `Uniform` and `Fill` and usage as a `WeightedAliasIndex` weight (#1487)
2626
- Require `Clone` and `AsRef` bound for `SeedableRng::Seed`. (#1491)
27+
- Improve SmallRng initialization performance (#1482)
2728
- Implement `Distribution<u64>` for `Poisson<f64>` (#1498)
2829
- Limit the maximal acceptable lambda for `Poisson` to solve (#1312) (#1498)
2930
- Rename `Rng::gen_iter` to `random_iter` (#1500)

benches/benches/generators.rs

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use rand_pcg::{Pcg32, Pcg64, Pcg64Dxsm, Pcg64Mcg};
1919
criterion_group!(
2020
name = benches;
2121
config = Criterion::default();
22-
targets = gen_bytes, gen_u32, gen_u64, init_gen, reseeding_bytes
22+
targets = gen_bytes, gen_u32, gen_u64, init_gen, init_from_u64, init_from_seed, reseeding_bytes
2323
);
2424
criterion_main!(benches);
2525

@@ -133,6 +133,62 @@ pub fn init_gen(c: &mut Criterion) {
133133
bench::<ChaCha12Rng>(&mut g, "chacha12");
134134
bench::<ChaCha20Rng>(&mut g, "chacha20");
135135
bench::<StdRng>(&mut g, "std");
136+
bench::<SmallRng>(&mut g, "small");
137+
138+
g.finish()
139+
}
140+
141+
pub fn init_from_u64(c: &mut Criterion) {
142+
let mut g = c.benchmark_group("init_from_u64");
143+
g.warm_up_time(Duration::from_millis(500));
144+
g.measurement_time(Duration::from_millis(1000));
145+
146+
fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str) {
147+
g.bench_function(name, |b| {
148+
let mut rng = Pcg32::from_os_rng();
149+
let seed = rng.random();
150+
b.iter(|| R::seed_from_u64(black_box(seed)));
151+
});
152+
}
153+
154+
bench::<Pcg32>(&mut g, "pcg32");
155+
bench::<Pcg64>(&mut g, "pcg64");
156+
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
157+
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
158+
bench::<ChaCha8Rng>(&mut g, "chacha8");
159+
bench::<ChaCha12Rng>(&mut g, "chacha12");
160+
bench::<ChaCha20Rng>(&mut g, "chacha20");
161+
bench::<StdRng>(&mut g, "std");
162+
bench::<SmallRng>(&mut g, "small");
163+
164+
g.finish()
165+
}
166+
167+
pub fn init_from_seed(c: &mut Criterion) {
168+
let mut g = c.benchmark_group("init_from_seed");
169+
g.warm_up_time(Duration::from_millis(500));
170+
g.measurement_time(Duration::from_millis(1000));
171+
172+
fn bench<R: SeedableRng>(g: &mut BenchmarkGroup<WallTime>, name: &str)
173+
where
174+
rand::distr::Standard: Distribution<<R as SeedableRng>::Seed>,
175+
{
176+
g.bench_function(name, |b| {
177+
let mut rng = Pcg32::from_os_rng();
178+
let seed = rng.random();
179+
b.iter(|| R::from_seed(black_box(seed.clone())));
180+
});
181+
}
182+
183+
bench::<Pcg32>(&mut g, "pcg32");
184+
bench::<Pcg64>(&mut g, "pcg64");
185+
bench::<Pcg64Mcg>(&mut g, "pcg64mcg");
186+
bench::<Pcg64Dxsm>(&mut g, "pcg64dxsm");
187+
bench::<ChaCha8Rng>(&mut g, "chacha8");
188+
bench::<ChaCha12Rng>(&mut g, "chacha12");
189+
bench::<ChaCha20Rng>(&mut g, "chacha20");
190+
bench::<StdRng>(&mut g, "std");
191+
bench::<SmallRng>(&mut g, "small");
136192

137193
g.finish()
138194
}

src/rngs/small.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ impl SeedableRng for SmallRng {
8383

8484
#[inline(always)]
8585
fn from_seed(seed: Self::Seed) -> Self {
86-
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap();
86+
// This is for compatibility with 32-bit platforms where Rng::Seed has a different seed size
87+
// With MSRV >= 1.77: let seed = *seed.first_chunk().unwrap()
8788
const LEN: usize = core::mem::size_of::<<Rng as SeedableRng>::Seed>();
8889
let seed = (&seed[..LEN]).try_into().unwrap();
8990
SmallRng(Rng::from_seed(seed))

src/rngs/xoshiro128plusplus.rs

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,29 +33,36 @@ impl SeedableRng for Xoshiro128PlusPlus {
3333
/// mapped to a different seed.
3434
#[inline]
3535
fn from_seed(seed: [u8; 16]) -> Xoshiro128PlusPlus {
36-
if seed.iter().all(|&x| x == 0) {
37-
return Self::seed_from_u64(0);
38-
}
3936
let mut state = [0; 4];
4037
read_u32_into(&seed, &mut state);
38+
// Check for zero on aligned integers for better code generation.
39+
// Furthermore, seed_from_u64(0) will expand to a constant when optimized.
40+
if state.iter().all(|&x| x == 0) {
41+
return Self::seed_from_u64(0);
42+
}
4143
Xoshiro128PlusPlus { s: state }
4244
}
4345

4446
/// Create a new `Xoshiro128PlusPlus` from a `u64` seed.
4547
///
4648
/// This uses the SplitMix64 generator internally.
49+
#[inline]
4750
fn seed_from_u64(mut state: u64) -> Self {
4851
const PHI: u64 = 0x9e3779b97f4a7c15;
49-
let mut seed = Self::Seed::default();
50-
for chunk in seed.as_mut().chunks_mut(8) {
52+
let mut s = [0; 4];
53+
for i in s.chunks_exact_mut(2) {
5154
state = state.wrapping_add(PHI);
5255
let mut z = state;
5356
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
5457
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
5558
z = z ^ (z >> 31);
56-
chunk.copy_from_slice(&z.to_le_bytes());
59+
i[0] = z as u32;
60+
i[1] = (z >> 32) as u32;
5761
}
58-
Self::from_seed(seed)
62+
// By using a non-zero PHI we are guaranteed to generate a non-zero state
63+
// Thus preventing a recursion between from_seed and seed_from_u64.
64+
debug_assert_ne!(s, [0; 4]);
65+
Xoshiro128PlusPlus { s }
5966
}
6067
}
6168

@@ -113,4 +120,18 @@ mod tests {
113120
assert_eq!(rng.next_u32(), e);
114121
}
115122
}
123+
124+
#[test]
125+
fn stable_seed_from_u64() {
126+
// We don't guarantee value-stability for SmallRng but this
127+
// could influence keeping stability whenever possible (e.g. after optimizations).
128+
let mut rng = Xoshiro128PlusPlus::seed_from_u64(0);
129+
let expected = [
130+
1179900579, 1938959192, 3089844957, 3657088315, 1015453891, 479942911, 3433842246,
131+
669252886, 3985671746, 2737205563,
132+
];
133+
for &e in &expected {
134+
assert_eq!(rng.next_u32(), e);
135+
}
136+
}
116137
}

src/rngs/xoshiro256plusplus.rs

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,29 +33,35 @@ impl SeedableRng for Xoshiro256PlusPlus {
3333
/// mapped to a different seed.
3434
#[inline]
3535
fn from_seed(seed: [u8; 32]) -> Xoshiro256PlusPlus {
36-
if seed.iter().all(|&x| x == 0) {
37-
return Self::seed_from_u64(0);
38-
}
3936
let mut state = [0; 4];
4037
read_u64_into(&seed, &mut state);
38+
// Check for zero on aligned integers for better code generation.
39+
// Furthermore, seed_from_u64(0) will expand to a constant when optimized.
40+
if state.iter().all(|&x| x == 0) {
41+
return Self::seed_from_u64(0);
42+
}
4143
Xoshiro256PlusPlus { s: state }
4244
}
4345

4446
/// Create a new `Xoshiro256PlusPlus` from a `u64` seed.
4547
///
4648
/// This uses the SplitMix64 generator internally.
49+
#[inline]
4750
fn seed_from_u64(mut state: u64) -> Self {
4851
const PHI: u64 = 0x9e3779b97f4a7c15;
49-
let mut seed = Self::Seed::default();
50-
for chunk in seed.as_mut().chunks_mut(8) {
52+
let mut s = [0; 4];
53+
for i in s.iter_mut() {
5154
state = state.wrapping_add(PHI);
5255
let mut z = state;
5356
z = (z ^ (z >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
5457
z = (z ^ (z >> 27)).wrapping_mul(0x94d049bb133111eb);
5558
z = z ^ (z >> 31);
56-
chunk.copy_from_slice(&z.to_le_bytes());
59+
*i = z;
5760
}
58-
Self::from_seed(seed)
61+
// By using a non-zero PHI we are guaranteed to generate a non-zero state
62+
// Thus preventing a recursion between from_seed and seed_from_u64.
63+
debug_assert_ne!(s, [0; 4]);
64+
Xoshiro256PlusPlus { s }
5965
}
6066
}
6167

@@ -126,4 +132,26 @@ mod tests {
126132
assert_eq!(rng.next_u64(), e);
127133
}
128134
}
135+
136+
#[test]
137+
fn stable_seed_from_u64() {
138+
// We don't guarantee value-stability for SmallRng but this
139+
// could influence keeping stability whenever possible (e.g. after optimizations).
140+
let mut rng = Xoshiro256PlusPlus::seed_from_u64(0);
141+
let expected = [
142+
5987356902031041503,
143+
7051070477665621255,
144+
6633766593972829180,
145+
211316841551650330,
146+
9136120204379184874,
147+
379361710973160858,
148+
15813423377499357806,
149+
15596884590815070553,
150+
5439680534584881407,
151+
1369371744833522710,
152+
];
153+
for &e in &expected {
154+
assert_eq!(rng.next_u64(), e);
155+
}
156+
}
129157
}

0 commit comments

Comments
 (0)