Skip to content

Commit 9699207

Browse files
pitdicker authored and dhardy committed
Improve performance of isaac64::next_u32.
This does some crazy things with indexing, but is 45% faster. We are no longer throwing away half of the results. [Cherry-picked from 415ef6f]
1 parent: 358fe2f; commit: 9699207

File tree

1 file changed: 41 additions (+41) and 20 deletions (-20)

src/prng/isaac64.rs

Lines changed: 41 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,13 @@ impl Isaac64Rng {
136136
/// - We fill `rsl` backwards. The reference implementation reads values
137137
/// from `rsl` in reverse. We read them in the normal direction, to make
138138
/// `fill_bytes` a memcopy. To maintain compatibility we fill in reverse.
139+
/// - We store `index` as if `rsl` contains `u32`'s instead of `u64`'s, plus
140+
/// one. This way we can make more efficient use of the generated results
141+
/// in `next_u32`.
142+
/// For `next_u32` the correct index is `index - 1`.
143+
/// For `next_u64` the correct index is `index >> 1`, which also takes
144+
/// care of any alignment issues that could arise if `next_u64` was called
145+
/// after `next_u32`.
139146
fn isaac64(&mut self) {
140147
self.c += w(1);
141148
// abbreviations
@@ -185,41 +192,63 @@ impl Isaac64Rng {
185192

186193
self.a = a;
187194
self.b = b;
188-
self.index = 0;
195+
self.index = 1;
189196
}
190197
}
191198

192199
impl Rng for Isaac64Rng {
193200
#[inline]
194201
fn next_u32(&mut self) -> u32 {
195-
self.next_u64() as u32
202+
// Using a local variable for `index`, and checking the size avoids a
203+
// bounds check later on.
204+
let mut index = self.index as usize - 1;
205+
if index >= RAND_SIZE * 2 {
206+
self.isaac64();
207+
index = 0;
208+
}
209+
210+
let value;
211+
if cfg!(target_endian = "little") {
212+
// Index as if this is a u32 slice.
213+
let rsl = unsafe { &*(&mut self.rsl as *mut [u64; RAND_SIZE]
214+
as *mut [u32; RAND_SIZE * 2]) };
215+
value = rsl[index];
216+
} else {
217+
// Index into the u64 slice, rotate and truncate the result.
218+
// Works always, also on big-endian systems, but is slower.
219+
let tmp = self.rsl[index >> 1];
220+
value = tmp as u32;
221+
self.rsl[index >> 1] = tmp.rotate_right(32);
222+
}
223+
self.index += 1;
224+
value
196225
}
197226

198227
#[inline]
199228
fn next_u64(&mut self) -> u64 {
200-
let mut index = self.index as usize;
229+
let mut index = self.index as usize >> 1;
201230
if index >= RAND_SIZE {
202231
self.isaac64();
203232
index = 0;
204233
}
205234

206235
let value = self.rsl[index];
207-
self.index += 1;
236+
self.index += 2;
208237
value
209238
}
210239

211240
fn fill_bytes(&mut self, dest: &mut [u8]) {
212241
let mut read_len = 0;
213242
while read_len < dest.len() {
214-
if self.index as usize >= RAND_SIZE {
243+
if (self.index as usize >> 1) >= RAND_SIZE {
215244
self.isaac64();
216245
}
217246

218247
let (consumed_u64, filled_u8) =
219-
impls::fill_via_u64_chunks(&mut self.rsl[(self.index as usize)..],
248+
impls::fill_via_u64_chunks(&mut self.rsl[(self.index as usize >> 1)..],
220249
&mut dest[read_len..]);
221250

222-
self.index += consumed_u64 as u32;
251+
self.index += consumed_u64 as u32 * 2;
223252
read_len += filled_u8;
224253
}
225254
}
@@ -386,20 +415,12 @@ mod test {
386415
let mut rng1 = Isaac64Rng::from_seed(seed);
387416
let v = (0..10).map(|_| rng1.next_u32()).collect::<Vec<_>>();
388417
// Subset of above values, as an LE u32 sequence
389-
// TODO: switch to this sequence?
390-
// assert_eq!(v,
391-
// [141028748, 127386717,
392-
// 1058730652, 3347555894,
393-
// 851491469, 4039984500,
394-
// 2692730210, 288449107,
395-
// 646103879, 2782923823]);
396-
// Subset of above values, using only low-half of each u64
397418
assert_eq!(v,
398-
[141028748, 1058730652,
399-
851491469, 2692730210,
400-
646103879, 4195642895,
401-
2836348583, 1312677241,
402-
999139615, 253604626]);
419+
[141028748, 127386717,
420+
1058730652, 3347555894,
421+
851491469, 4039984500,
422+
2692730210, 288449107,
423+
646103879, 2782923823]);
403424
}
404425

405426
#[test]

0 commit comments

Comments
 (0)