Skip to content

Commit 885efc0

Browse files
committed
Merge branch 'prng'
2 parents 3b9a702 + e973214 commit 885efc0

File tree

10 files changed

+1018
-503
lines changed

10 files changed

+1018
-503
lines changed

src/distributions/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,8 @@ fn ziggurat<R: Rng, P, Z>(
279279

280280
#[cfg(test)]
281281
mod tests {
282-
283282
use {Rng, Rand};
283+
use impls;
284284
use super::{RandSample, WeightedChoice, Weighted, Sample, IndependentSample};
285285

286286
#[derive(PartialEq, Debug)]
@@ -301,6 +301,10 @@ mod tests {
301301
fn next_u64(&mut self) -> u64 {
302302
self.next_u32() as u64
303303
}
304+
305+
fn fill_bytes(&mut self, dest: &mut [u8]) {
306+
impls::fill_bytes_via_u32(self, dest)
307+
}
304308
}
305309

306310
#[test]

src/impls.rs

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
// Copyright 2013-2017 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
//! Helper functions for implementing `Rng` functions.
12+
//!
13+
//! For cross-platform reproducibility, these functions all use Little Endian:
14+
//! least-significant part first. For example, `next_u64_via_u32` takes `u32`
15+
//! values `x, y`, then outputs `(y << 32) | x`. To implement `next_u32`
16+
//! from `next_u64` in little-endian order, one should use `next_u64() as u32`.
17+
//!
18+
//! Byte-swapping (like the std `to_le` functions) is only needed to convert
19+
//! to/from byte sequences, and since its purpose is reproducibility,
20+
//! non-reproducible sources (e.g. `OsRng`) need not bother with it.
21+
22+
// TODO: eventually these should be exported somehow
23+
#![allow(unused)]
24+
25+
use core::intrinsics::transmute;
26+
use core::slice;
27+
use core::cmp::min;
28+
use core::mem::size_of;
29+
use Rng;
30+
31+
/// Implement `next_u64` via `next_u32`, little-endian order.
32+
pub fn next_u64_via_u32<R: Rng+?Sized>(rng: &mut R) -> u64 {
33+
// Use LE; we explicitly generate one value before the next.
34+
let x = rng.next_u32() as u64;
35+
let y = rng.next_u32() as u64;
36+
(y << 32) | x
37+
}
38+
39+
macro_rules! fill_bytes_via {
40+
($rng:ident, $next_u:ident, $BYTES:expr, $dest:ident) => {{
41+
let mut left = $dest;
42+
while left.len() >= $BYTES {
43+
let (l, r) = {left}.split_at_mut($BYTES);
44+
left = r;
45+
let chunk: [u8; $BYTES] = unsafe {
46+
transmute($rng.$next_u().to_le())
47+
};
48+
l.copy_from_slice(&chunk);
49+
}
50+
let n = left.len();
51+
if n > 0 {
52+
let chunk: [u8; $BYTES] = unsafe {
53+
transmute($rng.$next_u().to_le())
54+
};
55+
left.copy_from_slice(&chunk[..n]);
56+
}
57+
}}
58+
}
59+
60+
/// Implement `fill_bytes` via `next_u32`, little-endian order.
61+
pub fn fill_bytes_via_u32<R: Rng+?Sized>(rng: &mut R, dest: &mut [u8]) {
62+
fill_bytes_via!(rng, next_u32, 4, dest)
63+
}
64+
65+
/// Implement `fill_bytes` via `next_u64`, little-endian order.
66+
pub fn fill_bytes_via_u64<R: Rng+?Sized>(rng: &mut R, dest: &mut [u8]) {
67+
fill_bytes_via!(rng, next_u64, 8, dest)
68+
}
69+
/// Produces a `$ty` from `$N` bytes obtained through `$rng.fill_bytes`.
///
/// The filled bytes are interpreted in little-endian order (the first byte is
/// the least significant), so the resulting value is the same on little- and
/// big-endian targets.
///
/// `$N` must equal `size_of::<$ty>()`; this is asserted.
macro_rules! impl_uint_from_fill {
    ($rng:expr, $ty:ty, $N:expr) => ({
        // Checked in release builds as well: the raw slice built below is
        // only sound when `$N` matches the size of `$ty`. Both operands are
        // constants, so the check is expected to fold away.
        assert!($N == size_of::<$ty>());

        let mut int: $ty = 0;
        // SAFETY: `ptr` points at the live local `int`, which is exactly
        // `$N` bytes long (asserted above); `u8` has no alignment
        // requirement and any byte pattern is a valid integer.
        unsafe {
            let ptr = &mut int as *mut $ty as *mut u8;
            let slice = slice::from_raw_parts_mut(ptr, $N);
            $rng.fill_bytes(slice);
        }
        // The bytes were stored in memory order; decode them as
        // little-endian (a no-op on little-endian targets).
        <$ty>::from_le(int)
    });
}
83+
/// Copies as many bytes as possible from the word buffer `$src` into the byte
/// slice `$dest`, in little-endian byte order.
///
/// `$N` must be the size in bytes of one element of `$src`. The macro
/// evaluates to `(consumed_words, filled_bytes)`, where `filled_bytes` is
/// `min($src.len() * $N, $dest.len())` and `consumed_words` is
/// `filled_bytes / $N` rounded up.
///
/// The first `consumed_words` elements of `$src` are converted to
/// little-endian in place before copying, so on big-endian targets they are
/// mutated.
macro_rules! fill_via_chunks {
    ($src:expr, $dest:expr, $N:expr) => ({
        let chunk_size_u8 = min($src.len() * $N, $dest.len());
        // Round up: a word that is only partially copied still counts as
        // consumed.
        let chunk_size = (chunk_size_u8 + $N - 1) / $N;

        // Convert to little-endian:
        for word in $src[..chunk_size].iter_mut() {
            *word = word.to_le();
        }

        // SAFETY: the pointer comes from the live slice `$src`, and
        // `chunk_size <= $src.len()`, so (given `$N` is the element size)
        // the byte view stays inside the slice. `u8` has no alignment
        // requirement and initialised integers are valid bytes.
        let bytes = unsafe { slice::from_raw_parts($src.as_ptr() as *const u8,
                                                   chunk_size * $N) };

        $dest[..chunk_size_u8].copy_from_slice(&bytes[..chunk_size_u8]);

        (chunk_size, chunk_size_u8)
    });
}
103+
104+
/// Implement `fill_bytes` by reading chunks from the output buffer of a block
105+
/// based RNG.
106+
///
107+
/// The return values are `(consumed_u32, filled_u8)`.
108+
///
109+
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
110+
/// the length of `dest`.
111+
/// `consumed_u32` is the number of words consumed from `src`, which is the same
112+
/// as `filled_u8 / 4` rounded up.
113+
///
114+
/// Note that on big-endian systems values in the output buffer `src` are
115+
/// mutated. `src[0..consumed_u32]` get converted to little-endian before
116+
/// copying.
117+
///
118+
/// # Example
119+
/// (from `IsaacRng`)
120+
///
121+
/// ```rust,ignore
122+
/// fn fill_bytes(&mut self, dest: &mut [u8]) {
123+
/// let mut read_len = 0;
124+
/// while read_len < dest.len() {
125+
/// if self.index >= self.rsl.len() {
126+
/// self.isaac();
127+
/// }
128+
///
129+
/// let (consumed_u32, filled_u8) =
130+
/// impls::fill_via_u32_chunks(&mut self.rsl[self.index..],
131+
/// &mut dest[read_len..]);
132+
///
133+
/// self.index += consumed_u32;
134+
/// read_len += filled_u8;
135+
/// }
136+
/// }
137+
/// ```
138+
pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) {
139+
fill_via_chunks!(src, dest, 4)
140+
}
141+
142+
/// Implement `fill_bytes` by reading chunks from the output buffer of a block
143+
/// based RNG.
144+
///
145+
/// The return values are `(consumed_u64, filled_u8)`.
146+
/// `filled_u8` is the number of filled bytes in `dest`, which may be less than
147+
/// the length of `dest`.
148+
/// `consumed_u64` is the number of words consumed from `src`, which is the same
149+
/// as `filled_u8 / 8` rounded up.
150+
///
151+
/// Note that on big-endian systems values in the output buffer `src` are
152+
/// mutated. `src[0..consumed_u64]` get converted to little-endian before
153+
/// copying.
154+
///
155+
/// See `fill_via_u32_chunks` for an example.
156+
pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) {
157+
fill_via_chunks!(src, dest, 8)
158+
}
159+
160+
/// Implement `next_u32` via `fill_bytes`, little-endian order.
161+
pub fn next_u32_via_fill<R: Rng+?Sized>(rng: &mut R) -> u32 {
162+
impl_uint_from_fill!(rng, u32, 4)
163+
}
164+
165+
/// Implement `next_u64` via `fill_bytes`, little-endian order.
166+
pub fn next_u64_via_fill<R: Rng+?Sized>(rng: &mut R) -> u64 {
167+
impl_uint_from_fill!(rng, u64, 8)
168+
}
169+
170+
// TODO: implement tests for the above

src/jitter.rs

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
//! Non-physical true random number generator based on timing jitter.
1818
19-
use Rng;
19+
use {Rng, impls};
2020

2121
use core::{fmt, mem, ptr};
2222
#[cfg(feature="std")]
@@ -731,22 +731,7 @@ impl Rng for JitterRng {
731731
}
732732

733733
fn fill_bytes(&mut self, dest: &mut [u8]) {
734-
let mut left = dest;
735-
while left.len() >= 8 {
736-
let (l, r) = {left}.split_at_mut(8);
737-
left = r;
738-
let chunk: [u8; 8] = unsafe {
739-
mem::transmute(self.next_u64().to_le())
740-
};
741-
l.copy_from_slice(&chunk);
742-
}
743-
let n = left.len();
744-
if n > 0 {
745-
let chunk: [u8; 8] = unsafe {
746-
mem::transmute(self.next_u64().to_le())
747-
};
748-
left.copy_from_slice(&chunk[..n]);
749-
}
734+
impls::fill_bytes_via_u64(self, dest)
750735
}
751736
}
752737

src/lib.rs

Lines changed: 55 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ use distributions::range::SampleRange;
277277

278278
// public modules
279279
pub mod distributions;
280+
mod impls;
280281
pub mod jitter;
281282
#[cfg(feature="std")] pub mod os;
282283
#[cfg(feature="std")] pub mod read;
@@ -338,21 +339,28 @@ pub trait Rand : Sized {
338339

339340
/// A random number generator.
340341
pub trait Rng {
341-
/// Return the next random u32.
342-
///
343-
/// This rarely needs to be called directly, prefer `r.gen()` to
344-
/// `r.next_u32()`.
345-
// FIXME #rust-lang/rfcs#628: Should be implemented in terms of next_u64
342+
/// Return the next random `u32`.
343+
///
344+
/// Implementations of this trait must implement at least one of
345+
/// `next_u32`, `next_u64` and `fill_bytes` directly. In the case this
346+
/// function is not implemented directly, it can be implemented using
347+
/// `self.next_u64() as u32` or via `fill_bytes` (TODO: expose helper
348+
/// function).
346349
fn next_u32(&mut self) -> u32;
347350

348-
/// Return the next random u64.
349-
///
350-
/// By default this is implemented in terms of `next_u32`. An
351-
/// implementation of this trait must provide at least one of
352-
/// these two methods. Similarly to `next_u32`, this rarely needs
353-
/// to be called directly, prefer `r.gen()` to `r.next_u64()`.
351+
/// Return the next random `u64`.
352+
///
353+
/// Implementations of this trait must implement at least one of
354+
/// `next_u32`, `next_u64` and `fill_bytes` directly. In the case this
355+
/// function is not implemented directly, the default implementation will
356+
/// generate values via `next_u32` in little-endian fashion, or this
357+
/// function can be implemented via `fill_bytes` (TODO: expose helper
358+
/// function).
359+
///
360+
/// Types wrapping an inner RNG must not use the default implementation,
361+
/// since the inner RNG's implementation may produce different values.
354362
fn next_u64(&mut self) -> u64 {
355-
((self.next_u32() as u64) << 32) | (self.next_u32() as u64)
363+
impls::next_u64_via_u32(self)
356364
}
357365

358366
/// Return the next random f32 selected from the half-open
@@ -408,18 +416,22 @@ pub trait Rng {
408416
}
409417

410418
/// Fill `dest` with random data.
411-
///
412-
/// This has a default implementation in terms of `next_u64` and
413-
/// `next_u32`, but should be overridden by implementations that
414-
/// offer a more efficient solution than just calling those
415-
/// methods repeatedly.
416-
///
417-
/// This method does *not* have a requirement to bear any fixed
418-
/// relationship to the other methods, for example, it does *not*
419-
/// have to result in the same output as progressively filling
420-
/// `dest` with `self.gen::<u8>()`, and any such behaviour should
421-
/// not be relied upon.
422-
///
419+
///
420+
/// Implementations of this trait must implement at least one of
421+
/// `next_u32`, `next_u64` and `fill_bytes` directly. In the case this
422+
/// function is not implemented directly, the default implementation will
423+
/// generate values via `next_u64` in little-endian fashion.
424+
/// (TODO: expose helper function to allow implementation via `next_u32`.)
425+
///
426+
/// There is no requirement on how this method generates values relative to
427+
/// `next_u32` or `next_u64`; e.g. a `u64` cast to bytes is not required to
428+
/// have the same value as eight bytes filled via this function. There *is*
429+
/// a requirement of portability for reproducible generators which implies
430+
/// that any seedable generator must fix endianness when generating bytes.
431+
///
432+
/// Types wrapping an inner RNG must not use the default implementation,
433+
/// since the inner RNG's implementation may produce different values.
434+
///
423435
/// This method should guarantee that `dest` is entirely filled
424436
/// with new data, and may panic if this is impossible
425437
/// (e.g. reading past the end of a file that is being used as the
@@ -435,27 +447,7 @@ pub trait Rng {
435447
/// println!("{:?}", &v[..]);
436448
/// ```
437449
fn fill_bytes(&mut self, dest: &mut [u8]) {
438-
// this could, in theory, be done by transmuting dest to a
439-
// [u64], but this is (1) likely to be undefined behaviour for
440-
// LLVM, (2) has to be very careful about alignment concerns,
441-
// (3) adds more `unsafe` that needs to be checked, (4)
442-
// probably doesn't give much performance gain if
443-
// optimisations are on.
444-
let mut count = 0;
445-
let mut num = 0;
446-
for byte in dest.iter_mut() {
447-
if count == 0 {
448-
// we could micro-optimise here by generating a u32 if
449-
// we only need a few more bytes to fill the vector
450-
// (i.e. at most 4).
451-
num = self.next_u64();
452-
count = 8;
453-
}
454-
455-
*byte = (num & 0xff) as u8;
456-
num >>= 8;
457-
count -= 1;
458-
}
450+
impls::fill_bytes_via_u64(self, dest)
459451
}
460452

461453
/// Return a random value of a `Rand` type.
@@ -768,7 +760,7 @@ pub struct Closed01<F>(pub F);
768760

769761
/// The standard RNG. This is designed to be efficient on the current
770762
/// platform.
771-
#[derive(Copy, Clone, Debug)]
763+
#[derive(Clone, Debug)]
772764
pub struct StdRng {
773765
rng: IsaacWordRng,
774766
}
@@ -811,6 +803,11 @@ impl Rng for StdRng {
811803
fn next_u64(&mut self) -> u64 {
812804
self.rng.next_u64()
813805
}
806+
807+
#[inline]
808+
fn fill_bytes(&mut self, dest: &mut [u8]) {
809+
self.rng.fill_bytes(dest)
810+
}
814811
}
815812

816813
impl<'a> SeedableRng<&'a [usize]> for StdRng {
@@ -985,17 +982,21 @@ pub fn sample<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec<T>
985982

986983
#[cfg(test)]
987984
mod test {
985+
use impls;
988986
use super::{Rng, thread_rng, random, SeedableRng, StdRng, weak_rng};
989987
use std::iter::repeat;
990988

991989
pub struct MyRng<R> { inner: R }
992990

993991
impl<R: Rng> Rng for MyRng<R> {
994992
fn next_u32(&mut self) -> u32 {
995-
fn next<T: Rng>(t: &mut T) -> u32 {
996-
t.next_u32()
997-
}
998-
next(&mut self.inner)
993+
self.inner.next_u32()
994+
}
995+
fn next_u64(&mut self) -> u64 {
996+
self.inner.next_u64()
997+
}
998+
fn fill_bytes(&mut self, dest: &mut [u8]) {
999+
self.inner.fill_bytes(dest)
9991000
}
10001001
}
10011002

@@ -1007,8 +1008,10 @@ mod test {
10071008
impl Rng for ConstRng {
10081009
fn next_u32(&mut self) -> u32 { self.i as u32 }
10091010
fn next_u64(&mut self) -> u64 { self.i }
1010-
1011-
// no fill_bytes on purpose
1011+
1012+
fn fill_bytes(&mut self, dest: &mut [u8]) {
1013+
impls::fill_bytes_via_u64(self, dest)
1014+
}
10121015
}
10131016

10141017
pub fn iter_eq<I, J>(i: I, j: J) -> bool

0 commit comments

Comments
 (0)