Skip to content

Commit 8933c05

Browse files
authored
Help the compiler avoid inlining lazy init functions (#443)
Before this change, the compiler generates code that looks like this:

```
if not initialized:
    goto init

do_work:
    do the actual work
    goto exit

init:
    inlined init()
    goto do_work

exit:
    ret
```

If the initialization code is small, this works fine. But, for (bad) reasons, `is_rdrand_good` is particularly huge. Thus, jumping over its inlined code is wasteful because it puts bad pressure on the instruction cache.

With this change, the generated code looks like this:

```
if not initialized:
    goto init

do_work:
    do the actual work
    goto exit

init:
    call init()
    goto do_work

exit:
    ret
```

I verified these claims by running:

```
$ cargo asm --rust getrandom_inner --lib --target=x86_64-fortanix-unknown-sgx
```

This is also what other implementations (e.g. OnceCell) do.

While here, I made the analogous change to LazyPtr, and rewrote LazyPtr to the same form as LazyUsize. I didn't check the generated code for LazyPtr though.

(Why is `is_rdrand_good` huge? The compiler unrolls the 10-iteration retry loop, and then it unrolls the 8-iteration self-test loop, so the result is that `rdrand()` is inlined 80 times inside `is_rdrand_good`. This is something to address separately as it also affects `getrandom_inner` itself.)
1 parent a4b1f2f commit 8933c05

File tree

2 files changed

+53
-32
lines changed

2 files changed

+53
-32
lines changed

src/lazy.rs

+24-12
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,20 @@ impl LazyUsize {
3636
// init(). Multiple callers can run their init() functions in parallel.
3737
// init() should always return the same value, if it succeeds.
3838
pub fn unsync_init(&self, init: impl FnOnce() -> usize) -> usize {
39+
#[cold]
40+
fn do_init(this: &LazyUsize, init: impl FnOnce() -> usize) -> usize {
41+
let val = init();
42+
this.0.store(val, Ordering::Relaxed);
43+
val
44+
}
45+
3946
// Relaxed ordering is fine, as we only have a single atomic variable.
40-
let mut val = self.0.load(Ordering::Relaxed);
41-
if val == Self::UNINIT {
42-
val = init();
43-
self.0.store(val, Ordering::Relaxed);
47+
let val = self.0.load(Ordering::Relaxed);
48+
if val != Self::UNINIT {
49+
val
50+
} else {
51+
do_init(self, init)
4452
}
45-
val
4653
}
4754
}
4855

@@ -92,19 +99,24 @@ impl LazyPtr {
9299
// init(). Multiple callers can run their init() functions in parallel.
93100
// init() should always return the same value, if it succeeds.
94101
pub fn unsync_init(&self, init: impl Fn() -> *mut c_void) -> *mut c_void {
102+
#[cold]
103+
fn do_init(this: &LazyPtr, init: impl Fn() -> *mut c_void) -> *mut c_void {
104+
let addr = init();
105+
this.addr.store(addr, Ordering::Release);
106+
addr
107+
}
108+
95109
// Despite having only a single atomic variable (self.addr), we still
96110
// cannot always use Ordering::Relaxed, as we need to make sure a
97111
// successful call to `init` is "ordered before" any data read through
98112
// the returned pointer (which occurs when the function is called).
99113
// Our implementation mirrors that of the one in libstd, meaning that
100114
// the use of non-Relaxed operations is probably unnecessary.
101-
match self.addr.load(Ordering::Acquire) {
102-
Self::UNINIT => {
103-
let addr = init();
104-
self.addr.store(addr, Ordering::Release);
105-
addr
106-
}
107-
addr => addr,
115+
let val = self.addr.load(Ordering::Acquire);
116+
if val != Self::UNINIT {
117+
val
118+
} else {
119+
do_init(self, init)
108120
}
109121
}
110122
}

src/use_file.rs

+29-20
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ use core::{
1919
const FILE_PATH: &[u8] = b"/dev/urandom\0";
2020
const FD_UNINIT: usize = usize::max_value();
2121

22+
// Do not inline this when it is the fallback implementation, but don't mark it
23+
// `#[cold]` because it is hot when it is actually used.
24+
#[cfg_attr(any(target_os = "android", target_os = "linux"), inline(never))]
2225
pub fn getrandom_inner(dest: &mut [MaybeUninit<u8>]) -> Result<(), Error> {
2326
let fd = get_rng_fd()?;
2427
sys_fill_exact(dest, |buf| unsafe {
@@ -31,38 +34,44 @@ pub fn getrandom_inner(dest: &mut [MaybeUninit<u8>]) -> Result<(), Error> {
3134
// return the same file descriptor. This file descriptor is never closed.
3235
fn get_rng_fd() -> Result<libc::c_int, Error> {
3336
static FD: AtomicUsize = AtomicUsize::new(FD_UNINIT);
37+
3438
fn get_fd() -> Option<libc::c_int> {
3539
match FD.load(Relaxed) {
3640
FD_UNINIT => None,
3741
val => Some(val as libc::c_int),
3842
}
3943
}
4044

41-
// Use double-checked locking to avoid acquiring the lock if possible.
42-
if let Some(fd) = get_fd() {
43-
return Ok(fd);
44-
}
45+
#[cold]
46+
fn get_fd_locked() -> Result<libc::c_int, Error> {
47+
// SAFETY: We use the mutex only in this method, and we always unlock it
48+
// before returning, making sure we don't violate the pthread_mutex_t API.
49+
static MUTEX: Mutex = Mutex::new();
50+
unsafe { MUTEX.lock() };
51+
let _guard = DropGuard(|| unsafe { MUTEX.unlock() });
4552

46-
// SAFETY: We use the mutex only in this method, and we always unlock it
47-
// before returning, making sure we don't violate the pthread_mutex_t API.
48-
static MUTEX: Mutex = Mutex::new();
49-
unsafe { MUTEX.lock() };
50-
let _guard = DropGuard(|| unsafe { MUTEX.unlock() });
53+
if let Some(fd) = get_fd() {
54+
return Ok(fd);
55+
}
5156

52-
if let Some(fd) = get_fd() {
53-
return Ok(fd);
54-
}
57+
// On Linux, /dev/urandom might return insecure values.
58+
#[cfg(any(target_os = "android", target_os = "linux"))]
59+
wait_until_rng_ready()?;
5560

56-
// On Linux, /dev/urandom might return insecure values.
57-
#[cfg(any(target_os = "android", target_os = "linux"))]
58-
wait_until_rng_ready()?;
61+
let fd = open_readonly(FILE_PATH)?;
62+
// The fd always fits in a usize without conflicting with FD_UNINIT.
63+
debug_assert!(fd >= 0 && (fd as usize) < FD_UNINIT);
64+
FD.store(fd as usize, Relaxed);
5965

60-
let fd = open_readonly(FILE_PATH)?;
61-
// The fd always fits in a usize without conflicting with FD_UNINIT.
62-
debug_assert!(fd >= 0 && (fd as usize) < FD_UNINIT);
63-
FD.store(fd as usize, Relaxed);
66+
Ok(fd)
67+
}
6468

65-
Ok(fd)
69+
// Use double-checked locking to avoid acquiring the lock if possible.
70+
if let Some(fd) = get_fd() {
71+
Ok(fd)
72+
} else {
73+
get_fd_locked()
74+
}
6675
}
6776

6877
// Polls /dev/random to make sure it is ok to read from /dev/urandom.

0 commit comments

Comments
 (0)