Skip to content

Commit 865b7e3

Browse files
committed
Optionally increase the size of the pipe buffer
This commit attempts to address rust-lang/cargo#9739 by optionally increasing the capacity of pipes created on Linux. This may be required because a pipe can initially start out with only one page of buffer, which apparently means that normal jobserver usage can deadlock the pipe, leaving all writers blocked.
1 parent 5756d02 commit 865b7e3

File tree

2 files changed

+128
-0
lines changed

2 files changed

+128
-0
lines changed

src/unix.rs

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pub struct Acquired {
2323
impl Client {
2424
pub fn new(limit: usize) -> io::Result<Client> {
2525
let client = unsafe { Client::mk()? };
26+
client.configure_capacity(limit)?;
2627
// I don't think the character written here matters, but I could be
2728
// wrong!
2829
for _ in 0..limit {
@@ -63,6 +64,70 @@ impl Client {
6364
Ok(Client::from_fds(pipes[0], pipes[1]))
6465
}
6566

67+
fn configure_capacity(&self, required_capacity: usize) -> io::Result<()> {
68+
// On Linux we may need to increase the capacity of the pipe for the
69+
// jobserver to work correctly. Linux seems to exhibit behavior where it
70+
// implements a ring-buffer internally but apparently the ring-ness of
71+
// the ring-buffer is connected to *pages* of the ring buffer rather
72+
// than actual bytes of the ring buffer. This means that if the pipe has
73+
// only one page of capacity we can hit a possible deadlock situation
74+
// where a bunch of threads are writing to the pipe but they're all
75+
// blocked, despite the current used capacity of the pipe being less
76+
// than a page.
77+
//
78+
// This was first discovered in rust-lang/cargo#9739 where a system with
79+
// a large amount of concurrency would hang in `cargo build` when the
80+
// jobserver pipe only had one page of capacity. This was reduced to a
81+
// reproduction program [1] which indeed showed that the system would
82+
// deadlock if the capacity of the pipe was just one page.
83+
//
84+
// To fix this issue, on Linux only, we may increase the capacity of the
85+
// pipe. The main thing here is that if the capacity of the pipe is a
86+
// single page we try to increase it to two pages, otherwise we fail
87+
// because a deadlock might happen. While we're at it this goes ahead
88+
// and factors in the `required_capacity` requested by the client to
89+
// this calculation as well. If for some reason you want 10_000 units of
90+
// concurrency in the pipe that means we'll need more than 2 pages
91+
// (typically 8192 bytes), so we round that up to 3 pages as well.
92+
//
93+
// Someone with more understanding of linux pipes and how they buffer
94+
// internally should probably review this at some point. The exact cause
95+
// of the deadlock seems a little uncertain and it's not clear why the
96+
// example program [1] deadlocks and why simply adding another page
97+
// fixes things. Is this a kernel bug? Do we need to always guarantee at
98+
// least one free page? I'm not sure! Hopefully for now this is enough
99+
// to fix the problem until machines start having more than 4k cores,
100+
// which seems like it might be awhile.
101+
//
102+
// [1]: https://github.com/rust-lang/cargo/issues/9739#issuecomment-889183009
103+
#[cfg(target_os = "linux")]
104+
unsafe {
105+
let page_size = libc::sysconf(libc::_SC_PAGESIZE);
106+
let actual_capacity = cvt(libc::fcntl(self.write.as_raw_fd(), libc::F_GETPIPE_SZ))?;
107+
108+
if let Some(c) = calculate_capacity(
109+
required_capacity,
110+
actual_capacity as usize,
111+
page_size as usize,
112+
) {
113+
cvt(libc::fcntl(self.write.as_raw_fd(), libc::F_SETPIPE_SZ, c)).map_err(|e| {
114+
io::Error::new(
115+
e.kind(),
116+
format!(
117+
"failed to increase jobserver pipe capacity from {} to {}; \
118+
jobserver otherwise might deadlock",
119+
actual_capacity, c,
120+
),
121+
)
122+
123+
// ...
124+
})?;
125+
}
126+
}
127+
128+
Ok(())
129+
}
130+
66131
pub unsafe fn open(s: &str) -> Option<Client> {
67132
let mut parts = s.splitn(2, ',');
68133
let read = parts.next().unwrap();
@@ -337,3 +402,44 @@ extern "C" fn sigusr1_handler(
337402
) {
338403
// nothing to do
339404
}
405+
406+
/// Decide whether the jobserver pipe needs to grow, and to what size.
///
/// Given the caller's `required_capacity` in bytes, the pipe's current
/// `actual_capacity`, and the system `page_size`, returns `Some(new_size)`
/// when the pipe should be resized and `None` when the current capacity is
/// already sufficient. The returned size is page-aligned and always leaves
/// at least one page of slack beyond a single page, since a one-page pipe
/// has been observed to deadlock (see `configure_capacity`).
#[allow(dead_code)]
fn calculate_capacity(
    required_capacity: usize,
    actual_capacity: usize,
    page_size: usize,
) -> Option<usize> {
    // Round `value` up to the next multiple of `multiple` (a power of two).
    fn round_up_to(value: usize, multiple: usize) -> usize {
        assert!(multiple.is_power_of_two());
        (value + (multiple - 1)) & !(multiple - 1)
    }

    if actual_capacity < required_capacity {
        // Not enough room for all the tokens: page-align the requirement,
        // bumping by one extra page if that still leaves us below two pages.
        let aligned = round_up_to(required_capacity, page_size);
        let target = if aligned < page_size * 2 {
            aligned + page_size
        } else {
            aligned
        };
        Some(target)
    } else if actual_capacity <= page_size {
        // Capacity is fine byte-wise, but a single-page pipe can deadlock;
        // insist on at least two pages.
        Some(page_size * 2)
    } else {
        // Roomy enough already — leave the pipe alone.
        None
    }
}
431+
432+
#[cfg(test)]
mod tests {
    use super::calculate_capacity;

    /// Table-driven check of the pipe-capacity sizing logic:
    /// `(required, actual, page_size)` -> expected resize decision.
    #[test]
    fn test_calculate_capacity() {
        let cases: &[(usize, usize, usize, Option<usize>)] = &[
            // Plenty of room already: leave the pipe alone.
            (1, 65536, 4096, None),
            (500, 65536, 4096, None),
            // Requirement exceeds current capacity: grow, page-aligned.
            (5000, 4096, 4096, Some(8192)),
            // A single-page pipe is never acceptable: grow to two pages.
            (1, 4096, 4096, Some(8192)),
            (4096, 4096, 4096, Some(8192)),
            (8192, 4096, 4096, Some(8192)),
        ];
        for &(required, actual, page, expected) in cases {
            assert_eq!(
                calculate_capacity(required, actual, page),
                expected,
                "required={} actual={} page={}",
                required,
                actual,
                page,
            );
        }
    }
}

tests/server.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,25 @@ fn zero_client() {
156156
assert!(rx.try_recv().is_err());
157157
}
158158
}
159+
160+
// Stress test for rust-lang/cargo#9739: many threads hammering acquire/release
// on a shared client must not deadlock on a small pipe buffer.
#[test]
fn highly_concurrent() {
    const ITERATIONS: usize = 10000;
    const WORKERS: usize = 80;

    let client = t!(Client::new(WORKERS));

    let mut handles = Vec::with_capacity(WORKERS);
    for _ in 0..WORKERS {
        let client = client.clone();
        handles.push(std::thread::spawn(move || {
            for _ in 0..ITERATIONS {
                // Acquire a token and immediately release it again.
                drop(client.acquire().unwrap());
            }
        }));
    }

    for handle in handles {
        handle.join().unwrap();
    }
}

0 commit comments

Comments
 (0)