@@ -23,6 +23,7 @@ pub struct Acquired {
23
23
impl Client {
24
24
pub fn new ( limit : usize ) -> io:: Result < Client > {
25
25
let client = unsafe { Client :: mk ( ) ? } ;
26
+ client. configure_capacity ( limit) ?;
26
27
// I don't think the character written here matters, but I could be
27
28
// wrong!
28
29
for _ in 0 ..limit {
@@ -63,6 +64,70 @@ impl Client {
63
64
Ok ( Client :: from_fds ( pipes[ 0 ] , pipes[ 1 ] ) )
64
65
}
65
66
67
+ fn configure_capacity ( & self , required_capacity : usize ) -> io:: Result < ( ) > {
68
+ // On Linux we may need to increase the capacity of the pipe for the
69
+ // jobserver to work correctly. Linux seems to exhibit behavior where it
70
+ // implements a ring-buffer internally but apparently the ring-ness of
71
+ // the ring-buffer is connected to *pages* of the ring buffer rather
72
+ // than actual bytes of the ring buffer. This means that if the pipe has
73
+ // only one page of capacity we can hit a possible deadlock situation
74
+ // where a bunch of threads are writing to the pipe but they're all
75
+ // blocked, despite the current used capacity of the pipe being less
76
+ // than a page.
77
+ //
78
+ // This was first discovered in rust-lang/cargo#9739 where a system with
79
+ // a large amount of concurrency would hang in `cargo build` when the
80
+ // jobserver pipe only had one page of capacity. This was reduced to a
81
+ // reproduction program [1] which indeed showed that the system would
82
+ // deadlock if the capacity of the pipe was just one page.
83
+ //
84
+ // To fix this issue, on Linux only, we may increase the capacity of the
85
+ // pipe. The main thing here is that if the capacity of the pipe is a
86
+ // single page we try to increase it to two pages, otherwise we fail
87
+ // because a deadlock might happen. While we're at it this goes ahead
88
+ // and factors in the `required_capacity` requested by the client to
89
+ // this calculation as well. If for some reason you want 10_000 units of
90
+ // concurrency in the pipe that means we'll need more than 2 pages
91
+ // (typically 8192 bytes), so we round that up to 3 pages as well.
92
+ //
93
+ // Someone with more understanding of linux pipes and how they buffer
94
+ // internally should probably review this at some point. The exact cause
95
+ // of the deadlock seems a little uncertain and it's not clear why the
96
+ // example program [1] deadlocks and why simply adding another page
97
+ // fixes things. Is this a kernel bug? Do we need to always guarantee at
98
+ // least one free page? I'm not sure! Hopefully for now this is enough
99
+ // to fix the problem until machines start having more than 4k cores,
100
+ // which seems like it might be awhile.
101
+ //
102
+ // [1]: https://github.com/rust-lang/cargo/issues/9739#issuecomment-889183009
103
+ #[ cfg( target_os = "linux" ) ]
104
+ unsafe {
105
+ let page_size = libc:: sysconf ( libc:: _SC_PAGESIZE) ;
106
+ let actual_capacity = cvt ( libc:: fcntl ( self . write . as_raw_fd ( ) , libc:: F_GETPIPE_SZ ) ) ?;
107
+
108
+ if let Some ( c) = calculate_capacity (
109
+ required_capacity,
110
+ actual_capacity as usize ,
111
+ page_size as usize ,
112
+ ) {
113
+ cvt ( libc:: fcntl ( self . write . as_raw_fd ( ) , libc:: F_SETPIPE_SZ , c) ) . map_err ( |e| {
114
+ io:: Error :: new (
115
+ e. kind ( ) ,
116
+ format ! (
117
+ "failed to increase jobserver pipe capacity from {} to {}; \
118
+ jobserver otherwise might deadlock",
119
+ actual_capacity, c,
120
+ ) ,
121
+ )
122
+
123
+ // ...
124
+ } ) ?;
125
+ }
126
+ }
127
+
128
+ Ok ( ( ) )
129
+ }
130
+
66
131
pub unsafe fn open ( s : & str ) -> Option < Client > {
67
132
let mut parts = s. splitn ( 2 , ',' ) ;
68
133
let read = parts. next ( ) . unwrap ( ) ;
@@ -337,3 +402,44 @@ extern "C" fn sigusr1_handler(
337
402
) {
338
403
// nothing to do
339
404
}
405
+
406
+ #[ allow( dead_code) ]
407
+ fn calculate_capacity (
408
+ required_capacity : usize ,
409
+ actual_capacity : usize ,
410
+ page_size : usize ,
411
+ ) -> Option < usize > {
412
+ if actual_capacity < required_capacity {
413
+ let mut rounded_capacity = round_up_to ( required_capacity, page_size) ;
414
+ if rounded_capacity < page_size * 2 {
415
+ rounded_capacity += page_size;
416
+ }
417
+ return Some ( rounded_capacity) ;
418
+ }
419
+
420
+ if actual_capacity <= page_size {
421
+ return Some ( page_size * 2 ) ;
422
+ }
423
+
424
+ return None ;
425
+
426
+ fn round_up_to ( a : usize , b : usize ) -> usize {
427
+ assert ! ( b. is_power_of_two( ) ) ;
428
+ ( a + ( b - 1 ) ) & ( !( b - 1 ) )
429
+ }
430
+ }
431
+
432
+ #[ cfg( test) ]
433
+ mod tests {
434
+ use super :: calculate_capacity;
435
+
436
+ #[ test]
437
+ fn test_calculate_capacity ( ) {
438
+ assert_eq ! ( calculate_capacity( 1 , 65536 , 4096 ) , None ) ;
439
+ assert_eq ! ( calculate_capacity( 500 , 65536 , 4096 ) , None ) ;
440
+ assert_eq ! ( calculate_capacity( 5000 , 4096 , 4096 ) , Some ( 8192 ) ) ;
441
+ assert_eq ! ( calculate_capacity( 1 , 4096 , 4096 ) , Some ( 8192 ) ) ;
442
+ assert_eq ! ( calculate_capacity( 4096 , 4096 , 4096 ) , Some ( 8192 ) ) ;
443
+ assert_eq ! ( calculate_capacity( 8192 , 4096 , 4096 ) , Some ( 8192 ) ) ;
444
+ }
445
+ }
0 commit comments