
Commit 0933cf9

improve shard_amount usage
1 parent 256e30d commit 0933cf9

4 files changed: +49 -14 lines changed

turbopack/crates/turbo-tasks-backend/src/backend/mod.rs

Lines changed: 3 additions & 6 deletions
@@ -15,7 +15,6 @@ use std::{
         Arc,
         atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
     },
-    thread::available_parallelism,
 };
 
 use anyhow::{Result, bail};
@@ -67,7 +66,7 @@ use crate::{
     },
     utils::{
         bi_map::BiMap, chunked_vec::ChunkedVec, dash_map_drop_contents::drop_contents,
-        ptr_eq_arc::PtrEqArc, sharded::Sharded, swap_retain,
+        ptr_eq_arc::PtrEqArc, shard_amount::compute_shard_amount, sharded::Sharded, swap_retain,
     },
 };
 
@@ -233,14 +232,12 @@ impl<B: BackingStorage> TurboTasksBackend<B> {
 
 impl<B: BackingStorage> TurboTasksBackendInner<B> {
     pub fn new(mut options: BackendOptions, backing_storage: B) -> Self {
-        let shard_amount =
-            (available_parallelism().map_or(4, |v| v.get()) * 64).next_power_of_two();
+        let shard_amount = compute_shard_amount(options.num_workers, options.small_preallocation);
         let need_log = matches!(options.storage_mode, Some(StorageMode::ReadWrite));
         if !options.dependency_tracking {
             options.active_tracking = false;
         }
         let small_preallocation = options.small_preallocation;
-        let num_workers = options.num_workers;
         let next_task_id = backing_storage
             .next_free_task_id()
             .expect("Failed to get task id");
@@ -262,7 +259,7 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
             task_cache: BiMap::new(),
             transient_tasks: FxDashMap::default(),
             local_is_partial: AtomicBool::new(next_task_id != TaskId::MIN),
-            storage: Storage::new(num_workers, small_preallocation),
+            storage: Storage::new(shard_amount, small_preallocation),
             in_progress_operations: AtomicUsize::new(0),
             snapshot_request: Mutex::new(SnapshotRequest::new()),
             operations_suspended: Condvar::new(),

turbopack/crates/turbo-tasks-backend/src/backend/storage.rs

Lines changed: 1 addition & 8 deletions
@@ -2,7 +2,6 @@ use std::{
     hash::Hash,
     ops::{Deref, DerefMut},
     sync::{Arc, atomic::AtomicBool},
-    thread::available_parallelism,
 };
 
 use bitfield::bitfield;
@@ -616,19 +615,13 @@ pub struct Storage {
 }
 
 impl Storage {
-    pub fn new(num_worker: Option<usize>, small_preallocation: bool) -> Self {
+    pub fn new(shard_amount: usize, small_preallocation: bool) -> Self {
         let map_capacity: usize = if small_preallocation {
             1024
         } else {
             1024 * 1024
         };
         let modified_capacity: usize = if small_preallocation { 0 } else { 1024 };
-        let shard_factor: usize = if small_preallocation { 1 } else { 16 };
-
-        let num_workers =
-            num_worker.unwrap_or_else(|| available_parallelism().map_or(4, |v| v.get()));
-
-        let shard_amount = (num_workers * num_workers * shard_factor).next_power_of_two();
 
         Self {
             snapshot_mode: AtomicBool::new(false),

turbopack/crates/turbo-tasks-backend/src/utils/mod.rs

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ pub mod chunked_vec;
 pub mod dash_map_drop_contents;
 pub mod dash_map_multi;
 pub mod ptr_eq_arc;
+pub mod shard_amount;
 pub mod sharded;
 pub mod swap_retain;

turbopack/crates/turbo-tasks-backend/src/utils/shard_amount.rs

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+use std::thread::available_parallelism;
+
+/// Compute a good number of shards to use for sharded data structures.
+/// The number of shards is computed based on the number of worker threads
+/// and whether a small preallocation is requested.
+/// A small preallocation is useful for tests where performance is not
+/// critical and we want to reduce memory usage and startup time.
+/// The number of shards is chosen to minimize the probability of shard
+/// collisions (which can lead to false sharing) while keeping memory
+/// usage reasonable.
+/// The returned number is always a power of two as this is often required
+/// by sharded data structures. The maximum shard amount is capped at 1 << 16 (65536).
+pub fn compute_shard_amount(num_workers: Option<usize>, small_preallocation: bool) -> usize {
+    let num_workers = num_workers.unwrap_or_else(|| available_parallelism().map_or(4, |v| v.get()));
+
+    // One can compute the probability of a shard collision (which leads to false sharing) using
+    // the birthday paradox formula. It's notable that the probability of collisions increases
+    // with more worker threads. To mitigate this effect, the number of shards needs to grow
+    // quadratically with the number of worker threads. This way the probability of at least one
+    // collision remains constant.
+    //
+    // Let's call the worker thread count `N` and the number of shards `S`. When using `S = k * N^2`
+    // for some constant `k` the probability of at least one collision for large `N` can be
+    // approximated as: P = 1 - exp(-N^2 / (2*S)) = 1 - exp(-1/(2*k))
+    //
+    // For `k = 16` this results in a collision probability of about 3%.
+    // For `k = 1` this results in a collision probability of about 39%.
+    //
+    // We clamp the number of shards to 1 << 16 to avoid excessive memory usage in case of a very
+    // high number of worker threads. This case is hit with more than 64 worker threads for `k =
+    // 16` and more than 256 worker threads for `k = 1`.
+
+    if small_preallocation {
+        // We also clamp the maximum number of workers to 256 so all following multiplications
+        // can't overflow.
+        let num_workers = num_workers.min(256);
+        (num_workers * num_workers).next_power_of_two()
+    } else {
+        // We also clamp the maximum number of workers to 64 so all following multiplications
+        // can't overflow.
+        let num_workers = num_workers.min(64);
+        (num_workers * num_workers * 16).next_power_of_two()
+    }
+}
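
For readers checking the figures quoted in the comments above: for N workers hashing into S shards, the birthday-paradox approximation gives P = 1 - exp(-N^2 / (2*S)), and substituting S = k * N^2 collapses this to P = 1 - exp(-1/(2*k)), independent of N. The standalone sketch below is not part of the commit; it just evaluates that approximation for k = 16 and k = 1 and prints the shard counts the regular (non-small-preallocation) branch would produce for a few worker counts, assuming the documented cap of 1 << 16 shards:

// Standalone sketch (not part of this commit): re-derives the collision
// probabilities quoted in compute_shard_amount and shows the resulting
// shard counts for the regular (k = 16) branch.

/// Birthday-paradox approximation P = 1 - exp(-1/(2*k)) for S = k * N^2 shards.
fn collision_probability(k: f64) -> f64 {
    1.0 - (-1.0 / (2.0 * k)).exp()
}

fn main() {
    // k = 16 (regular preallocation) vs. k = 1 (small preallocation).
    for k in [16.0_f64, 1.0] {
        println!("k = {k:>2}: P(collision) ~ {:.1}%", collision_probability(k) * 100.0);
    }

    // Shard counts for the regular branch: S = (min(N, 64))^2 * 16, rounded up
    // to a power of two, so S never exceeds 1 << 16.
    for n in [4_usize, 8, 16, 64, 128] {
        let clamped = n.min(64);
        let shards = (clamped * clamped * 16).next_power_of_two();
        println!("N = {n:>3}: shards = {shards}");
    }
}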
