Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust nofile limit recommendations #1641

Merged
merged 1 commit into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions nativelink-config/examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ The `private` server consists of a `listener` object and a `services` object. Th

```json5
"global": {
"max_open_files": 512
"max_open_files": 24576
}
```

Expand Down Expand Up @@ -562,7 +562,7 @@ Below, you will find a fully tested example that you can also find in [basic_cas
}
}],
"global": {
"max_open_files": 512
"max_open_files": 24576
}
}
```
Expand Down
2 changes: 1 addition & 1 deletion nativelink-config/examples/basic_cas.json5
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,6 @@
}
}],
"global": {
"max_open_files": 512
"max_open_files": 24576
}
}
4 changes: 2 additions & 2 deletions nativelink-config/src/cas_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,12 +707,12 @@ pub struct GlobalConfig {
/// This value is not strictly enforced, it is a best effort. Some internal libraries
/// open files or read metadata from a files which do not obey this limit, however
/// the vast majority of cases will have this limit be honored.
/// As a rule of thumb this value should be less than half the value of `ulimit -n`.
/// This value must be larger than `ulimit -n` to have any effect.
/// Any network open file descriptors is not counted in this limit, but is counted
/// in the kernel limit. It is a good idea to set a very large `ulimit -n`.
/// Note: This value must be greater than 10.
///
/// Default: 512
/// Default: 24576 (= 24 * 1024)
#[serde(deserialize_with = "convert_numeric_with_shellexpand")]
pub max_open_files: usize,

Expand Down
80 changes: 48 additions & 32 deletions nativelink-util/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ impl AsyncWrite for FileSlot {

// Note: If the default changes make sure you update the documentation in
// `config/cas_server.rs`.
pub const DEFAULT_OPEN_FILE_PERMITS: usize = 24 * 1024; // 24k.
static TOTAL_FILE_SEMAPHORES: AtomicUsize = AtomicUsize::new(DEFAULT_OPEN_FILE_PERMITS);
pub static OPEN_FILE_SEMAPHORE: Semaphore = Semaphore::const_new(DEFAULT_OPEN_FILE_PERMITS);
pub const DEFAULT_OPEN_FILE_LIMIT: usize = 24 * 1024; // 24k.
static OPEN_FILE_LIMIT: AtomicUsize = AtomicUsize::new(DEFAULT_OPEN_FILE_LIMIT);
pub static OPEN_FILE_SEMAPHORE: Semaphore = Semaphore::const_new(DEFAULT_OPEN_FILE_LIMIT);

/// Try to acquire a permit from the open file semaphore.
#[inline]
Expand All @@ -139,59 +139,75 @@ where
.unwrap_or_else(|e| Err(make_err!(Code::Internal, "background task failed: {e:?}")))
}

pub fn set_open_file_limit(limit: usize) {
let new_limit = {
// We increase the limit by 20% to give extra
// room for other file descriptors like sockets,
// pipes, and other things.
let fs_ulimit =
u64::try_from(limit.saturating_add(limit / 5)).expect("set_open_file_limit too large");
match increase_nofile_limit(fs_ulimit) {
Ok(new_fs_ulimit) => {
/// Sets the soft nofile limit to `desired_open_file_limit` and adjusts
/// `OPEN_FILE_SEMAPHORE` accordingly.
///
/// # Panics
///
/// If any type conversion fails. This can't happen if `usize` is smaller than
/// `u64`.
pub fn set_open_file_limit(desired_open_file_limit: usize) {
let new_open_file_limit = {
match increase_nofile_limit(
u64::try_from(desired_open_file_limit)
.expect("desired_open_file_limit is too large to convert to u64."),
) {
Ok(open_file_limit) => {
event!(
Level::INFO,
"set_open_file_limit({limit})::ulimit success. New fs.ulimit: {fs_ulimit} (20% increase of {limit}).",
"set_open_file_limit() assigns new open file limit {open_file_limit}.",
);
usize::try_from(new_fs_ulimit).expect("new_fs_ulimit too large")
usize::try_from(open_file_limit)
.expect("open_file_limit is too large to convert to usize.")
}
Err(e) => {
event!(
Level::ERROR,
"set_open_file_limit({limit})::ulimit failed. Maybe system does not have ulimits, continuing anyway. - {e:?}",
"set_open_file_limit() failed to assign open file limit. Maybe system does not have ulimits, continuing anyway. - {e:?}",
);
limit
DEFAULT_OPEN_FILE_LIMIT
}
}
};
if new_limit < DEFAULT_OPEN_FILE_PERMITS {
// TODO(jaroeichler): Can we give a better estimate?
if new_open_file_limit < DEFAULT_OPEN_FILE_LIMIT {
event!(
Level::WARN,
"set_open_file_limit({limit}) succeeded, but this is below the default limit of {DEFAULT_OPEN_FILE_PERMITS}. Will continue, but we recommend increasing the limit to at least the default.",
"The new open file limit ({new_open_file_limit}) is below the recommended value of {DEFAULT_OPEN_FILE_LIMIT}. Consider raising max_open_files.",
);
}
if new_limit < limit {

// Use only 80% of the open file limit for permits from OPEN_FILE_SEMAPHORE
// to give extra room for other file descriptors like sockets, pipes, and
// other things.
let reduced_open_file_limit = new_open_file_limit.saturating_sub(new_open_file_limit / 5);
let previous_open_file_limit = OPEN_FILE_LIMIT.load(Ordering::Acquire);
// No permit should be aquired yet, so this warning should not occur.
if (OPEN_FILE_SEMAPHORE.available_permits() + reduced_open_file_limit)
< previous_open_file_limit
{
event!(
Level::WARN,
"set_open_file_limit({limit}) succeeded, but new open file limit is {new_limit}. Will continue, but likely a config or system options (ie: ulimit) needs updated.",
"There are not enough available permits to remove {previous_open_file_limit} - {reduced_open_file_limit} permits.",
);
}

let current_total = TOTAL_FILE_SEMAPHORES.load(Ordering::Acquire);
if limit < current_total {
event!(
Level::ERROR,
"set_open_file_limit({}) must be greater than {}",
limit,
current_total
if previous_open_file_limit <= reduced_open_file_limit {
OPEN_FILE_LIMIT.fetch_add(
reduced_open_file_limit - previous_open_file_limit,
Ordering::Release,
);
OPEN_FILE_SEMAPHORE.add_permits(reduced_open_file_limit - previous_open_file_limit);
} else {
OPEN_FILE_LIMIT.fetch_sub(
previous_open_file_limit - reduced_open_file_limit,
Ordering::Release,
);
return;
OPEN_FILE_SEMAPHORE.forget_permits(previous_open_file_limit - reduced_open_file_limit);
}
TOTAL_FILE_SEMAPHORES.fetch_add(limit - current_total, Ordering::Release);
OPEN_FILE_SEMAPHORE.add_permits(limit - current_total);
}

pub fn get_open_files_for_test() -> usize {
TOTAL_FILE_SEMAPHORES.load(Ordering::Acquire) - OPEN_FILE_SEMAPHORE.available_permits()
OPEN_FILE_LIMIT.load(Ordering::Acquire) - OPEN_FILE_SEMAPHORE.available_permits()
}

pub async fn open_file(
Expand Down
4 changes: 2 additions & 2 deletions src/bin/nativelink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut metrics_enabled = {
let global_cfg = if let Some(global_cfg) = &mut cfg.global {
if global_cfg.max_open_files == 0 {
global_cfg.max_open_files = fs::DEFAULT_OPEN_FILE_PERMITS;
global_cfg.max_open_files = fs::DEFAULT_OPEN_FILE_LIMIT;
}
if global_cfg.default_digest_size_health_check == 0 {
global_cfg.default_digest_size_health_check = DEFAULT_DIGEST_SIZE_HEALTH_CHECK_CFG;
Expand All @@ -1021,7 +1021,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
*global_cfg
} else {
GlobalConfig {
max_open_files: fs::DEFAULT_OPEN_FILE_PERMITS,
max_open_files: fs::DEFAULT_OPEN_FILE_LIMIT,
disable_metrics: cfg.servers.iter().all(|v| {
let Some(service) = &v.services else {
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@
}
}],
"global": {
"max_open_files": 512
// Defaults to 24576 = 24 * 1024.
"max_open_files": 24576
}
}
4 changes: 2 additions & 2 deletions web/platform/src/content/docs/docs/config/basic-configs.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ memory and filesystem stores instead of S3 and Redis.
}
}],
"global": {
"max_open_files": 512
"max_open_files": 24576
}
}

Expand Down Expand Up @@ -360,7 +360,7 @@ memory and filesystem stores instead of S3 and Redis.
}
}],
"global": {
"max_open_files": 512
"max_open_files": 24576
}
}
```
Loading