Skip to content

Improve cgu naming and ordering #112946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 42 additions & 21 deletions compiler/rustc_codegen_ssa/src/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,28 +698,49 @@ impl<B: WriteBackendMethods> WorkItem<B> {

/// Generate a short description of this work item suitable for use as a thread name.
fn short_description(&self) -> String {
// `pthread_setname()` on *nix is limited to 15 characters and longer names are ignored.
// Use very short descriptions in this case to maximize the space available for the module name.
// Windows does not have that limitation so use slightly more descriptive names there.
// `pthread_setname()` on *nix ignores anything beyond the first 15
// bytes. Use short descriptions to maximize the space available for
// the module name.
#[cfg(not(windows))]
fn desc(short: &str, _long: &str, name: &str) -> String {
// The short label is three bytes, and is followed by a space. That
// leaves 11 bytes for the CGU name. How we obtain those 11 bytes
// depends on the the CGU name form.
//
// - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
// before the `-cgu.0` is the same for every CGU, so use the
// `cgu.0` part. The number suffix will be different for each
// CGU.
//
// - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
// name because each CGU will have a unique ASCII hash, and the
// first 11 bytes will be enough to identify it.
//
// - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
// `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
// name. The first 11 bytes won't be enough to uniquely identify
// it, but no obvious substring will, and this is a rarely used
// option so it doesn't matter much.
//
assert_eq!(short.len(), 3);
let name = if let Some(index) = name.find("-cgu.") {
&name[index + 1..] // +1 skips the leading '-'.
} else {
name
};
format!("{short} {name}")
}

// Windows has no thread name length limit, so use more descriptive names.
#[cfg(windows)]
fn desc(_short: &str, long: &str, name: &str) -> String {
format!("{long} {name}")
}

match self {
WorkItem::Optimize(m) => {
#[cfg(windows)]
return format!("optimize module {}", m.name);
#[cfg(not(windows))]
return format!("opt {}", m.name);
}
WorkItem::CopyPostLtoArtifacts(m) => {
#[cfg(windows)]
return format!("copy LTO artifacts for {}", m.name);
#[cfg(not(windows))]
return format!("copy {}", m.name);
}
WorkItem::LTO(m) => {
#[cfg(windows)]
return format!("LTO module {}", m.name());
#[cfg(not(windows))]
return format!("LTO {}", m.name());
}
WorkItem::Optimize(m) => desc("opt", "optimize module {}", &m.name),
WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for {}", &m.name),
WorkItem::LTO(m) => desc("lto", "LTO module {}", m.name()),
}
}
}
Expand Down
36 changes: 30 additions & 6 deletions compiler/rustc_monomorphize/src/partitioning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ fn merge_codegen_units<'tcx>(

let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);

// Rename the newly merged CGUs.
if cx.tcx.sess.opts.incremental.is_some() {
// If we are doing incremental compilation, we want CGU names to
// reflect the path of the source level module they correspond to.
Expand Down Expand Up @@ -404,18 +405,41 @@ fn merge_codegen_units<'tcx>(
}
}
}

// A sorted order here ensures what follows can be deterministic.
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
} else {
// If we are compiling non-incrementally we just generate simple CGU
// names containing an index.
// When compiling non-incrementally, we rename the CGUS so they have
// identical names except for the numeric suffix, something like
// `regex.f10ba03eb5ec7975-cgu.N`, where `N` varies.
//
// It is useful for debugging and profiling purposes if the resulting
// CGUs are sorted by name *and* reverse sorted by size. (CGU 0 is the
// biggest, CGU 1 is the second biggest, etc.)
//
// So first we reverse sort by size. Then we generate the names with
// zero-padded suffixes, which means they are automatically sorted by
// names. The numeric suffix width depends on the number of CGUs, which
// is always greater than zero:
// - [1,9] CGUS: `0`, `1`, `2`, ...
// - [10,99] CGUS: `00`, `01`, `02`, ...
// - [100,999] CGUS: `000`, `001`, `002`, ...
// - etc.
//
// If we didn't zero-pad the sorted-by-name order would be `XYZ-cgu.0`,
// `XYZ-cgu.1`, `XYZ-cgu.10`, `XYZ-cgu.11`, ..., `XYZ-cgu.2`, etc.
codegen_units.sort_by_key(|cgu| cmp::Reverse(cgu.size_estimate()));
let num_digits = codegen_units.len().ilog10() as usize + 1;
for (index, cgu) in codegen_units.iter_mut().enumerate() {
// Note: `WorkItem::short_description` depends on this name ending
// with `-cgu.` followed by a numeric suffix. Please keep it in
// sync with this code.
let suffix = format!("{index:0num_digits$}");
let numbered_codegen_unit_name =
cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(index));
cgu_name_builder.build_cgu_name_no_mangle(LOCAL_CRATE, &["cgu"], Some(suffix));
cgu.set_name(numbered_codegen_unit_name);
}
}

// A sorted order here ensures what follows can be deterministic.
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
}

fn internalize_symbols<'tcx>(
Expand Down