Skip to content

Commit 69e5959

Browse files
authored
refactor(fingerprint): Track the intent for each use of UnitHash (#14826)
### What does this PR try to resolve? We have - `-C metadata` - `-C extra-filename` - Uniquifying build scripts - Uniquifying scraped documentation Currently, these are all the same value. For #8716, we might want to have `-C metadata` and `-C extra-filename` hash different parts of the `Unit`. I figured that this change helps to clarify intent so that even if we don't change the definitions, this could still be worth it. The last two I'm tracking as a `unit_id`. As we evolve the first two hashes, we can decide which would be a better fit for backing the `unit_id`. For scraping, I could have treated this as `-C extra-filename` but then we'd have a lot of `.expect()`s. I moved the accessors from `CompilationFiles` to `MetaInfo` so we could closely associate the documentation, API, and implementation. Making `CompilationFiles::c_metadata` the main entry point that gets documented would be weird because the hashing is associated with `MetaInfo` (umbrella type, was private, now `Metadata`) or `Metadata` (agnostic of each use, now `UnitHash`) ### How should we test and review this PR? ### Additional information
2 parents 9c3c48f + 1f4eeb4 commit 69e5959

File tree

7 files changed

+97
-85
lines changed

7 files changed

+97
-85
lines changed

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 53 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,22 @@ use crate::util::{self, CargoResult, StableHasher};
2525
/// use the same rustc version.
2626
const METADATA_VERSION: u8 = 2;
2727

28+
/// Uniquely identify a [`Unit`] under specific circumstances, see [`Metadata`] for more.
29+
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
30+
pub struct UnitHash(u64);
31+
32+
impl fmt::Display for UnitHash {
33+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34+
write!(f, "{:016x}", self.0)
35+
}
36+
}
37+
38+
impl fmt::Debug for UnitHash {
39+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40+
write!(f, "UnitHash({:016x})", self.0)
41+
}
42+
}
43+
2844
/// The `Metadata` is a hash used to make unique file names for each unit in a
2945
/// build. It is also used for symbol mangling.
3046
///
@@ -68,30 +84,27 @@ const METADATA_VERSION: u8 = 2;
6884
///
6985
/// Note that the `Fingerprint` is in charge of tracking everything needed to determine if a
7086
/// rebuild is needed.
71-
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd)]
72-
pub struct Metadata(u64);
87+
#[derive(Copy, Clone, Debug)]
88+
pub struct Metadata {
89+
meta_hash: UnitHash,
90+
use_extra_filename: bool,
91+
}
7392

74-
impl fmt::Display for Metadata {
75-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76-
write!(f, "{:016x}", self.0)
93+
impl Metadata {
94+
/// A hash to identify a given [`Unit`] in the build graph
95+
pub fn unit_id(&self) -> UnitHash {
96+
self.meta_hash
7797
}
78-
}
7998

80-
impl fmt::Debug for Metadata {
81-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
82-
write!(f, "Metadata({:016x})", self.0)
99+
/// A hash to add to symbol naming through `-C metadata`
100+
pub fn c_metadata(&self) -> UnitHash {
101+
self.meta_hash
83102
}
84-
}
85103

86-
/// Information about the metadata hashes used for a `Unit`.
87-
struct MetaInfo {
88-
/// The symbol hash to use.
89-
meta_hash: Metadata,
90-
/// Whether or not the `-C extra-filename` flag is used to generate unique
91-
/// output filenames for this `Unit`.
92-
///
93-
/// If this is `true`, the `meta_hash` is used for the filename.
94-
use_extra_filename: bool,
104+
/// A hash to add to file names through `-C extra-filename`
105+
pub fn c_extra_filename(&self) -> Option<UnitHash> {
106+
self.use_extra_filename.then_some(self.meta_hash)
107+
}
95108
}
96109

97110
/// Collection of information about the files emitted by the compiler, and the
@@ -108,7 +121,7 @@ pub struct CompilationFiles<'a, 'gctx> {
108121
roots: Vec<Unit>,
109122
ws: &'a Workspace<'gctx>,
110123
/// Metadata hash to use for each unit.
111-
metas: HashMap<Unit, MetaInfo>,
124+
metas: HashMap<Unit, Metadata>,
112125
/// For each Unit, a list all files produced.
113126
outputs: HashMap<Unit, LazyCell<Arc<Vec<OutputFile>>>>,
114127
}
@@ -177,13 +190,7 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
177190
///
178191
/// [`fingerprint`]: super::super::fingerprint#fingerprints-and-metadata
179192
pub fn metadata(&self, unit: &Unit) -> Metadata {
180-
self.metas[unit].meta_hash
181-
}
182-
183-
/// Returns whether or not `-C extra-filename` is used to extend the
184-
/// output filenames to make them unique.
185-
pub fn use_extra_filename(&self, unit: &Unit) -> bool {
186-
self.metas[unit].use_extra_filename
193+
self.metas[unit]
187194
}
188195

189196
/// Gets the short hash based only on the `PackageId`.
@@ -224,9 +231,9 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
224231
/// taken in those cases!
225232
fn pkg_dir(&self, unit: &Unit) -> String {
226233
let name = unit.pkg.package_id().name();
227-
let meta = &self.metas[unit];
228-
if meta.use_extra_filename {
229-
format!("{}-{}", name, meta.meta_hash)
234+
let meta = self.metas[unit];
235+
if let Some(c_extra_filename) = meta.c_extra_filename() {
236+
format!("{}-{}", name, c_extra_filename)
230237
} else {
231238
format!("{}-{}", name, self.target_short_hash(unit))
232239
}
@@ -467,7 +474,11 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
467474
// The file name needs to be stable across Cargo sessions.
468475
// This originally used unit.buildkey(), but that isn't stable,
469476
// so we use metadata instead (prefixed with name for debugging).
470-
let file_name = format!("{}-{}.examples", unit.pkg.name(), self.metadata(unit));
477+
let file_name = format!(
478+
"{}-{}.examples",
479+
unit.pkg.name(),
480+
self.metadata(unit).unit_id()
481+
);
471482
let path = self.deps_dir(unit).join(file_name);
472483
vec![OutputFile {
473484
path,
@@ -523,8 +534,8 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
523534
// Convert FileType to OutputFile.
524535
let mut outputs = Vec::new();
525536
for file_type in file_types {
526-
let meta = &self.metas[unit];
527-
let meta_opt = meta.use_extra_filename.then(|| meta.meta_hash.to_string());
537+
let meta = self.metas[unit];
538+
let meta_opt = meta.c_extra_filename().map(|h| h.to_string());
528539
let path = out_dir.join(file_type.output_filename(&unit.target, meta_opt.as_deref()));
529540

530541
// If, the `different_binary_name` feature is enabled, the name of the hardlink will
@@ -558,8 +569,8 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
558569
fn metadata_of<'a>(
559570
unit: &Unit,
560571
build_runner: &BuildRunner<'_, '_>,
561-
metas: &'a mut HashMap<Unit, MetaInfo>,
562-
) -> &'a MetaInfo {
572+
metas: &'a mut HashMap<Unit, Metadata>,
573+
) -> &'a Metadata {
563574
if !metas.contains_key(unit) {
564575
let meta = compute_metadata(unit, build_runner, metas);
565576
metas.insert(unit.clone(), meta);
@@ -574,8 +585,8 @@ fn metadata_of<'a>(
574585
fn compute_metadata(
575586
unit: &Unit,
576587
build_runner: &BuildRunner<'_, '_>,
577-
metas: &mut HashMap<Unit, MetaInfo>,
578-
) -> MetaInfo {
588+
metas: &mut HashMap<Unit, Metadata>,
589+
) -> Metadata {
579590
let bcx = &build_runner.bcx;
580591
let mut hasher = StableHasher::new();
581592

@@ -669,9 +680,9 @@ fn compute_metadata(
669680
target_configs_are_different.hash(&mut hasher);
670681
}
671682

672-
MetaInfo {
673-
meta_hash: Metadata(hasher.finish()),
674-
use_extra_filename: should_use_metadata(bcx, unit),
683+
Metadata {
684+
meta_hash: UnitHash(hasher.finish()),
685+
use_extra_filename: use_extra_filename(bcx, unit),
675686
}
676687
}
677688

@@ -717,8 +728,8 @@ fn hash_rustc_version(bcx: &BuildContext<'_, '_>, hasher: &mut StableHasher, uni
717728
// between different backends without recompiling.
718729
}
719730

720-
/// Returns whether or not this unit should use a metadata hash.
721-
fn should_use_metadata(bcx: &BuildContext<'_, '_>, unit: &Unit) -> bool {
731+
/// Returns whether or not this unit should use a hash in the filename to make it unique.
732+
fn use_extra_filename(bcx: &BuildContext<'_, '_>, unit: &Unit) -> bool {
722733
if unit.mode.is_doc_test() || unit.mode.is_doc() {
723734
// Doc tests do not have metadata.
724735
return false;

src/cargo/core/compiler/build_runner/mod.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use super::{
2727

2828
mod compilation_files;
2929
use self::compilation_files::CompilationFiles;
30-
pub use self::compilation_files::{Metadata, OutputFile};
30+
pub use self::compilation_files::{Metadata, OutputFile, UnitHash};
3131

3232
/// Collection of all the stuff that is needed to perform a build.
3333
///
@@ -86,7 +86,7 @@ pub struct BuildRunner<'a, 'gctx> {
8686
/// Set of metadata of Docscrape units that fail before completion, e.g.
8787
/// because the target has a type error. This is in an Arc<Mutex<..>>
8888
/// because it is continuously updated as the job progresses.
89-
pub failed_scrape_units: Arc<Mutex<HashSet<Metadata>>>,
89+
pub failed_scrape_units: Arc<Mutex<HashSet<UnitHash>>>,
9090
}
9191

9292
impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
@@ -435,15 +435,15 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
435435
/// the given unit.
436436
///
437437
/// If the package does not have a build script, this returns None.
438-
pub fn find_build_script_metadata(&self, unit: &Unit) -> Option<Metadata> {
438+
pub fn find_build_script_metadata(&self, unit: &Unit) -> Option<UnitHash> {
439439
let script_unit = self.find_build_script_unit(unit)?;
440440
Some(self.get_run_build_script_metadata(&script_unit))
441441
}
442442

443443
/// Returns the metadata hash for a `RunCustomBuild` unit.
444-
pub fn get_run_build_script_metadata(&self, unit: &Unit) -> Metadata {
444+
pub fn get_run_build_script_metadata(&self, unit: &Unit) -> UnitHash {
445445
assert!(unit.mode.is_run_custom_build());
446-
self.files().metadata(unit)
446+
self.files().metadata(unit).unit_id()
447447
}
448448

449449
pub fn is_primary_package(&self, unit: &Unit) -> bool {

src/cargo/core/compiler/compilation.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use cargo_util::{paths, ProcessBuilder};
99

1010
use crate::core::compiler::apply_env_config;
1111
use crate::core::compiler::BuildContext;
12-
use crate::core::compiler::{CompileKind, Metadata, Unit};
12+
use crate::core::compiler::{CompileKind, Unit, UnitHash};
1313
use crate::core::Package;
1414
use crate::util::{context, CargoResult, GlobalContext};
1515

@@ -45,7 +45,7 @@ pub struct Doctest {
4545
/// The script metadata, if this unit's package has a build script.
4646
///
4747
/// This is used for indexing [`Compilation::extra_env`].
48-
pub script_meta: Option<Metadata>,
48+
pub script_meta: Option<UnitHash>,
4949

5050
/// Environment variables to set in the rustdoc process.
5151
pub env: HashMap<String, OsString>,
@@ -61,7 +61,7 @@ pub struct UnitOutput {
6161
/// The script metadata, if this unit's package has a build script.
6262
///
6363
/// This is used for indexing [`Compilation::extra_env`].
64-
pub script_meta: Option<Metadata>,
64+
pub script_meta: Option<UnitHash>,
6565
}
6666

6767
/// A structure returning the result of a compilation.
@@ -101,7 +101,7 @@ pub struct Compilation<'gctx> {
101101
///
102102
/// The key is the build script metadata for uniquely identifying the
103103
/// `RunCustomBuild` unit that generated these env vars.
104-
pub extra_env: HashMap<Metadata, Vec<(String, String)>>,
104+
pub extra_env: HashMap<UnitHash, Vec<(String, String)>>,
105105

106106
/// Libraries to test with rustdoc.
107107
pub to_doc_test: Vec<Doctest>,
@@ -197,7 +197,7 @@ impl<'gctx> Compilation<'gctx> {
197197
pub fn rustdoc_process(
198198
&self,
199199
unit: &Unit,
200-
script_meta: Option<Metadata>,
200+
script_meta: Option<UnitHash>,
201201
) -> CargoResult<ProcessBuilder> {
202202
let mut rustdoc = ProcessBuilder::new(&*self.gctx.rustdoc()?);
203203
if self.gctx.extra_verbose() {
@@ -256,7 +256,7 @@ impl<'gctx> Compilation<'gctx> {
256256
cmd: T,
257257
kind: CompileKind,
258258
pkg: &Package,
259-
script_meta: Option<Metadata>,
259+
script_meta: Option<UnitHash>,
260260
) -> CargoResult<ProcessBuilder> {
261261
let builder = if let Some((runner, args)) = self.target_runner(kind) {
262262
let mut builder = ProcessBuilder::new(runner);
@@ -285,7 +285,7 @@ impl<'gctx> Compilation<'gctx> {
285285
&self,
286286
mut cmd: ProcessBuilder,
287287
pkg: &Package,
288-
script_meta: Option<Metadata>,
288+
script_meta: Option<UnitHash>,
289289
kind: CompileKind,
290290
tool_kind: ToolKind,
291291
) -> CargoResult<ProcessBuilder> {

src/cargo/core/compiler/custom_build.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
3434
use super::{fingerprint, BuildRunner, Job, Unit, Work};
3535
use crate::core::compiler::artifact;
36-
use crate::core::compiler::build_runner::Metadata;
36+
use crate::core::compiler::build_runner::UnitHash;
3737
use crate::core::compiler::fingerprint::DirtyReason;
3838
use crate::core::compiler::job_queue::JobState;
3939
use crate::core::{profiles::ProfileRoot, PackageId, Target};
@@ -111,13 +111,13 @@ pub struct BuildOutput {
111111
/// inserted during `build_map`. The rest of the entries are added
112112
/// immediately after each build script runs.
113113
///
114-
/// The `Metadata` is the unique metadata hash for the `RunCustomBuild` Unit of
114+
/// The [`UnitHash`] is the unique metadata hash for the `RunCustomBuild` Unit of
115115
/// the package. It needs a unique key, since the build script can be run
116116
/// multiple times with different profiles or features. We can't embed a
117117
/// `Unit` because this structure needs to be shareable between threads.
118118
#[derive(Default)]
119119
pub struct BuildScriptOutputs {
120-
outputs: HashMap<Metadata, BuildOutput>,
120+
outputs: HashMap<UnitHash, BuildOutput>,
121121
}
122122

123123
/// Linking information for a `Unit`.
@@ -141,9 +141,9 @@ pub struct BuildScripts {
141141
/// usage here doesn't blow up too much.
142142
///
143143
/// For more information, see #2354.
144-
pub to_link: Vec<(PackageId, Metadata)>,
144+
pub to_link: Vec<(PackageId, UnitHash)>,
145145
/// This is only used while constructing `to_link` to avoid duplicates.
146-
seen_to_link: HashSet<(PackageId, Metadata)>,
146+
seen_to_link: HashSet<(PackageId, UnitHash)>,
147147
/// Host-only dependencies that have build scripts. Each element is an
148148
/// index into `BuildScriptOutputs`.
149149
///
@@ -152,7 +152,7 @@ pub struct BuildScripts {
152152
/// Any `BuildOutput::library_paths` path relative to `target` will be
153153
/// added to `LD_LIBRARY_PATH` so that the compiler can find any dynamic
154154
/// libraries a build script may have generated.
155-
pub plugins: BTreeSet<(PackageId, Metadata)>,
155+
pub plugins: BTreeSet<(PackageId, UnitHash)>,
156156
}
157157

158158
/// Dependency information as declared by a build script that might trigger
@@ -649,7 +649,7 @@ fn build_work(build_runner: &mut BuildRunner<'_, '_>, unit: &Unit) -> CargoResul
649649
fn insert_log_messages_in_build_outputs(
650650
build_script_outputs: Arc<Mutex<BuildScriptOutputs>>,
651651
id: PackageId,
652-
metadata_hash: Metadata,
652+
metadata_hash: UnitHash,
653653
log_messages: Vec<LogMessage>,
654654
) {
655655
let build_output_with_only_log_messages = BuildOutput {
@@ -1245,7 +1245,7 @@ pub fn build_map(build_runner: &mut BuildRunner<'_, '_>) -> CargoResult<()> {
12451245

12461246
// When adding an entry to 'to_link' we only actually push it on if the
12471247
// script hasn't seen it yet (e.g., we don't push on duplicates).
1248-
fn add_to_link(scripts: &mut BuildScripts, pkg: PackageId, metadata: Metadata) {
1248+
fn add_to_link(scripts: &mut BuildScripts, pkg: PackageId, metadata: UnitHash) {
12491249
if scripts.seen_to_link.insert((pkg, metadata)) {
12501250
scripts.to_link.push((pkg, metadata));
12511251
}
@@ -1297,7 +1297,7 @@ fn prev_build_output(
12971297

12981298
impl BuildScriptOutputs {
12991299
/// Inserts a new entry into the map.
1300-
fn insert(&mut self, pkg_id: PackageId, metadata: Metadata, parsed_output: BuildOutput) {
1300+
fn insert(&mut self, pkg_id: PackageId, metadata: UnitHash, parsed_output: BuildOutput) {
13011301
match self.outputs.entry(metadata) {
13021302
Entry::Vacant(entry) => {
13031303
entry.insert(parsed_output);
@@ -1314,17 +1314,17 @@ impl BuildScriptOutputs {
13141314
}
13151315

13161316
/// Returns `true` if the given key already exists.
1317-
fn contains_key(&self, metadata: Metadata) -> bool {
1317+
fn contains_key(&self, metadata: UnitHash) -> bool {
13181318
self.outputs.contains_key(&metadata)
13191319
}
13201320

13211321
/// Gets the build output for the given key.
1322-
pub fn get(&self, meta: Metadata) -> Option<&BuildOutput> {
1322+
pub fn get(&self, meta: UnitHash) -> Option<&BuildOutput> {
13231323
self.outputs.get(&meta)
13241324
}
13251325

13261326
/// Returns an iterator over all entries.
1327-
pub fn iter(&self) -> impl Iterator<Item = (&Metadata, &BuildOutput)> {
1327+
pub fn iter(&self) -> impl Iterator<Item = (&UnitHash, &BuildOutput)> {
13281328
self.outputs.iter()
13291329
}
13301330
}

src/cargo/core/compiler/job_queue/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ impl<'gctx> DrainState<'gctx> {
685685
.failed_scrape_units
686686
.lock()
687687
.unwrap()
688-
.insert(build_runner.files().metadata(&unit));
688+
.insert(build_runner.files().metadata(&unit).unit_id());
689689
self.queue.finish(&unit, &artifact);
690690
}
691691
Err(error) => {

0 commit comments

Comments
 (0)