Skip to content

Commit 8c35f4a

Browse files
committed
Auto merge of #137535 - Kobzol:split-metadata, r=petrochenkov
Introduce `-Zembed-metadata` to allow omitting full metadata from rlibs and dylibs

This is a continuation of #120855 (I was mentored by `@bjorn3` to move it forward). Most of the original code was written by bjorn3; I tried to clean it up a bit and add some documentation and tests.

This PR introduces a new unstable compiler flag called `-Zembed-metadata=[no|yes]`, with the default being `yes` (see #57076 for context). When set to `no`, rustc will only store a small metadata stub inside rlibs/dylibs instead of the full metadata, to keep their size smaller. It should be used in combination with `--emit=metadata`, so that the users of such a compiled library can still read the metadata from the corresponding `.rmeta` file. A comment on #120855 shows an example of the binary/artifact size wins that can be achieved using this approach.

Contrary to #120855, this PR only introduces the new flag, along with a couple of run-make tests and documentation, but does not yet use it in bootstrap to actually compile rustc. I plan to do that as a follow-up step (along with integration in Cargo, which should ideally just always pass this flag to reduce the size of target directories).

Fixes #23366
Closes #29511
Fixes #57076

Another attempt at #93945 and #120855.

r? `@petrochenkov`
2 parents ed20157 + f0efb97 commit 8c35f4a

File tree

15 files changed

+253
-60
lines changed

15 files changed

+253
-60
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ fn link_rlib<'a>(
294294
let (metadata, metadata_position) = create_wrapper_file(
295295
sess,
296296
".rmeta".to_string(),
297-
codegen_results.metadata.raw_data(),
297+
codegen_results.metadata.stub_or_full(),
298298
);
299299
let metadata = emit_wrapper_file(sess, &metadata, tmpdir, METADATA_FILENAME);
300300
match metadata_position {

compiler/rustc_codegen_ssa/src/back/metadata.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,8 @@ pub fn create_compressed_metadata_file(
540540
symbol_name: &str,
541541
) -> Vec<u8> {
542542
let mut packed_metadata = rustc_metadata::METADATA_HEADER.to_vec();
543-
packed_metadata.write_all(&(metadata.raw_data().len() as u64).to_le_bytes()).unwrap();
544-
packed_metadata.extend(metadata.raw_data());
543+
packed_metadata.write_all(&(metadata.stub_or_full().len() as u64).to_le_bytes()).unwrap();
544+
packed_metadata.extend(metadata.stub_or_full());
545545

546546
let Some(mut file) = create_object_file(sess) else {
547547
if sess.target.is_like_wasm {

compiler/rustc_interface/src/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,7 @@ fn test_unstable_options_tracking_hash() {
787787
tracked!(direct_access_external_data, Some(true));
788788
tracked!(dual_proc_macros, true);
789789
tracked!(dwarf_version, Some(5));
790+
tracked!(embed_metadata, false);
790791
tracked!(embed_source, true);
791792
tracked!(emit_thin_lto, false);
792793
tracked!(emscripten_wasm_eh, true);

compiler/rustc_metadata/messages.ftl

+4
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ metadata_found_staticlib =
9797
found staticlib `{$crate_name}` instead of rlib or dylib{$add_info}
9898
.help = please recompile that crate using --crate-type lib
9999
100+
metadata_full_metadata_not_found =
101+
only metadata stub found for `{$flavor}` dependency `{$crate_name}`
102+
please provide path to the corresponding .rmeta file with full metadata
103+
100104
metadata_global_alloc_required =
101105
no global memory allocator found but one is required; link to std or add `#[global_allocator]` to a static item that implements the GlobalAlloc trait
102106

compiler/rustc_metadata/src/errors.rs

+9
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,15 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for MultipleCandidates {
525525
}
526526
}
527527

528+
#[derive(Diagnostic)]
529+
#[diag(metadata_full_metadata_not_found)]
530+
pub(crate) struct FullMetadataNotFound {
531+
#[primary_span]
532+
pub span: Span,
533+
pub flavor: CrateFlavor,
534+
pub crate_name: Symbol,
535+
}
536+
528537
#[derive(Diagnostic)]
529538
#[diag(metadata_symbol_conflicts_current, code = E0519)]
530539
pub struct SymbolConflictsCurrent {

compiler/rustc_metadata/src/fs.rs

+20-6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::{fs, io};
33

44
use rustc_data_structures::temp_dir::MaybeTempDir;
55
use rustc_middle::ty::TyCtxt;
6-
use rustc_session::config::{OutFileName, OutputType};
6+
use rustc_session::config::{CrateType, OutFileName, OutputType};
77
use rustc_session::output::filename_for_metadata;
88
use rustc_session::{MetadataKind, Session};
99
use tempfile::Builder as TempFileBuilder;
@@ -50,7 +50,14 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
5050
.tempdir_in(out_filename.parent().unwrap_or_else(|| Path::new("")))
5151
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailedCreateTempdir { err }));
5252
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
53-
let metadata_filename = metadata_tmpdir.as_ref().join(METADATA_FILENAME);
53+
let metadata_filename = metadata_tmpdir.as_ref().join("full.rmeta");
54+
let metadata_stub_filename = if !tcx.sess.opts.unstable_opts.embed_metadata
55+
&& !tcx.crate_types().contains(&CrateType::ProcMacro)
56+
{
57+
Some(metadata_tmpdir.as_ref().join("stub.rmeta"))
58+
} else {
59+
None
60+
};
5461

5562
// Always create a file at `metadata_filename`, even if we have nothing to write to it.
5663
// This simplifies the creation of the output `out_filename` when requested.
@@ -60,9 +67,15 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
6067
std::fs::File::create(&metadata_filename).unwrap_or_else(|err| {
6168
tcx.dcx().emit_fatal(FailedCreateFile { filename: &metadata_filename, err });
6269
});
70+
if let Some(metadata_stub_filename) = &metadata_stub_filename {
71+
std::fs::File::create(metadata_stub_filename).unwrap_or_else(|err| {
72+
tcx.dcx()
73+
.emit_fatal(FailedCreateFile { filename: &metadata_stub_filename, err });
74+
});
75+
}
6376
}
6477
MetadataKind::Uncompressed | MetadataKind::Compressed => {
65-
encode_metadata(tcx, &metadata_filename);
78+
encode_metadata(tcx, &metadata_filename, metadata_stub_filename.as_deref())
6679
}
6780
};
6881

@@ -100,9 +113,10 @@ pub fn encode_and_write_metadata(tcx: TyCtxt<'_>) -> (EncodedMetadata, bool) {
100113

101114
// Load metadata back to memory: codegen may need to include it in object files.
102115
let metadata =
103-
EncodedMetadata::from_path(metadata_filename, metadata_tmpdir).unwrap_or_else(|err| {
104-
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
105-
});
116+
EncodedMetadata::from_path(metadata_filename, metadata_stub_filename, metadata_tmpdir)
117+
.unwrap_or_else(|err| {
118+
tcx.dcx().emit_fatal(FailedCreateEncodedMetadata { err });
119+
});
106120

107121
let need_metadata_module = metadata_kind == MetadataKind::Compressed;
108122

compiler/rustc_metadata/src/locator.rs

+38-29
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,24 @@ impl<'a> CrateLocator<'a> {
654654
continue;
655655
}
656656
}
657-
*slot = Some((hash, metadata, lib.clone()));
657+
658+
// We error eagerly here. If we're locating a rlib, then in theory the full metadata
659+
// could still be in a (later resolved) dylib. In practice, if the rlib and dylib
660+
// were produced in a way where one has full metadata and the other hasn't, it would
661+
// mean that they were compiled using different compiler flags and probably also have
662+
// a different SVH value.
663+
if metadata.get_header().is_stub {
664+
// `is_stub` should never be true for .rmeta files.
665+
assert_ne!(flavor, CrateFlavor::Rmeta);
666+
667+
// Because rmeta files are resolved before rlib/dylib files, if this is a stub and
668+
// we haven't found a slot already, it means that the full metadata is missing.
669+
if slot.is_none() {
670+
return Err(CrateError::FullMetadataNotFound(self.crate_name, flavor));
671+
}
672+
} else {
673+
*slot = Some((hash, metadata, lib.clone()));
674+
}
658675
ret = Some((lib, kind));
659676
}
660677

@@ -728,37 +745,25 @@ impl<'a> CrateLocator<'a> {
728745
let Some(file) = loc_orig.file_name().and_then(|s| s.to_str()) else {
729746
return Err(CrateError::ExternLocationNotFile(self.crate_name, loc_orig.clone()));
730747
};
731-
// FnMut cannot return reference to captured value, so references
732-
// must be taken outside the closure.
733-
let rlibs = &mut rlibs;
734-
let rmetas = &mut rmetas;
735-
let dylibs = &mut dylibs;
736-
let type_via_filename = (|| {
737-
if file.starts_with("lib") {
738-
if file.ends_with(".rlib") {
739-
return Some(rlibs);
740-
}
741-
if file.ends_with(".rmeta") {
742-
return Some(rmetas);
743-
}
744-
}
745-
let dll_prefix = self.target.dll_prefix.as_ref();
746-
let dll_suffix = self.target.dll_suffix.as_ref();
747-
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
748-
return Some(dylibs);
749-
}
750-
None
751-
})();
752-
match type_via_filename {
753-
Some(type_via_filename) => {
754-
type_via_filename.insert(loc_canon.clone(), PathKind::ExternFlag);
748+
if file.starts_with("lib") {
749+
if file.ends_with(".rlib") {
750+
rlibs.insert(loc_canon.clone(), PathKind::ExternFlag);
751+
continue;
755752
}
756-
None => {
757-
self.crate_rejections
758-
.via_filename
759-
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
753+
if file.ends_with(".rmeta") {
754+
rmetas.insert(loc_canon.clone(), PathKind::ExternFlag);
755+
continue;
760756
}
761757
}
758+
let dll_prefix = self.target.dll_prefix.as_ref();
759+
let dll_suffix = self.target.dll_suffix.as_ref();
760+
if file.starts_with(dll_prefix) && file.ends_with(dll_suffix) {
761+
dylibs.insert(loc_canon.clone(), PathKind::ExternFlag);
762+
continue;
763+
}
764+
self.crate_rejections
765+
.via_filename
766+
.push(CrateMismatch { path: loc_orig.clone(), got: String::new() });
762767
}
763768

764769
// Extract the dylib/rlib/rmeta triple.
@@ -928,6 +933,7 @@ pub(crate) enum CrateError {
928933
ExternLocationNotExist(Symbol, PathBuf),
929934
ExternLocationNotFile(Symbol, PathBuf),
930935
MultipleCandidates(Symbol, CrateFlavor, Vec<PathBuf>),
936+
FullMetadataNotFound(Symbol, CrateFlavor),
931937
SymbolConflictsCurrent(Symbol),
932938
StableCrateIdCollision(Symbol, Symbol),
933939
DlOpen(String, String),
@@ -978,6 +984,9 @@ impl CrateError {
978984
CrateError::MultipleCandidates(crate_name, flavor, candidates) => {
979985
dcx.emit_err(errors::MultipleCandidates { span, crate_name, flavor, candidates });
980986
}
987+
CrateError::FullMetadataNotFound(crate_name, flavor) => {
988+
dcx.emit_err(errors::FullMetadataNotFound { span, crate_name, flavor });
989+
}
981990
CrateError::SymbolConflictsCurrent(root_name) => {
982991
dcx.emit_err(errors::SymbolConflictsCurrent { span, crate_name: root_name });
983992
}

compiler/rustc_metadata/src/rmeta/encoder.rs

+72-19
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
701701
triple: tcx.sess.opts.target_triple.clone(),
702702
hash: tcx.crate_hash(LOCAL_CRATE),
703703
is_proc_macro_crate: proc_macro_data.is_some(),
704+
is_stub: false,
704705
},
705706
extra_filename: tcx.sess.opts.cg.extra_filename.clone(),
706707
stable_crate_id: tcx.def_path_hash(LOCAL_CRATE.as_def_id()).stable_crate_id(),
@@ -2231,54 +2232,75 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
22312232
// generated regardless of trailing bytes that end up in it.
22322233

22332234
pub struct EncodedMetadata {
2234-
// The declaration order matters because `mmap` should be dropped before `_temp_dir`.
2235-
mmap: Option<Mmap>,
2235+
// The declaration order matters because `full_metadata` should be dropped
2236+
// before `_temp_dir`.
2237+
full_metadata: Option<Mmap>,
2238+
// This is an optional stub metadata containing only the crate header.
2239+
// The header should be very small, so we load it directly into memory.
2240+
stub_metadata: Option<Vec<u8>>,
22362241
// We need to carry MaybeTempDir to avoid deleting the temporary
22372242
// directory while accessing the Mmap.
22382243
_temp_dir: Option<MaybeTempDir>,
22392244
}
22402245

22412246
impl EncodedMetadata {
22422247
#[inline]
2243-
pub fn from_path(path: PathBuf, temp_dir: Option<MaybeTempDir>) -> std::io::Result<Self> {
2248+
pub fn from_path(
2249+
path: PathBuf,
2250+
stub_path: Option<PathBuf>,
2251+
temp_dir: Option<MaybeTempDir>,
2252+
) -> std::io::Result<Self> {
22442253
let file = std::fs::File::open(&path)?;
22452254
let file_metadata = file.metadata()?;
22462255
if file_metadata.len() == 0 {
2247-
return Ok(Self { mmap: None, _temp_dir: None });
2256+
return Ok(Self { full_metadata: None, stub_metadata: None, _temp_dir: None });
22482257
}
2249-
let mmap = unsafe { Some(Mmap::map(file)?) };
2250-
Ok(Self { mmap, _temp_dir: temp_dir })
2258+
let full_mmap = unsafe { Some(Mmap::map(file)?) };
2259+
2260+
let stub =
2261+
if let Some(stub_path) = stub_path { Some(std::fs::read(stub_path)?) } else { None };
2262+
2263+
Ok(Self { full_metadata: full_mmap, stub_metadata: stub, _temp_dir: temp_dir })
2264+
}
2265+
2266+
#[inline]
2267+
pub fn full(&self) -> &[u8] {
2268+
&self.full_metadata.as_deref().unwrap_or_default()
22512269
}
22522270

22532271
#[inline]
2254-
pub fn raw_data(&self) -> &[u8] {
2255-
self.mmap.as_deref().unwrap_or_default()
2272+
pub fn stub_or_full(&self) -> &[u8] {
2273+
self.stub_metadata.as_deref().unwrap_or(self.full())
22562274
}
22572275
}
22582276

22592277
impl<S: Encoder> Encodable<S> for EncodedMetadata {
22602278
fn encode(&self, s: &mut S) {
2261-
let slice = self.raw_data();
2279+
self.stub_metadata.encode(s);
2280+
2281+
let slice = self.full();
22622282
slice.encode(s)
22632283
}
22642284
}
22652285

22662286
impl<D: Decoder> Decodable<D> for EncodedMetadata {
22672287
fn decode(d: &mut D) -> Self {
2288+
let stub = <Option<Vec<u8>>>::decode(d);
2289+
22682290
let len = d.read_usize();
2269-
let mmap = if len > 0 {
2291+
let full_metadata = if len > 0 {
22702292
let mut mmap = MmapMut::map_anon(len).unwrap();
22712293
mmap.copy_from_slice(d.read_raw_bytes(len));
22722294
Some(mmap.make_read_only().unwrap())
22732295
} else {
22742296
None
22752297
};
22762298

2277-
Self { mmap, _temp_dir: None }
2299+
Self { full_metadata, stub_metadata: stub, _temp_dir: None }
22782300
}
22792301
}
22802302

2281-
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
2303+
pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path, ref_path: Option<&Path>) {
22822304
let _prof_timer = tcx.prof.verbose_generic_activity("generate_crate_metadata");
22832305

22842306
// Since encoding metadata is not in a query, and nothing is cached,
@@ -2292,6 +2314,42 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
22922314
join(|| prefetch_mir(tcx), || tcx.exported_symbols(LOCAL_CRATE));
22932315
}
22942316

2317+
with_encode_metadata_header(tcx, path, |ecx| {
2318+
// Encode all the entries and extra information in the crate,
2319+
// culminating in the `CrateRoot` which points to all of it.
2320+
let root = ecx.encode_crate_root();
2321+
2322+
// Flush buffer to ensure backing file has the correct size.
2323+
ecx.opaque.flush();
2324+
// Record metadata size for self-profiling
2325+
tcx.prof.artifact_size(
2326+
"crate_metadata",
2327+
"crate_metadata",
2328+
ecx.opaque.file().metadata().unwrap().len(),
2329+
);
2330+
2331+
root.position.get()
2332+
});
2333+
2334+
if let Some(ref_path) = ref_path {
2335+
with_encode_metadata_header(tcx, ref_path, |ecx| {
2336+
let header: LazyValue<CrateHeader> = ecx.lazy(CrateHeader {
2337+
name: tcx.crate_name(LOCAL_CRATE),
2338+
triple: tcx.sess.opts.target_triple.clone(),
2339+
hash: tcx.crate_hash(LOCAL_CRATE),
2340+
is_proc_macro_crate: false,
2341+
is_stub: true,
2342+
});
2343+
header.position.get()
2344+
});
2345+
}
2346+
}
2347+
2348+
fn with_encode_metadata_header(
2349+
tcx: TyCtxt<'_>,
2350+
path: &Path,
2351+
f: impl FnOnce(&mut EncodeContext<'_, '_>) -> usize,
2352+
) {
22952353
let mut encoder = opaque::FileEncoder::new(path)
22962354
.unwrap_or_else(|err| tcx.dcx().emit_fatal(FailCreateFileEncoder { err }));
22972355
encoder.emit_raw_bytes(METADATA_HEADER);
@@ -2326,9 +2384,7 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23262384
// Encode the rustc version string in a predictable location.
23272385
rustc_version(tcx.sess.cfg_version).encode(&mut ecx);
23282386

2329-
// Encode all the entries and extra information in the crate,
2330-
// culminating in the `CrateRoot` which points to all of it.
2331-
let root = ecx.encode_crate_root();
2387+
let root_position = f(&mut ecx);
23322388

23332389
// Make sure we report any errors from writing to the file.
23342390
// If we forget this, compilation can succeed with an incomplete rmeta file,
@@ -2338,12 +2394,9 @@ pub fn encode_metadata(tcx: TyCtxt<'_>, path: &Path) {
23382394
}
23392395

23402396
let file = ecx.opaque.file();
2341-
if let Err(err) = encode_root_position(file, root.position.get()) {
2397+
if let Err(err) = encode_root_position(file, root_position) {
23422398
tcx.dcx().emit_fatal(FailWriteFile { path: ecx.opaque.path(), err });
23432399
}
2344-
2345-
// Record metadata size for self-profiling
2346-
tcx.prof.artifact_size("crate_metadata", "crate_metadata", file.metadata().unwrap().len());
23472400
}
23482401

23492402
fn encode_root_position(mut file: &File, pos: usize) -> Result<(), std::io::Error> {

compiler/rustc_metadata/src/rmeta/mod.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ pub(crate) fn rustc_version(cfg_version: &'static str) -> String {
5656
/// Metadata encoding version.
5757
/// N.B., increment this if you change the format of metadata such that
5858
/// the rustc version can't be found to compare with `rustc_version()`.
59-
const METADATA_VERSION: u8 = 9;
59+
const METADATA_VERSION: u8 = 10;
6060

6161
/// Metadata header which includes `METADATA_VERSION`.
6262
///
@@ -221,6 +221,12 @@ pub(crate) struct CrateHeader {
221221
/// This is separate from [`ProcMacroData`] to avoid having to update [`METADATA_VERSION`] every
222222
/// time ProcMacroData changes.
223223
pub(crate) is_proc_macro_crate: bool,
224+
/// Whether this crate metadata section is just a stub.
225+
/// Stubs do not contain the full metadata (it will be typically stored
226+
/// in a separate rmeta file).
227+
///
228+
/// This is used inside rlibs and dylibs when using `-Zembed-metadata=no`.
229+
pub(crate) is_stub: bool,
224230
}
225231

226232
/// Serialized `.rmeta` data for a crate.

0 commit comments

Comments
 (0)