Skip to content

hygiene: Remove all caching in syntax context decoding #139228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ use rustc_serialize::{Decodable, Decoder};
use rustc_session::Session;
use rustc_session::config::TargetModifier;
use rustc_session::cstore::{CrateSource, ExternCrate};
use rustc_span::hygiene::HygieneDecodeContext;
use rustc_span::{BytePos, DUMMY_SP, Pos, SpanData, SpanDecoder, SyntaxContext, kw};
use tracing::debug;

Expand Down Expand Up @@ -127,13 +126,6 @@ pub(crate) struct CrateMetadata {
/// The crate was used non-speculatively.
used: bool,

/// Additional data used for decoding `HygieneData` (e.g. `SyntaxContext`
/// and `ExpnId`).
/// Note that we store a `HygieneDecodeContext` for each `CrateMetadata`. This is
/// because `SyntaxContext` ids are not globally unique, so we need
/// to track which ids we've decoded on a per-crate basis.
hygiene_context: HygieneDecodeContext,

// --- Data used only for improving diagnostics ---
/// Information about the `extern crate` item or path that caused this crate to be loaded.
/// If this is `None`, then the crate was injected (e.g., by the allocator).
Expand Down Expand Up @@ -470,7 +462,7 @@ impl<'a, 'tcx> SpanDecoder for DecodeContext<'a, 'tcx> {
};

let cname = cdata.root.name();
rustc_span::hygiene::decode_syntax_context(self, &cdata.hygiene_context, |_, id| {
rustc_span::hygiene::decode_syntax_context(self, |_, id| {
debug!("SpecializedDecoder<SyntaxContext>: decoding {}", id);
cdata
.root
Expand Down Expand Up @@ -1864,7 +1856,6 @@ impl CrateMetadata {
host_hash,
used: false,
extern_crate: None,
hygiene_context: Default::default(),
def_key_cache: Default::default(),
};

Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use rustc_serialize::opaque::FileEncoder;
use rustc_session::config::{SymbolManglingVersion, TargetModifier};
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
use rustc_span::edition::Edition;
use rustc_span::hygiene::{ExpnIndex, MacroKind, SyntaxContextData};
use rustc_span::hygiene::{ExpnIndex, MacroKind, SyntaxContextKey};
use rustc_span::{self, ExpnData, ExpnHash, ExpnId, Ident, Span, Symbol};
use rustc_target::spec::{PanicStrategy, TargetTuple};
use table::TableBuilder;
Expand Down Expand Up @@ -193,7 +193,7 @@ enum LazyState {
Previous(NonZero<usize>),
}

type SyntaxContextTable = LazyTable<u32, Option<LazyValue<SyntaxContextData>>>;
type SyntaxContextTable = LazyTable<u32, Option<LazyValue<SyntaxContextKey>>>;
type ExpnDataTable = LazyTable<ExpnIndex, Option<LazyValue<ExpnData>>>;
type ExpnHashTable = LazyTable<ExpnIndex, Option<LazyValue<ExpnHash>>>;

Expand Down
20 changes: 6 additions & 14 deletions compiler/rustc_middle/src/query/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ use rustc_query_system::query::QuerySideEffect;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use rustc_session::Session;
use rustc_span::hygiene::{
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
};
use rustc_span::hygiene::{ExpnId, HygieneEncodeContext, SyntaxContext, SyntaxContextKey};
use rustc_span::source_map::Spanned;
use rustc_span::{
BytePos, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span,
Expand Down Expand Up @@ -75,9 +73,9 @@ pub struct OnDiskCache {
alloc_decoding_state: AllocDecodingState,

// A map from syntax context ids to the position of their associated
// `SyntaxContextData`. We use a `u32` instead of a `SyntaxContext`
// `SyntaxContextKey`. We use a `u32` instead of a `SyntaxContext`
// to represent the fact that we are storing *encoded* ids. When we decode
// a `SyntaxContext`, a new id will be allocated from the global `HygieneData`,
// a `SyntaxContextKey`, a new id will be allocated from the global `HygieneData`,
// which will almost certainly be different than the serialized id.
syntax_contexts: FxHashMap<u32, AbsoluteBytePos>,
// A map from the `DefPathHash` of an `ExpnId` to the position
Expand All @@ -90,8 +88,6 @@ pub struct OnDiskCache {
// we could look up the `ExpnData` from the metadata of foreign crates,
// but it seemed easier to have `OnDiskCache` be independent of the `CStore`.
expn_data: UnhashMap<ExpnHash, AbsoluteBytePos>,
// Additional information used when decoding hygiene data.
hygiene_context: HygieneDecodeContext,
// Maps `ExpnHash`es to their raw value from the *previous*
// compilation session. This is used as an initial 'guess' when
// we try to map an `ExpnHash` to its value in the current
Expand Down Expand Up @@ -183,7 +179,6 @@ impl OnDiskCache {
syntax_contexts: footer.syntax_contexts,
expn_data: footer.expn_data,
foreign_expn_data: footer.foreign_expn_data,
hygiene_context: Default::default(),
})
}

Expand All @@ -199,7 +194,6 @@ impl OnDiskCache {
syntax_contexts: FxHashMap::default(),
expn_data: UnhashMap::default(),
foreign_expn_data: UnhashMap::default(),
hygiene_context: Default::default(),
}
}

Expand Down Expand Up @@ -305,7 +299,7 @@ impl OnDiskCache {
let mut expn_data = UnhashMap::default();
let mut foreign_expn_data = UnhashMap::default();

// Encode all hygiene data (`SyntaxContextData` and `ExpnData`) from the current
// Encode all hygiene data (`SyntaxContextKey` and `ExpnData`) from the current
// session.

hygiene_encode_context.encode(
Expand Down Expand Up @@ -428,7 +422,6 @@ impl OnDiskCache {
syntax_contexts: &self.syntax_contexts,
expn_data: &self.expn_data,
foreign_expn_data: &self.foreign_expn_data,
hygiene_context: &self.hygiene_context,
};
f(&mut decoder)
}
Expand All @@ -448,7 +441,6 @@ pub struct CacheDecoder<'a, 'tcx> {
syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
expn_data: &'a UnhashMap<ExpnHash, AbsoluteBytePos>,
foreign_expn_data: &'a UnhashMap<ExpnHash, u32>,
hygiene_context: &'a HygieneDecodeContext,
}

impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
Expand Down Expand Up @@ -561,12 +553,12 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Vec<u8> {
impl<'a, 'tcx> SpanDecoder for CacheDecoder<'a, 'tcx> {
fn decode_syntax_context(&mut self) -> SyntaxContext {
let syntax_contexts = self.syntax_contexts;
rustc_span::hygiene::decode_syntax_context(self, self.hygiene_context, |this, id| {
rustc_span::hygiene::decode_syntax_context(self, |this, id| {
// This closure is invoked if we haven't already decoded the data for the `SyntaxContext` we are deserializing.
// We look up the position of the associated `SyntaxData` and decode it.
let pos = syntax_contexts.get(&id).unwrap();
this.with_position(pos.to_usize(), |decoder| {
let data: SyntaxContextData = decode_tagged(decoder, TAG_SYNTAX_CONTEXT);
let data: SyntaxContextKey = decode_tagged(decoder, TAG_SYNTAX_CONTEXT);
data
})
})
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_middle/src/ty/parameterized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ trivially_parameterized_over_tcx! {
rustc_span::Span,
rustc_span::Symbol,
rustc_span::def_id::DefPathHash,
rustc_span::hygiene::SyntaxContextData,
rustc_span::hygiene::SyntaxContextKey,
rustc_span::Ident,
rustc_type_ir::Variance,
rustc_hir::Attribute,
Expand Down
151 changes: 14 additions & 137 deletions compiler/rustc_span/src/hygiene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,14 @@
// because getting it wrong can lead to nested `HygieneData::with` calls that
// trigger runtime aborts. (Fortunately these are obvious and easy to fix.)

use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::hash_set::Entry as SetEntry;
use std::hash::Hash;
use std::sync::Arc;
use std::{fmt, iter, mem};

use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
use rustc_data_structures::sync::{Lock, WorkerLocal};
use rustc_data_structures::sync::Lock;
use rustc_data_structures::unhash::UnhashMap;
use rustc_hashes::Hash64;
use rustc_index::IndexVec;
Expand All @@ -59,10 +56,10 @@ impl !PartialOrd for SyntaxContext {}

/// If this part of two syntax contexts is equal, then the whole syntax contexts should be equal.
/// The other fields are only for caching.
type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);
pub type SyntaxContextKey = (SyntaxContext, ExpnId, Transparency);

#[derive(Clone, Copy, PartialEq, Debug, Encodable, Decodable)]
pub struct SyntaxContextData {
struct SyntaxContextData {
outer_expn: ExpnId,
outer_transparency: Transparency,
parent: SyntaxContext,
Expand Down Expand Up @@ -1266,7 +1263,7 @@ impl HygieneEncodeContext {
pub fn encode<T>(
&self,
encoder: &mut T,
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextData),
mut encode_ctxt: impl FnMut(&mut T, u32, &SyntaxContextKey),
mut encode_expn: impl FnMut(&mut T, ExpnId, &ExpnData, ExpnHash),
) {
// When we serialize a `SyntaxContextData`, we may end up serializing
Expand All @@ -1286,9 +1283,9 @@ impl HygieneEncodeContext {
// of the table that we insert data into doesn't depend on insertion
// order
#[allow(rustc::potential_query_instability)]
for_all_ctxts_in(latest_ctxts.into_iter(), |index, ctxt, data| {
for_all_ctxts_in(latest_ctxts.into_iter(), |index, ctxt, ctxt_key| {
if self.serialized_ctxts.lock().insert(ctxt) {
encode_ctxt(encoder, index, data);
encode_ctxt(encoder, index, ctxt_key);
}
});

Expand All @@ -1306,29 +1303,6 @@ impl HygieneEncodeContext {
}
}

#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
struct HygieneDecodeContextInner {
// Maps serialized `SyntaxContext` ids to a `SyntaxContext` in the current
// global `HygieneData`. When we deserialize a `SyntaxContext`, we need to create
// a new id in the global `HygieneData`. This map tracks the ID we end up picking,
// so that multiple occurrences of the same serialized id are decoded to the same
// `SyntaxContext`. This only stores `SyntaxContext`s which are completely decoded.
remapped_ctxts: Vec<Option<SyntaxContext>>,

/// Maps serialized `SyntaxContext` ids that are currently being decoded to a `SyntaxContext`.
decoding: FxHashMap<u32, SyntaxContext>,
}

#[derive(Default)]
/// Additional information used to assist in decoding hygiene data
pub struct HygieneDecodeContext {
inner: Lock<HygieneDecodeContextInner>,

/// A set of serialized `SyntaxContext` ids that are currently being decoded on each thread.
local_in_progress: WorkerLocal<RefCell<FxHashSet<u32>>>,
}

/// Register an expansion which has been decoded from the on-disk-cache for the local crate.
pub fn register_local_expn_id(data: ExpnData, hash: ExpnHash) -> ExpnId {
HygieneData::with(|hygiene_data| {
Expand Down Expand Up @@ -1393,14 +1367,11 @@ pub fn decode_expn_id(
register_expn_id(krate, index, expn_data, hash)
}

// Decodes `SyntaxContext`, using the provided `HygieneDecodeContext`
// to track which `SyntaxContext`s we have already decoded.
// The provided closure will be invoked to deserialize a `SyntaxContextData`
// if we haven't already seen the id of the `SyntaxContext` we are deserializing.
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextData>(
// Decodes `SyntaxContext`.
// The provided closure will be invoked to deserialize a `SyntaxContextData`.
pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContextKey>(
d: &mut D,
context: &HygieneDecodeContext,
decode_data: F,
decode_ctxt_key: F,
) -> SyntaxContext {
let raw_id: u32 = Decodable::decode(d);
if raw_id == 0 {
Expand All @@ -1409,115 +1380,21 @@ pub fn decode_syntax_context<D: Decoder, F: FnOnce(&mut D, u32) -> SyntaxContext
return SyntaxContext::root();
}

let pending_ctxt = {
let mut inner = context.inner.lock();

// Reminder: `HygieneDecodeContext` is per-crate, so there are no collisions between
// raw ids from different crate metadatas.
if let Some(ctxt) = inner.remapped_ctxts.get(raw_id as usize).copied().flatten() {
// This has already been decoded.
return ctxt;
}

match inner.decoding.entry(raw_id) {
Entry::Occupied(ctxt_entry) => {
let pending_ctxt = *ctxt_entry.get();
match context.local_in_progress.borrow_mut().entry(raw_id) {
// We're decoding this already on the current thread. Return here and let the
// function higher up the stack finish decoding to handle recursive cases.
// Hopefully having a `SyntaxContext` that refers to an incorrect data is ok
// during reminder of the decoding process, it's certainly not ok after the
// top level decoding function returns.
SetEntry::Occupied(..) => return pending_ctxt,
// Some other thread is currently decoding this.
// Race with it (alternatively we could wait here).
// We cannot return this value, unlike in the recursive case above, because it
// may expose a `SyntaxContext` pointing to incorrect data to arbitrary code.
SetEntry::Vacant(entry) => {
entry.insert();
pending_ctxt
}
}
}
Entry::Vacant(entry) => {
// We are the first thread to start decoding. Mark the current thread as being
// progress.
context.local_in_progress.borrow_mut().insert(raw_id);

// Allocate and store SyntaxContext id *before* calling the decoder function,
// as the SyntaxContextData may reference itself.
let new_ctxt = HygieneData::with(|hygiene_data| {
// Push a dummy SyntaxContextData to ensure that nobody else can get the
// same ID as us. This will be overwritten after call `decode_data`.
hygiene_data.syntax_context_data.push(SyntaxContextData::decode_placeholder());
SyntaxContext::from_usize(hygiene_data.syntax_context_data.len() - 1)
});
entry.insert(new_ctxt);
new_ctxt
}
}
};

// Don't try to decode data while holding the lock, since we need to
// be able to recursively decode a SyntaxContext
let ctxt_data = decode_data(d, raw_id);
let ctxt_key = ctxt_data.key();

let ctxt = HygieneData::with(|hygiene_data| {
match hygiene_data.syntax_context_map.get(&ctxt_key) {
// Ensure that syntax contexts are unique.
// If syntax contexts with the given key already exists, reuse it instead of
// using `pending_ctxt`.
// `pending_ctxt` will leave an unused hole in the vector of syntax contexts.
// Hopefully its value isn't stored anywhere during decoding and its dummy data
// is never accessed later. The `is_decode_placeholder` asserts on all
// accesses to syntax context data attempt to ensure it.
Some(&ctxt) => ctxt,
// This is a completely new context.
// Overwrite its placeholder data with our decoded data.
None => {
let ctxt_data_ref =
&mut hygiene_data.syntax_context_data[pending_ctxt.as_u32() as usize];
let prev_ctxt_data = mem::replace(ctxt_data_ref, ctxt_data);
// Reset `dollar_crate_name` so that it will be updated by `update_dollar_crate_names`.
// We don't care what the encoding crate set this to - we want to resolve it
// from the perspective of the current compilation session.
ctxt_data_ref.dollar_crate_name = kw::DollarCrate;
// Make sure nothing weird happened while `decode_data` was running.
if !prev_ctxt_data.is_decode_placeholder() {
// Another thread may have already inserted the decoded data,
// but the decoded data should match.
assert_eq!(prev_ctxt_data, *ctxt_data_ref);
}
hygiene_data.syntax_context_map.insert(ctxt_key, pending_ctxt);
pending_ctxt
}
}
});

// Mark the context as completed
context.local_in_progress.borrow_mut().remove(&raw_id);

let mut inner = context.inner.lock();
let new_len = raw_id as usize + 1;
if inner.remapped_ctxts.len() < new_len {
inner.remapped_ctxts.resize(new_len, None);
}
inner.remapped_ctxts[raw_id as usize] = Some(ctxt);
inner.decoding.remove(&raw_id);

ctxt
let (parent, expn_id, transparency) = decode_ctxt_key(d, raw_id);
HygieneData::with(|hygiene_data| hygiene_data.alloc_ctxt(parent, expn_id, transparency))
}

fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextData)>(
fn for_all_ctxts_in<F: FnMut(u32, SyntaxContext, &SyntaxContextKey)>(
ctxts: impl Iterator<Item = SyntaxContext>,
mut f: F,
) {
let all_data: Vec<_> = HygieneData::with(|data| {
ctxts.map(|ctxt| (ctxt, data.syntax_context_data[ctxt.0 as usize].clone())).collect()
});
for (ctxt, data) in all_data.into_iter() {
f(ctxt.0, ctxt, &data);
f(ctxt.0, ctxt, &data.key());
}
}

Expand Down
Loading