Skip to content

Commit f05273a

Browse files
committed
Auto merge of rust-lang#127516 - nnethercote:simplify-LazyAttrTokenStream, r=<try>
Simplify `LazyAttrTokenStream` `LazyAttrTokenStream` is an unpleasant type: `Lrc<Box<dyn ToAttrTokenStream>>`. Why does it look like that? - There are two `ToAttrTokenStream` impls, one for the lazy case, and one for the case where we already have an `AttrTokenStream`. - The lazy case (`LazyAttrTokenStreamImpl`) is implemented in `rustc_parse`, but `LazyAttrTokenStream` is defined in `rustc_ast`, which does not depend on `rustc_parse`. The use of the trait lets `rustc_ast` implicitly depend on `rustc_parse`. This explains the `dyn`. - `LazyAttrTokenStream` must have a `size_of` as small as possible, because it's used in many AST nodes. This explains the `Lrc<Box<_>>`, which keeps it to one word. (It's required because `Lrc<dyn _>` would be a fat pointer.) This PR moves `LazyAttrTokenStreamImpl` (and a few other token stream things) from `rustc_parse` to `rustc_ast`. This lets us replace the `ToAttrTokenStream` trait with a two-variant enum and also remove the `Box`, changing `LazyAttrTokenStream` to `Lrc<LazyAttrTokenStreamInner>`. r? `@petrochenkov`
2 parents 8672b2b + ad11b0b commit f05273a

File tree

5 files changed

+326
-323
lines changed

5 files changed

+326
-323
lines changed

compiler/rustc_ast/src/mut_visit.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -744,7 +744,7 @@ fn visit_lazy_tts_opt_mut<T: MutVisitor>(lazy_tts: Option<&mut LazyAttrTokenStre
744744
if let Some(lazy_tts) = lazy_tts {
745745
let mut tts = lazy_tts.to_attr_token_stream();
746746
visit_attr_tts(&mut tts, vis);
747-
*lazy_tts = LazyAttrTokenStream::new(tts);
747+
*lazy_tts = LazyAttrTokenStream::new_direct(tts);
748748
}
749749
}
750750
}

compiler/rustc_ast/src/tokenstream.rs

+307-23
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ use rustc_serialize::{Decodable, Encodable};
2525
use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};
2626

2727
use std::borrow::Cow;
28-
use std::{cmp, fmt, iter};
28+
use std::ops::Range;
29+
use std::{cmp, fmt, iter, mem};
2930

3031
/// Part of a `TokenStream`.
3132
#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
@@ -106,25 +107,30 @@ where
106107
}
107108
}
108109

109-
pub trait ToAttrTokenStream: sync::DynSend + sync::DynSync {
110-
fn to_attr_token_stream(&self) -> AttrTokenStream;
111-
}
112-
113-
impl ToAttrTokenStream for AttrTokenStream {
114-
fn to_attr_token_stream(&self) -> AttrTokenStream {
115-
self.clone()
116-
}
117-
}
118-
119-
/// A lazy version of [`TokenStream`], which defers creation
120-
/// of an actual `TokenStream` until it is needed.
121-
/// `Box` is here only to reduce the structure size.
110+
/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
111+
/// `AttrTokenStream` until it is needed.
122112
#[derive(Clone)]
123-
pub struct LazyAttrTokenStream(Lrc<Box<dyn ToAttrTokenStream>>);
113+
pub struct LazyAttrTokenStream(Lrc<LazyAttrTokenStreamInner>);
124114

125115
impl LazyAttrTokenStream {
126-
pub fn new(inner: impl ToAttrTokenStream + 'static) -> LazyAttrTokenStream {
127-
LazyAttrTokenStream(Lrc::new(Box::new(inner)))
116+
pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
117+
LazyAttrTokenStream(Lrc::new(LazyAttrTokenStreamInner::Direct(stream)))
118+
}
119+
120+
pub fn new_pending(
121+
start_token: (Token, Spacing),
122+
cursor_snapshot: TokenCursor,
123+
num_calls: u32,
124+
break_last_token: bool,
125+
replace_ranges: Box<[ReplaceRange]>,
126+
) -> LazyAttrTokenStream {
127+
LazyAttrTokenStream(Lrc::new(LazyAttrTokenStreamInner::Pending {
128+
start_token,
129+
cursor_snapshot,
130+
num_calls,
131+
break_last_token,
132+
replace_ranges,
133+
}))
128134
}
129135

130136
pub fn to_attr_token_stream(&self) -> AttrTokenStream {
@@ -156,12 +162,213 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
156162
}
157163
}
158164

159-
/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
160-
/// information about the tokens for attribute targets. This is used
161-
/// during expansion to perform early cfg-expansion, and to process attributes
162-
/// during proc-macro invocations.
163-
#[derive(Clone, Debug, Default, Encodable, Decodable)]
164-
pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
165+
/// Indicates a range of tokens that should be replaced by the tokens in the
166+
/// provided `AttrsTarget`. This is used in two places during token collection:
167+
///
168+
/// 1. During the parsing of an AST node that may have a `#[derive]` attribute,
169+
/// we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`. In this
170+
/// case, we use a `ReplaceRange` to replace the entire inner AST node with
171+
/// `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion on
172+
/// an `AttrTokenStream`.
173+
///
174+
/// 2. When we parse an inner attribute while collecting tokens. We remove
175+
/// inner attributes from the token stream entirely, and instead track them
176+
/// through the `attrs` field on the AST node. This allows us to easily
177+
/// manipulate them (for example, removing the first macro inner attribute
178+
/// to invoke a proc-macro). When we create a `TokenStream`, the inner
179+
/// attributes get inserted into the proper place in the token stream.
180+
pub type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
181+
182+
enum LazyAttrTokenStreamInner {
183+
// The token stream has already been produced.
184+
Direct(AttrTokenStream),
185+
186+
// Produces a `TokenStream` on-demand. Using `cursor_snapshot` and `num_calls`,
187+
// we can reconstruct the `TokenStream` seen by the callback. This allows us to
188+
// avoid producing a `TokenStream` if it is never needed - for example, a
189+
// captured `macro_rules!` argument that is never passed to a proc macro. In
190+
// practice token stream creation happens rarely compared to calls to
191+
// `collect_tokens` (see some statistics in #78736), so we are doing as little
192+
// up-front work as possible.
193+
//
194+
// This also makes `Parser` very cheap to clone, since there is no intermediate
195+
// collection buffer to clone.
196+
Pending {
197+
start_token: (Token, Spacing),
198+
cursor_snapshot: TokenCursor,
199+
num_calls: u32,
200+
break_last_token: bool,
201+
replace_ranges: Box<[ReplaceRange]>,
202+
},
203+
}
204+
205+
impl LazyAttrTokenStreamInner {
206+
fn to_attr_token_stream(&self) -> AttrTokenStream {
207+
match self {
208+
LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
209+
LazyAttrTokenStreamInner::Pending {
210+
start_token,
211+
cursor_snapshot,
212+
num_calls,
213+
break_last_token,
214+
replace_ranges,
215+
} => {
216+
// The token produced by the final call to `{,inlined_}next`
217+
// was not actually consumed by the callback. The combination
218+
// of chaining the initial token and using `take` produces the
219+
// desired result - we produce an empty `TokenStream` if no
220+
// calls were made, and omit the final token otherwise.
221+
let mut cursor_snapshot = cursor_snapshot.clone();
222+
let tokens = iter::once(FlatToken::Token(start_token.clone()))
223+
.chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
224+
.take(*num_calls as usize);
225+
226+
if replace_ranges.is_empty() {
227+
make_attr_token_stream(tokens, *break_last_token)
228+
} else {
229+
let mut tokens: Vec<_> = tokens.collect();
230+
let mut replace_ranges = replace_ranges.to_vec();
231+
replace_ranges.sort_by_key(|(range, _)| range.start);
232+
233+
#[cfg(debug_assertions)]
234+
{
235+
for [(range, tokens), (next_range, next_tokens)] in
236+
replace_ranges.array_windows()
237+
{
238+
assert!(
239+
range.end <= next_range.start || range.end >= next_range.end,
240+
"Replace ranges should either be disjoint or nested: \
241+
({:?}, {:?}) ({:?}, {:?})",
242+
range,
243+
tokens,
244+
next_range,
245+
next_tokens,
246+
);
247+
}
248+
}
249+
250+
// Process the replace ranges, starting from the highest
251+
// start position and working our way back. If we have tokens
252+
// like:
253+
//
254+
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
255+
//
256+
// Then we will generate replace ranges for both the
257+
// `#[cfg(FALSE)] field: bool` and the entire
258+
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
259+
//
260+
// By starting processing from the replace range with the
261+
// greatest start position, we ensure that any replace
262+
// range which encloses another replace range will capture
263+
// the *replaced* tokens for the inner range, not the
264+
// original tokens.
265+
for (range, target) in replace_ranges.into_iter().rev() {
266+
assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
267+
268+
// Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
269+
// plus enough `FlatToken::Empty`s to fill up the rest of the range. This
270+
// keeps the total length of `tokens` constant throughout the replacement
271+
// process, allowing us to use all of the `ReplaceRanges` entries without
272+
// adjusting indices.
273+
let target_len = target.is_some() as usize;
274+
tokens.splice(
275+
(range.start as usize)..(range.end as usize),
276+
target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
277+
iter::repeat(FlatToken::Empty).take(range.len() - target_len),
278+
),
279+
);
280+
}
281+
make_attr_token_stream(tokens.into_iter(), *break_last_token)
282+
}
283+
}
284+
}
285+
}
286+
}
287+
288+
/// A helper struct used when building an `AttrTokenStream` from a
289+
/// `LazyAttrTokenStream`. Both delimiter and non-delimited tokens are stored
290+
/// as `FlatToken::Token`. A vector of `FlatToken`s is then 'parsed' to build
291+
/// up an `AttrTokenStream` with nested `AttrTokenTree::Delimited` tokens.
292+
#[derive(Debug, Clone)]
293+
enum FlatToken {
294+
/// A token. This holds both delimiter (e.g. '{' and '}') and non-delimiter
295+
/// tokens.
296+
Token((Token, Spacing)),
297+
/// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
298+
/// directly into the constructed `AttrTokenStream` as an
299+
/// `AttrTokenTree::AttrsTarget`.
300+
AttrsTarget(AttrsTarget),
301+
/// A special 'empty' token that is ignored during the conversion to an
302+
/// `AttrTokenStream`. This is used to simplify the handling of replace
303+
/// ranges.
304+
Empty,
305+
}
306+
307+
/// Converts a flattened iterator of tokens (including open and close delimiter
308+
/// tokens) into an `AttrTokenStream`, creating an `AttrTokenTree::Delimited`
309+
/// for each matching pair of open and close delims.
310+
fn make_attr_token_stream(
311+
iter: impl Iterator<Item = FlatToken>,
312+
break_last_token: bool,
313+
) -> AttrTokenStream {
314+
#[derive(Debug)]
315+
struct FrameData {
316+
// This is `None` for the first frame, `Some` for all others.
317+
open_delim_sp: Option<(Delimiter, Span, Spacing)>,
318+
inner: Vec<AttrTokenTree>,
319+
}
320+
// The stack always has at least one element. Storing it separately makes for shorter code.
321+
let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
322+
let mut stack_rest = vec![];
323+
for flat_token in iter {
324+
match flat_token {
325+
FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => {
326+
stack_rest.push(mem::replace(
327+
&mut stack_top,
328+
FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
329+
));
330+
}
331+
FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => {
332+
let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
333+
let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
334+
assert_eq!(
335+
open_delim, delim,
336+
"Mismatched open/close delims: open={open_delim:?} close={span:?}"
337+
);
338+
let dspan = DelimSpan::from_pair(open_sp, span);
339+
let dspacing = DelimSpacing::new(open_spacing, spacing);
340+
let stream = AttrTokenStream::new(frame_data.inner);
341+
let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
342+
stack_top.inner.push(delimited);
343+
}
344+
FlatToken::Token((token, spacing)) => {
345+
stack_top.inner.push(AttrTokenTree::Token(token, spacing))
346+
}
347+
FlatToken::AttrsTarget(target) => {
348+
stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
349+
}
350+
FlatToken::Empty => {}
351+
}
352+
}
353+
354+
if break_last_token {
355+
let last_token = stack_top.inner.pop().unwrap();
356+
if let AttrTokenTree::Token(last_token, spacing) = last_token {
357+
let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
358+
359+
// An 'unglued' token is always two ASCII characters.
360+
let mut first_span = last_token.span.shrink_to_lo();
361+
first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
362+
363+
stack_top
364+
.inner
365+
.push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
366+
} else {
367+
panic!("Unexpected last token {last_token:?}")
368+
}
369+
}
370+
AttrTokenStream::new(stack_top.inner)
371+
}
165372

166373
/// Like `TokenTree`, but for `AttrTokenStream`.
167374
#[derive(Clone, Debug, Encodable, Decodable)]
@@ -174,6 +381,13 @@ pub enum AttrTokenTree {
174381
AttrsTarget(AttrsTarget),
175382
}
176383

384+
/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
385+
/// information about the tokens for attribute targets. This is used
386+
/// during expansion to perform early cfg-expansion, and to process attributes
387+
/// during proc-macro invocations.
388+
#[derive(Clone, Debug, Default, Encodable, Decodable)]
389+
pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
390+
177391
impl AttrTokenStream {
178392
pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
179393
AttrTokenStream(Lrc::new(tokens))
@@ -720,6 +934,75 @@ impl TokenTreeCursor {
720934
}
721935
}
722936

937+
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
938+
/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
939+
/// use this type to emit them as a linear sequence. But a linear sequence is
940+
/// what the parser expects, for the most part.
941+
#[derive(Clone, Debug)]
942+
pub struct TokenCursor {
943+
// Cursor for the current (innermost) token stream. The delimiters for this
944+
// token stream are found in `self.stack.last()`; when that is `None` then
945+
// we are in the outermost token stream which never has delimiters.
946+
pub tree_cursor: TokenTreeCursor,
947+
948+
// Token streams surrounding the current one. The delimiters for stack[n]'s
949+
// tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
950+
// because it's the outermost token stream which never has delimiters.
951+
pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
952+
}
953+
954+
impl TokenCursor {
955+
pub fn next(&mut self) -> (Token, Spacing) {
956+
self.inlined_next()
957+
}
958+
959+
/// This always-inlined version should only be used on hot code paths.
960+
#[inline(always)]
961+
pub fn inlined_next(&mut self) -> (Token, Spacing) {
962+
loop {
963+
// FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
964+
// #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
965+
// below can be removed.
966+
if let Some(tree) = self.tree_cursor.next_ref() {
967+
match tree {
968+
&TokenTree::Token(ref token, spacing) => {
969+
debug_assert!(!matches!(
970+
token.kind,
971+
token::OpenDelim(_) | token::CloseDelim(_)
972+
));
973+
return (token.clone(), spacing);
974+
}
975+
&TokenTree::Delimited(sp, spacing, delim, ref tts) => {
976+
let trees = tts.clone().into_trees();
977+
self.stack.push((
978+
mem::replace(&mut self.tree_cursor, trees),
979+
sp,
980+
spacing,
981+
delim,
982+
));
983+
if delim != Delimiter::Invisible {
984+
return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
985+
}
986+
// No open delimiter to return; continue on to the next iteration.
987+
}
988+
};
989+
} else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
990+
// We have exhausted this token stream. Move back to its parent token stream.
991+
self.tree_cursor = tree_cursor;
992+
if delim != Delimiter::Invisible {
993+
return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
994+
}
995+
// No close delimiter to return; continue on to the next iteration.
996+
} else {
997+
// We have exhausted the outermost token stream. The use of
998+
// `Spacing::Alone` is arbitrary and immaterial, because the
999+
// `Eof` token's spacing is never used.
1000+
return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
1001+
}
1002+
}
1003+
}
1004+
}
1005+
7231006
#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
7241007
pub struct DelimSpan {
7251008
pub open: Span,
@@ -765,6 +1048,7 @@ mod size_asserts {
7651048
static_assert_size!(AttrTokenStream, 8);
7661049
static_assert_size!(AttrTokenTree, 32);
7671050
static_assert_size!(LazyAttrTokenStream, 8);
1051+
static_assert_size!(LazyAttrTokenStreamInner, 96);
7681052
static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
7691053
static_assert_size!(TokenStream, 8);
7701054
static_assert_size!(TokenTree, 32);

0 commit comments

Comments
 (0)