@@ -25,7 +25,8 @@ use rustc_serialize::{Decodable, Encodable};
 use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};

 use std::borrow::Cow;
-use std::{cmp, fmt, iter};
+use std::ops::Range;
+use std::{cmp, fmt, iter, mem};

 /// Part of a `TokenStream`.
 #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
@@ -106,25 +107,30 @@ where
     }
 }

-pub trait ToAttrTokenStream: sync::DynSend + sync::DynSync {
-    fn to_attr_token_stream(&self) -> AttrTokenStream;
-}
-
-impl ToAttrTokenStream for AttrTokenStream {
-    fn to_attr_token_stream(&self) -> AttrTokenStream {
-        self.clone()
-    }
-}
-
-/// A lazy version of [`TokenStream`], which defers creation
-/// of an actual `TokenStream` until it is needed.
-/// `Box` is here only to reduce the structure size.
+/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
+/// `AttrTokenStream` until it is needed.
 #[derive(Clone)]
-pub struct LazyAttrTokenStream(Lrc<Box<dyn ToAttrTokenStream>>);
+pub struct LazyAttrTokenStream(Lrc<LazyAttrTokenStreamInner>);

 impl LazyAttrTokenStream {
-    pub fn new(inner: impl ToAttrTokenStream + 'static) -> LazyAttrTokenStream {
-        LazyAttrTokenStream(Lrc::new(Box::new(inner)))
+    pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
+        LazyAttrTokenStream(Lrc::new(LazyAttrTokenStreamInner::Direct(stream)))
+    }
+
+    pub fn new_pending(
+        start_token: (Token, Spacing),
+        cursor_snapshot: TokenCursor,
+        num_calls: u32,
+        break_last_token: bool,
+        replace_ranges: Box<[ReplaceRange]>,
+    ) -> LazyAttrTokenStream {
+        LazyAttrTokenStream(Lrc::new(LazyAttrTokenStreamInner::Pending {
+            start_token,
+            cursor_snapshot,
+            num_calls,
+            break_last_token,
+            replace_ranges,
+        }))
     }

     pub fn to_attr_token_stream(&self) -> AttrTokenStream {
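
The Direct/Pending split that replaces the old `ToAttrTokenStream` trait object is easier to see in isolation. Below is a minimal, self-contained sketch of the same pattern using toy types (nothing here is rustc's actual API): either the value already exists, or we hold just enough state to rebuild it on demand.

// Sketch only: a two-variant lazy value, mirroring Direct/Pending.
#[derive(Clone)]
enum Lazy {
    // The value has already been produced.
    Direct(Vec<u32>),
    // Enough state to reproduce the value later: a starting point and a
    // count of how many items the original traversal consumed.
    Pending { start: u32, num_calls: u32 },
}

impl Lazy {
    fn force(&self) -> Vec<u32> {
        match self {
            Lazy::Direct(v) => v.clone(),
            // Work happens only if someone actually asks for the value.
            Lazy::Pending { start, num_calls } => {
                (*start..).take(*num_calls as usize).collect()
            }
        }
    }
}

fn main() {
    let cheap = Lazy::Pending { start: 5, num_calls: 3 };
    assert_eq!(cheap.force(), vec![5, 6, 7]); // replayed on demand
}

Like `LazyAttrTokenStreamInner::Pending`, the pending variant is cheap to construct and clone; the expensive materialization is deferred to `force` (here) or `to_attr_token_stream` (in the diff below).
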
@@ -156,12 +162,213 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
     }
 }

-/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
-/// information about the tokens for attribute targets. This is used
-/// during expansion to perform early cfg-expansion, and to process attributes
-/// during proc-macro invocations.
-#[derive(Clone, Debug, Default, Encodable, Decodable)]
-pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+/// Indicates a range of tokens that should be replaced by the tokens in the
+/// provided `AttrsTarget`. This is used in two places during token collection:
+///
+/// 1. During the parsing of an AST node that may have a `#[derive]` attribute,
+///    we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`. In this
+///    case, we use a `ReplaceRange` to replace the entire inner AST node with
+///    `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion on
+///    an `AttrTokenStream`.
+///
+/// 2. When we parse an inner attribute while collecting tokens. We remove
+///    inner attributes from the token stream entirely, and instead track them
+///    through the `attrs` field on the AST node. This allows us to easily
+///    manipulate them (for example, removing the first macro inner attribute
+///    to invoke a proc-macro). When we create a `TokenStream`, the inner
+///    attributes get inserted into the proper place in the token stream.
+pub type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
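
The length-preserving replacement strategy that `ReplaceRange` enables can be sketched with a plain `Vec::splice`. The snippet below is illustrative only (made-up string tokens and values, not rustc code): a range is replaced by at most one target marker plus padding, so indices computed before the splice remain valid afterwards.

// Sketch: length-preserving range replacement, as used for replace ranges.
use std::iter;

fn main() {
    let mut tokens: Vec<&str> = vec!["a", "b", "c", "d", "e"];
    let range = 1..4; // replace "b", "c", "d"
    let target = Some("TARGET");

    // Zero or one target, then padding to keep the overall length constant.
    let target_len = target.is_some() as usize;
    tokens.splice(
        range.clone(),
        target.into_iter().chain(iter::repeat("<empty>").take(range.len() - target_len)),
    );

    // Length is unchanged, so indices from other replace ranges still hold.
    assert_eq!(tokens, ["a", "TARGET", "<empty>", "<empty>", "e"]);
}
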
+
+enum LazyAttrTokenStreamInner {
+    // The token stream has already been produced.
+    Direct(AttrTokenStream),
+
+    // Produces a `TokenStream` on-demand. Using `cursor_snapshot` and `num_calls`,
+    // we can reconstruct the `TokenStream` seen by the callback. This allows us to
+    // avoid producing a `TokenStream` if it is never needed - for example, a
+    // captured `macro_rules!` argument that is never passed to a proc macro. In
+    // practice token stream creation happens rarely compared to calls to
+    // `collect_tokens` (see some statistics in #78736), so we are doing as little
+    // up-front work as possible.
+    //
+    // This also makes `Parser` very cheap to clone, since there is no intermediate
+    // collection buffer to clone.
+    Pending {
+        start_token: (Token, Spacing),
+        cursor_snapshot: TokenCursor,
+        num_calls: u32,
+        break_last_token: bool,
+        replace_ranges: Box<[ReplaceRange]>,
+    },
+}
+
+impl LazyAttrTokenStreamInner {
+    fn to_attr_token_stream(&self) -> AttrTokenStream {
+        match self {
+            LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
+            LazyAttrTokenStreamInner::Pending {
+                start_token,
+                cursor_snapshot,
+                num_calls,
+                break_last_token,
+                replace_ranges,
+            } => {
+                // The token produced by the final call to `{,inlined_}next`
+                // was not actually consumed by the callback. The combination
+                // of chaining the initial token and using `take` produces the
+                // desired result - we produce an empty `TokenStream` if no
+                // calls were made, and omit the final token otherwise.
+                let mut cursor_snapshot = cursor_snapshot.clone();
+                let tokens = iter::once(FlatToken::Token(start_token.clone()))
+                    .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+                    .take(*num_calls as usize);
+
+                if replace_ranges.is_empty() {
+                    make_attr_token_stream(tokens, *break_last_token)
+                } else {
+                    let mut tokens: Vec<_> = tokens.collect();
+                    let mut replace_ranges = replace_ranges.to_vec();
+                    replace_ranges.sort_by_key(|(range, _)| range.start);
+
+                    #[cfg(debug_assertions)]
+                    {
+                        for [(range, tokens), (next_range, next_tokens)] in
+                            replace_ranges.array_windows()
+                        {
+                            assert!(
+                                range.end <= next_range.start || range.end >= next_range.end,
+                                "Replace ranges should either be disjoint or nested: \
+                                 ({:?}, {:?}) ({:?}, {:?})",
+                                range,
+                                tokens,
+                                next_range,
+                                next_tokens,
+                            );
+                        }
+                    }
+
+                    // Process the replace ranges, starting from the highest
+                    // start position and working our way back. If we have
+                    // tokens like:
+                    //
+                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+                    //
+                    // Then we will generate replace ranges for both the
+                    // `#[cfg(FALSE)] field: bool` and the entire
+                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+                    //
+                    // By starting processing from the replace range with the
+                    // greatest start position, we ensure that any replace
+                    // range which encloses another replace range will capture
+                    // the *replaced* tokens for the inner range, not the
+                    // original tokens.
+                    for (range, target) in replace_ranges.into_iter().rev() {
+                        assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
+
+                        // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
+                        // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
+                        // keeps the total length of `tokens` constant throughout the replacement
+                        // process, allowing us to use all of the `ReplaceRanges` entries without
+                        // adjusting indices.
+                        let target_len = target.is_some() as usize;
+                        tokens.splice(
+                            (range.start as usize)..(range.end as usize),
+                            target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
+                                iter::repeat(FlatToken::Empty).take(range.len() - target_len),
+                            ),
+                        );
+                    }
+                    make_attr_token_stream(tokens.into_iter(), *break_last_token)
+                }
+            }
+        }
+    }
+}
+
+/// A helper struct used when building an `AttrTokenStream` from a
+/// `LazyAttrTokenStream`. Both delimiter and non-delimited tokens are stored
+/// as `FlatToken::Token`. A vector of `FlatToken`s is then 'parsed' to build
+/// up an `AttrTokenStream` with nested `AttrTokenTree::Delimited` tokens.
+#[derive(Debug, Clone)]
+enum FlatToken {
+    /// A token. This holds both delimiter (e.g. '{' and '}') and non-delimiter
+    /// tokens.
+    Token((Token, Spacing)),
+    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
+    /// directly into the constructed `AttrTokenStream` as an
+    /// `AttrTokenTree::AttrsTarget`.
+    AttrsTarget(AttrsTarget),
+    /// A special 'empty' token that is ignored during the conversion to an
+    /// `AttrTokenStream`. This is used to simplify the handling of replace
+    /// ranges.
+    Empty,
+}
+
+/// Converts a flattened iterator of tokens (including open and close delimiter
+/// tokens) into an `AttrTokenStream`, creating an `AttrTokenTree::Delimited`
+/// for each matching pair of open and close delims.
+fn make_attr_token_stream(
+    iter: impl Iterator<Item = FlatToken>,
+    break_last_token: bool,
+) -> AttrTokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        // This is `None` for the first frame, `Some` for all others.
+        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
+        inner: Vec<AttrTokenTree>,
+    }
+    // The stack always has at least one element. Storing it separately makes for shorter code.
+    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
+    let mut stack_rest = vec![];
+    for flat_token in iter {
+        match flat_token {
+            FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => {
+                stack_rest.push(mem::replace(
+                    &mut stack_top,
+                    FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
+                ));
+            }
+            FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => {
+                let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
+                let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
+                assert_eq!(
+                    open_delim, delim,
+                    "Mismatched open/close delims: open={open_delim:?} close={span:?}"
+                );
+                let dspan = DelimSpan::from_pair(open_sp, span);
+                let dspacing = DelimSpacing::new(open_spacing, spacing);
+                let stream = AttrTokenStream::new(frame_data.inner);
+                let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
+                stack_top.inner.push(delimited);
+            }
+            FlatToken::Token((token, spacing)) => {
+                stack_top.inner.push(AttrTokenTree::Token(token, spacing))
+            }
+            FlatToken::AttrsTarget(target) => {
+                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
+            }
+            FlatToken::Empty => {}
+        }
+    }
+
+    if break_last_token {
+        let last_token = stack_top.inner.pop().unwrap();
+        if let AttrTokenTree::Token(last_token, spacing) = last_token {
+            let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
+
+            // An 'unglued' token is always two ASCII characters.
+            let mut first_span = last_token.span.shrink_to_lo();
+            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
+
+            stack_top
+                .inner
+                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
+        } else {
+            panic!("Unexpected last token {last_token:?}")
+        }
+    }
+    AttrTokenStream::new(stack_top.inner)
+}
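
The stack discipline in `make_attr_token_stream` (push a frame at each open delimiter, pop it at the matching close) can be shown on its own. Here is a standalone sketch with simplified tokens, not rustc's types: it rebuilds nesting from a flat character sequence the same way the function above rebuilds `AttrTokenTree::Delimited` nodes.

// Sketch: recover a tree from a flat sequence via a frame stack.
#[derive(Debug, PartialEq)]
enum Tree {
    Leaf(char),
    Delimited(Vec<Tree>),
}

fn build(flat: &str) -> Vec<Tree> {
    // `top` is the frame currently being filled; `rest` holds its ancestors.
    let mut top: Vec<Tree> = vec![];
    let mut rest: Vec<Vec<Tree>> = vec![];
    for c in flat.chars() {
        match c {
            '(' => rest.push(std::mem::take(&mut top)),
            ')' => {
                // The finished frame becomes one `Delimited` node in its parent.
                let inner = std::mem::replace(&mut top, rest.pop().unwrap());
                top.push(Tree::Delimited(inner));
            }
            c => top.push(Tree::Leaf(c)),
        }
    }
    assert!(rest.is_empty(), "unbalanced delimiters");
    top
}

fn main() {
    let trees = build("a(bc)d");
    assert_eq!(trees.len(), 3); // 'a', the delimited group, 'd'
}
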

 /// Like `TokenTree`, but for `AttrTokenStream`.
 #[derive(Clone, Debug, Encodable, Decodable)]
@@ -174,6 +381,13 @@ pub enum AttrTokenTree {
     AttrsTarget(AttrsTarget),
 }

+/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
+/// information about the tokens for attribute targets. This is used
+/// during expansion to perform early cfg-expansion, and to process attributes
+/// during proc-macro invocations.
+#[derive(Clone, Debug, Default, Encodable, Decodable)]
+pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+
 impl AttrTokenStream {
     pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
         AttrTokenStream(Lrc::new(tokens))
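
`AttrTokenStream` wraps its trees in `Lrc` (rustc's refcounted-pointer alias, an `Arc` in parallel builds), so cloning a stream is a refcount bump rather than a deep copy. A small sketch with `std::sync::Arc` standing in for `Lrc`:

// Sketch: why `Lrc<Vec<_>>` makes clones of a stream cheap.
use std::sync::Arc;

#[derive(Clone)]
struct Stream(Arc<Vec<u64>>);

fn main() {
    let s1 = Stream(Arc::new((0..1_000_000).collect()));
    let s2 = s1.clone(); // O(1): no element is copied
    assert!(Arc::ptr_eq(&s1.0, &s2.0)); // both clones share one buffer
}
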
@@ -720,6 +934,75 @@ impl TokenTreeCursor {
     }
 }

+/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
+#[derive(Clone, Debug)]
+pub struct TokenCursor {
+    // Cursor for the current (innermost) token stream. The delimiters for this
+    // token stream are found in `self.stack.last()`; when that is `None` then
+    // we are in the outermost token stream which never has delimiters.
+    pub tree_cursor: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The delimiters for stack[n]'s
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
+    // because it's the outermost token stream which never has delimiters.
+    pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
+}
+
+impl TokenCursor {
+    pub fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
+    }
+
+    /// This always-inlined version should only be used on hot code paths.
+    #[inline(always)]
+    pub fn inlined_next(&mut self) -> (Token, Spacing) {
+        loop {
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some(tree) = self.tree_cursor.next_ref() {
+                match tree {
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
+                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
+                        let trees = tts.clone().into_trees();
+                        self.stack.push((
+                            mem::replace(&mut self.tree_cursor, trees),
+                            sp,
+                            spacing,
+                            delim,
+                        ));
+                        if delim != Delimiter::Invisible {
+                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
+            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                self.tree_cursor = tree_cursor;
+                if delim != Delimiter::Invisible {
+                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
+                }
+                // No close delimiter to return; continue on to the next iteration.
+            } else {
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+}
+
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub struct DelimSpan {
     pub open: Span,
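
The shape of `TokenCursor` (an iterator over the innermost level plus a stack of suspended parent iterators) is easy to model independently. The sketch below uses nested vectors of integers; rustc's version additionally emits open/close delimiter tokens, skips `Delimiter::Invisible`, and returns `Eof` instead of `None`.

// Sketch: flatten a tree by suspending parent iterators on a stack.
enum Node {
    Leaf(i32),
    Group(Vec<Node>),
}

struct Cursor {
    current: std::vec::IntoIter<Node>,
    stack: Vec<std::vec::IntoIter<Node>>,
}

impl Cursor {
    fn next(&mut self) -> Option<i32> {
        loop {
            if let Some(node) = self.current.next() {
                match node {
                    Node::Leaf(n) => return Some(n),
                    Node::Group(children) => {
                        // Suspend the current level and descend into the group.
                        let parent = std::mem::replace(&mut self.current, children.into_iter());
                        self.stack.push(parent);
                    }
                }
            } else if let Some(parent) = self.stack.pop() {
                // This level is exhausted; resume its parent.
                self.current = parent;
            } else {
                return None; // outermost stream exhausted (rustc returns `Eof` here)
            }
        }
    }
}

fn main() {
    let tree = vec![Node::Leaf(1), Node::Group(vec![Node::Leaf(2), Node::Leaf(3)]), Node::Leaf(4)];
    let mut cursor = Cursor { current: tree.into_iter(), stack: vec![] };
    let mut out = vec![];
    while let Some(n) = cursor.next() {
        out.push(n);
    }
    assert_eq!(out, vec![1, 2, 3, 4]);
}
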
@@ -765,6 +1048,7 @@ mod size_asserts {
     static_assert_size!(AttrTokenStream, 8);
     static_assert_size!(AttrTokenTree, 32);
     static_assert_size!(LazyAttrTokenStream, 8);
+    static_assert_size!(LazyAttrTokenStreamInner, 96);
     static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
     static_assert_size!(TokenStream, 8);
     static_assert_size!(TokenTree, 32);
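
The new assertion pins `LazyAttrTokenStreamInner` at 96 bytes so that accidental growth of the `Pending` variant is caught at compile time. One common way to write such a size check, shown as a sketch (rustc's own `static_assert_size!` macro may differ in detail):

// Sketch: a compile-time size check via mismatched array lengths.
macro_rules! static_assert_size {
    ($ty:ty, $size:expr) => {
        // Type and value only unify if the two array lengths are equal.
        const _: [(); $size] = [(); std::mem::size_of::<$ty>()];
    };
}

static_assert_size!(u64, 8); // compiles
// static_assert_size!(u64, 16); // would fail to compile

fn main() {}
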