Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 491fccf

Browse files
committed Jul 18, 2022
proc_macro: stop using a remote object handle for Ident
Doing this for all unicode identifiers would require a dependency on `unicode-normalization` and `rustc_lexer`, which is currently not possible for `proc_macro` due to it being built concurrently with `std` and `core`. Instead, ASCII identifiers are validated locally, and an RPC message is used to validate unicode identifiers when needed. String values are interned on both the server and client when deserializing, to avoid unnecessary copies and keep Ident cheap to copy and move. This appears to be important for performance. The client-side interner is based roughly on the one from rustc_span, and uses an arena inspired by rustc_arena. RPC messages passing symbols always include the full value. This could potentially be optimized in the future if it is revealed to be a performance bottleneck. Despite now having a relevant implementation of Display for Ident, ToString is still specialized, as it is a hot-path for this object. The symbol infrastructure will also be used for literals in the next part.
1 parent e0dce6e commit 491fccf

File tree

11 files changed

+441
-114
lines changed

11 files changed

+441
-114
lines changed
 

‎compiler/rustc_expand/src/proc_macro_server.rs

Lines changed: 34 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@ use rustc_parse::lexer::nfc_normalize;
1111
use rustc_parse::parse_stream_from_source_str;
1212
use rustc_session::parse::ParseSess;
1313
use rustc_span::def_id::CrateNum;
14-
use rustc_span::symbol::{self, kw, sym, Symbol};
14+
use rustc_span::symbol::{self, sym, Symbol};
1515
use rustc_span::{BytePos, FileName, Pos, SourceFile, Span};
1616

17-
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Punct, TokenTree};
17+
use pm::bridge::{server, DelimSpan, ExpnGlobals, Group, Ident, Punct, TokenTree};
1818
use pm::{Delimiter, Level, LineColumn};
19+
use std::ascii;
1920
use std::ops::Bound;
20-
use std::{ascii, panic};
2121

2222
trait FromInternal<T> {
2323
fn from_internal(x: T) -> Self;
@@ -50,7 +50,7 @@ impl ToInternal<token::Delimiter> for Delimiter {
5050
}
5151

5252
impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
53-
for Vec<TokenTree<TokenStream, Span, Ident, Literal>>
53+
for Vec<TokenTree<TokenStream, Span, Symbol, Literal>>
5454
{
5555
fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_, '_>)) -> Self {
5656
use rustc_ast::token::*;
@@ -135,13 +135,12 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
135135
Question => op("?"),
136136
SingleQuote => op("'"),
137137

138-
Ident(name, false) if name == kw::DollarCrate => trees.push(TokenTree::Ident(Ident::dollar_crate(span))),
139-
Ident(name, is_raw) => trees.push(TokenTree::Ident(Ident::new(rustc.sess(), name, is_raw, span))),
138+
Ident(sym, is_raw) => trees.push(TokenTree::Ident(Ident { sym, is_raw, span })),
140139
Lifetime(name) => {
141140
let ident = symbol::Ident::new(name, span).without_first_quote();
142141
trees.extend([
143142
TokenTree::Punct(Punct { ch: b'\'', joint: true, span }),
144-
TokenTree::Ident(Ident::new(rustc.sess(), ident.name, false, span)),
143+
TokenTree::Ident(Ident { sym: ident.name, is_raw: false, span }),
145144
]);
146145
}
147146
Literal(lit) => trees.push(TokenTree::Literal(self::Literal { lit, span })),
@@ -170,7 +169,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
170169
}
171170

172171
Interpolated(nt) if let NtIdent(ident, is_raw) = *nt => {
173-
trees.push(TokenTree::Ident(Ident::new(rustc.sess(), ident.name, is_raw, ident.span)))
172+
trees.push(TokenTree::Ident(Ident { sym: ident.name, is_raw, span: ident.span }))
174173
}
175174

176175
Interpolated(nt) => {
@@ -200,11 +199,14 @@ impl FromInternal<(TokenStream, &mut Rustc<'_, '_>)>
200199
}
201200
}
202201

203-
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
202+
impl ToInternal<TokenStream>
203+
for (TokenTree<TokenStream, Span, Symbol, Literal>, &mut Rustc<'_, '_>)
204+
{
204205
fn to_internal(self) -> TokenStream {
205206
use rustc_ast::token::*;
206207

207-
let (ch, joint, span) = match self {
208+
let (tree, rustc) = self;
209+
let (ch, joint, span) = match tree {
208210
TokenTree::Punct(Punct { ch, joint, span }) => (ch, joint, span),
209211
TokenTree::Group(Group { delimiter, stream, span: DelimSpan { open, close, .. } }) => {
210212
return tokenstream::TokenTree::Delimited(
@@ -215,6 +217,7 @@ impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
215217
.into();
216218
}
217219
TokenTree::Ident(self::Ident { sym, is_raw, span }) => {
220+
rustc.sess().symbol_gallery.insert(sym, span);
218221
return tokenstream::TokenTree::token(Ident(sym, is_raw), span).into();
219222
}
220223
TokenTree::Literal(self::Literal {
@@ -289,33 +292,6 @@ impl ToInternal<rustc_errors::Level> for Level {
289292

290293
pub struct FreeFunctions;
291294

292-
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
293-
pub struct Ident {
294-
sym: Symbol,
295-
is_raw: bool,
296-
span: Span,
297-
}
298-
299-
impl Ident {
300-
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
301-
let sym = nfc_normalize(sym.as_str());
302-
let string = sym.as_str();
303-
if !rustc_lexer::is_ident(string) {
304-
panic!("`{:?}` is not a valid identifier", string)
305-
}
306-
if is_raw && !sym.can_be_raw() {
307-
panic!("`{}` cannot be a raw identifier", string);
308-
}
309-
sess.symbol_gallery.insert(sym, span);
310-
Ident { sym, is_raw, span }
311-
}
312-
313-
fn dollar_crate(span: Span) -> Ident {
314-
// `$crate` is accepted as an ident only if it comes from the compiler.
315-
Ident { sym: kw::DollarCrate, is_raw: false, span }
316-
}
317-
}
318-
319295
// FIXME(eddyb) `Literal` should not expose internal `Debug` impls.
320296
#[derive(Clone, Debug)]
321297
pub struct Literal {
@@ -357,12 +333,12 @@ impl<'a, 'b> Rustc<'a, 'b> {
357333
impl server::Types for Rustc<'_, '_> {
358334
type FreeFunctions = FreeFunctions;
359335
type TokenStream = TokenStream;
360-
type Ident = Ident;
361336
type Literal = Literal;
362337
type SourceFile = Lrc<SourceFile>;
363338
type MultiSpan = Vec<Span>;
364339
type Diagnostic = Diagnostic;
365340
type Span = Span;
341+
type Symbol = Symbol;
366342
}
367343

368344
impl server::FreeFunctions for Rustc<'_, '_> {
@@ -453,22 +429,22 @@ impl server::TokenStream for Rustc<'_, '_> {
453429

454430
fn from_token_tree(
455431
&mut self,
456-
tree: TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>,
432+
tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>,
457433
) -> Self::TokenStream {
458-
tree.to_internal()
434+
(tree, &mut *self).to_internal()
459435
}
460436

461437
fn concat_trees(
462438
&mut self,
463439
base: Option<Self::TokenStream>,
464-
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>>,
440+
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>>,
465441
) -> Self::TokenStream {
466442
let mut builder = tokenstream::TokenStreamBuilder::new();
467443
if let Some(base) = base {
468444
builder.push(base);
469445
}
470446
for tree in trees {
471-
builder.push(tree.to_internal());
447+
builder.push((tree, &mut *self).to_internal());
472448
}
473449
builder.build()
474450
}
@@ -491,25 +467,11 @@ impl server::TokenStream for Rustc<'_, '_> {
491467
fn into_trees(
492468
&mut self,
493469
stream: Self::TokenStream,
494-
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>> {
470+
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>> {
495471
FromInternal::from_internal((stream, self))
496472
}
497473
}
498474

499-
impl server::Ident for Rustc<'_, '_> {
500-
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
501-
Ident::new(self.sess(), Symbol::intern(string), is_raw, span)
502-
}
503-
504-
fn span(&mut self, ident: Self::Ident) -> Self::Span {
505-
ident.span
506-
}
507-
508-
fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident {
509-
Ident { span, ..ident }
510-
}
511-
}
512-
513475
impl server::Literal for Rustc<'_, '_> {
514476
fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> {
515477
let name = FileName::proc_macro_source_code(s);
@@ -812,6 +774,13 @@ impl server::Span for Rustc<'_, '_> {
812774
}
813775
}
814776

777+
impl server::Symbol for Rustc<'_, '_> {
778+
fn normalize_and_validate_ident(&mut self, string: &str) -> Result<Self::Symbol, ()> {
779+
let sym = nfc_normalize(string);
780+
if rustc_lexer::is_ident(sym.as_str()) { Ok(sym) } else { Err(()) }
781+
}
782+
}
783+
815784
impl server::Server for Rustc<'_, '_> {
816785
fn globals(&mut self) -> ExpnGlobals<Self::Span> {
817786
ExpnGlobals {
@@ -820,4 +789,12 @@ impl server::Server for Rustc<'_, '_> {
820789
mixed_site: self.mixed_site,
821790
}
822791
}
792+
793+
fn intern_symbol(string: &str) -> Self::Symbol {
794+
Symbol::intern(string)
795+
}
796+
797+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
798+
f(&symbol.as_str())
799+
}
823800
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
2+
//!
3+
//! This is unfortunately a minimal re-implementation rather than a dependency
4+
//! as it is difficult to depend on crates from within `proc_macro`, due to it
5+
//! being built at the same time as `std`.
6+
7+
use std::cell::{Cell, RefCell};
8+
use std::cmp;
9+
use std::mem::MaybeUninit;
10+
use std::ops::Range;
11+
use std::ptr;
12+
use std::slice;
13+
use std::str;
14+
15+
// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
16+
// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
17+
// we stop growing. This scales well, from arenas that are barely used up to
18+
// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
19+
// the usual sizes of pages and huge pages on Linux.
20+
const PAGE: usize = 4096;
21+
const HUGE_PAGE: usize = 2 * 1024 * 1024;
22+
23+
/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
24+
///
25+
/// This is unfortunately a complete re-implementation rather than a dependency
26+
/// as it is difficult to depend on crates from within `proc_macro`, due to it
27+
/// being built at the same time as `std`.
28+
///
29+
/// This arena doesn't have support for allocating anything other than byte
30+
/// slices, as that is all that is necessary.
31+
pub(crate) struct Arena {
32+
start: Cell<*mut MaybeUninit<u8>>,
33+
end: Cell<*mut MaybeUninit<u8>>,
34+
chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
35+
}
36+
37+
impl Arena {
38+
pub(crate) fn new() -> Self {
39+
Arena {
40+
start: Cell::new(ptr::null_mut()),
41+
end: Cell::new(ptr::null_mut()),
42+
chunks: RefCell::new(Vec::new()),
43+
}
44+
}
45+
46+
/// Add a new chunk with at least `additional` free bytes.
47+
#[inline(never)]
48+
#[cold]
49+
fn grow(&self, additional: usize) {
50+
let mut chunks = self.chunks.borrow_mut();
51+
let mut new_cap;
52+
if let Some(last_chunk) = chunks.last_mut() {
53+
// If the previous chunk's len is less than HUGE_PAGE
54+
// bytes, then this chunk will be least double the previous
55+
// chunk's size.
56+
new_cap = last_chunk.len().min(HUGE_PAGE / 2);
57+
new_cap *= 2;
58+
} else {
59+
new_cap = PAGE;
60+
}
61+
// Also ensure that this chunk can fit `additional`.
62+
new_cap = cmp::max(additional, new_cap);
63+
64+
let mut chunk = Box::new_uninit_slice(new_cap);
65+
let Range { start, end } = chunk.as_mut_ptr_range();
66+
self.start.set(start);
67+
self.end.set(end);
68+
chunks.push(chunk);
69+
}
70+
71+
/// Allocates a byte slice with specified size from the current memory
72+
/// chunk. Returns `None` if there is no free space left to satisfy the
73+
/// request.
74+
fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
75+
let start = self.start.get().addr();
76+
let old_end = self.end.get();
77+
let end = old_end.addr();
78+
79+
let new_end = end.checked_sub(bytes)?;
80+
if start <= new_end {
81+
let new_end = old_end.with_addr(new_end);
82+
self.end.set(new_end);
83+
// SAFETY: `bytes` bytes starting at `new_end` were just reserved.
84+
Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) })
85+
} else {
86+
None
87+
}
88+
}
89+
90+
fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
91+
if bytes == 0 {
92+
return &mut [];
93+
}
94+
95+
loop {
96+
if let Some(a) = self.alloc_raw_without_grow(bytes) {
97+
break a;
98+
}
99+
// No free space left. Allocate a new chunk to satisfy the request.
100+
// On failure the grow will panic or abort.
101+
self.grow(bytes);
102+
}
103+
}
104+
105+
pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
106+
let alloc = self.alloc_raw(string.len());
107+
let bytes = MaybeUninit::write_slice(alloc, string.as_bytes());
108+
109+
// SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
110+
// and immediately convert the clone back to `&str`.
111+
unsafe { str::from_utf8_unchecked_mut(bytes) }
112+
}
113+
}

‎library/proc_macro/src/bridge/client.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ define_handles! {
181181
Diagnostic,
182182

183183
'interned:
184-
Ident,
185184
Span,
186185
}
187186

@@ -242,6 +241,8 @@ impl fmt::Debug for Span {
242241
}
243242
}
244243

244+
pub(crate) use super::symbol::Symbol;
245+
245246
macro_rules! define_client_side {
246247
($($name:ident {
247248
$(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)*
@@ -405,6 +406,9 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
405406
panic::catch_unwind(panic::AssertUnwindSafe(|| {
406407
maybe_install_panic_hook(force_show_panics);
407408

409+
// Make sure the symbol store is empty before decoding inputs.
410+
Symbol::invalidate_all();
411+
408412
let reader = &mut &buf[..];
409413
let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ());
410414

@@ -438,6 +442,10 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
438442
buf.clear();
439443
Err::<(), _>(e).encode(&mut buf, &mut ());
440444
});
445+
446+
// Now that a response has been serialized, invalidate all symbols
447+
// registered with the interner.
448+
Symbol::invalidate_all();
441449
buf
442450
}
443451

‎library/proc_macro/src/bridge/mod.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,24 +65,19 @@ macro_rules! with_api {
6565
fn from_str(src: &str) -> $S::TokenStream;
6666
fn to_string($self: &$S::TokenStream) -> String;
6767
fn from_token_tree(
68-
tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>,
68+
tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>,
6969
) -> $S::TokenStream;
7070
fn concat_trees(
7171
base: Option<$S::TokenStream>,
72-
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>,
72+
trees: Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>,
7373
) -> $S::TokenStream;
7474
fn concat_streams(
7575
base: Option<$S::TokenStream>,
7676
streams: Vec<$S::TokenStream>,
7777
) -> $S::TokenStream;
7878
fn into_trees(
7979
$self: $S::TokenStream
80-
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>>;
81-
},
82-
Ident {
83-
fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident;
84-
fn span($self: $S::Ident) -> $S::Span;
85-
fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident;
80+
) -> Vec<TokenTree<$S::TokenStream, $S::Span, $S::Symbol, $S::Literal>>;
8681
},
8782
Literal {
8883
fn drop($self: $S::Literal);
@@ -146,6 +141,9 @@ macro_rules! with_api {
146141
fn save_span($self: $S::Span) -> usize;
147142
fn recover_proc_macro_span(id: usize) -> $S::Span;
148143
},
144+
Symbol {
145+
fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>;
146+
},
149147
}
150148
};
151149
}
@@ -170,6 +168,8 @@ macro_rules! reverse_decode {
170168
}
171169
}
172170

171+
#[allow(unsafe_code)]
172+
mod arena;
173173
#[allow(unsafe_code)]
174174
mod buffer;
175175
#[forbid(unsafe_code)]
@@ -189,6 +189,8 @@ mod scoped_cell;
189189
mod selfless_reify;
190190
#[forbid(unsafe_code)]
191191
pub mod server;
192+
#[allow(unsafe_code)]
193+
mod symbol;
192194

193195
use buffer::Buffer;
194196
pub use rpc::PanicMessage;
@@ -466,16 +468,25 @@ pub struct Punct<Span> {
466468

467469
compound_traits!(struct Punct<Span> { ch, joint, span });
468470

471+
#[derive(Copy, Clone, Eq, PartialEq)]
472+
pub struct Ident<Span, Symbol> {
473+
pub sym: Symbol,
474+
pub is_raw: bool,
475+
pub span: Span,
476+
}
477+
478+
compound_traits!(struct Ident<Span, Symbol> { sym, is_raw, span });
479+
469480
#[derive(Clone)]
470-
pub enum TokenTree<TokenStream, Span, Ident, Literal> {
481+
pub enum TokenTree<TokenStream, Span, Symbol, Literal> {
471482
Group(Group<TokenStream, Span>),
472483
Punct(Punct<Span>),
473-
Ident(Ident),
484+
Ident(Ident<Span, Symbol>),
474485
Literal(Literal),
475486
}
476487

477488
compound_traits!(
478-
enum TokenTree<TokenStream, Span, Ident, Literal> {
489+
enum TokenTree<TokenStream, Span, Symbol, Literal> {
479490
Group(tt),
480491
Punct(tt),
481492
Ident(tt),

‎library/proc_macro/src/bridge/server.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ use super::client::HandleStore;
88
pub trait Types {
99
type FreeFunctions: 'static;
1010
type TokenStream: 'static + Clone;
11-
type Ident: 'static + Copy + Eq + Hash;
1211
type Literal: 'static + Clone;
1312
type SourceFile: 'static + Clone;
1413
type MultiSpan: 'static;
1514
type Diagnostic: 'static;
1615
type Span: 'static + Copy + Eq + Hash;
16+
type Symbol: 'static;
1717
}
1818

1919
/// Declare an associated fn of one of the traits below, adding necessary
@@ -38,6 +38,12 @@ macro_rules! declare_server_traits {
3838

3939
pub trait Server: Types $(+ $name)* {
4040
fn globals(&mut self) -> ExpnGlobals<Self::Span>;
41+
42+
/// Intern a symbol received from RPC
43+
fn intern_symbol(ident: &str) -> Self::Symbol;
44+
45+
/// Recover the string value of a symbol, and invoke a callback with it.
46+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str));
4147
}
4248
}
4349
}
@@ -49,6 +55,12 @@ impl<S: Server> Server for MarkedTypes<S> {
4955
fn globals(&mut self) -> ExpnGlobals<Self::Span> {
5056
<_>::mark(Server::globals(&mut self.0))
5157
}
58+
fn intern_symbol(ident: &str) -> Self::Symbol {
59+
<_>::mark(S::intern_symbol(ident))
60+
}
61+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
62+
S::with_symbol_string(symbol.unmark(), f)
63+
}
5264
}
5365

5466
macro_rules! define_mark_types_impls {
@@ -81,11 +93,13 @@ macro_rules! define_dispatcher_impl {
8193
pub trait DispatcherTrait {
8294
// HACK(eddyb) these are here to allow `Self::$name` to work below.
8395
$(type $name;)*
96+
8497
fn dispatch(&mut self, buf: Buffer) -> Buffer;
8598
}
8699

87100
impl<S: Server> DispatcherTrait for Dispatcher<MarkedTypes<S>> {
88101
$(type $name = <MarkedTypes<S> as Types>::$name;)*
102+
89103
fn dispatch(&mut self, mut buf: Buffer) -> Buffer {
90104
let Dispatcher { handle_store, server } = self;
91105

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
//! Client-side interner used for symbols.
2+
//!
3+
//! This is roughly based on the symbol interner from `rustc_span` and the
4+
//! DroplessArena from `rustc_arena`. It is unfortunately a complete
5+
//! copy/re-implementation rather than a dependency as it is difficult to depend
6+
//! on crates from within `proc_macro`, due to it being built at the same time
7+
//! as `std`.
8+
//!
9+
//! If at some point in the future it becomes easier to add dependencies to
10+
//! proc_macro, this module should probably be removed or simplified.
11+
12+
use std::cell::RefCell;
13+
use std::num::NonZeroU32;
14+
use std::str;
15+
16+
use super::*;
17+
18+
/// Handle for a symbol string stored within the Interner.
19+
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
20+
pub struct Symbol(NonZeroU32);
21+
22+
impl !Send for Symbol {}
23+
impl !Sync for Symbol {}
24+
25+
impl Symbol {
26+
/// Intern a new `Symbol`
27+
pub(crate) fn new(string: &str) -> Self {
28+
INTERNER.with_borrow_mut(|i| i.intern(string))
29+
}
30+
31+
/// Create a new `Symbol` for an identifier.
32+
///
33+
/// Validates and normalizes before converting it to a symbol.
34+
pub(crate) fn new_ident(string: &str, is_raw: bool) -> Self {
35+
// Fast-path: check if this is a valid ASCII identifier
36+
if Self::is_valid_ascii_ident(string.as_bytes()) {
37+
if is_raw && !Self::can_be_raw(string) {
38+
panic!("`{}` cannot be a raw identifier", string);
39+
}
40+
return Self::new(string);
41+
}
42+
43+
// Slow-path: If the string is already ASCII we're done, otherwise ask
44+
// our server to do this for us over RPC.
45+
// We don't need to check for identifiers which can't be raw here,
46+
// because all of them are ASCII.
47+
if string.is_ascii() {
48+
Err(())
49+
} else {
50+
client::Symbol::normalize_and_validate_ident(string)
51+
}
52+
.unwrap_or_else(|_| panic!("`{:?}` is not a valid identifier", string))
53+
}
54+
55+
/// Run a callback with the symbol's string value.
56+
pub(crate) fn with<R>(self, f: impl FnOnce(&str) -> R) -> R {
57+
INTERNER.with_borrow(|i| f(i.get(self)))
58+
}
59+
60+
/// Clear out the thread-local symbol interner, making all previously
61+
/// created symbols invalid such that `with` will panic when called on them.
62+
pub(crate) fn invalidate_all() {
63+
INTERNER.with_borrow_mut(|i| i.clear());
64+
}
65+
66+
/// Check if the ident is a valid ASCII identifier.
67+
///
68+
/// This is a short-circuit which is cheap to implement within the
69+
/// proc-macro client to avoid RPC when creating simple idents, but may
70+
/// return `false` for a valid identifier if it contains non-ASCII
71+
/// characters.
72+
fn is_valid_ascii_ident(bytes: &[u8]) -> bool {
73+
matches!(bytes.first(), Some(b'_' | b'a'..=b'z' | b'A'..=b'Z'))
74+
&& bytes[1..]
75+
.iter()
76+
.all(|b| matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9'))
77+
}
78+
79+
// Mimics the behaviour of `Symbol::can_be_raw` from `rustc_span`
80+
fn can_be_raw(string: &str) -> bool {
81+
match string {
82+
"_" | "super" | "self" | "Self" | "crate" => false,
83+
_ => true,
84+
}
85+
}
86+
}
87+
88+
impl fmt::Debug for Symbol {
89+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90+
self.with(|s| fmt::Debug::fmt(s, f))
91+
}
92+
}
93+
94+
impl ToString for Symbol {
95+
fn to_string(&self) -> String {
96+
self.with(|s| s.to_owned())
97+
}
98+
}
99+
100+
impl fmt::Display for Symbol {
101+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102+
self.with(|s| fmt::Display::fmt(s, f))
103+
}
104+
}
105+
106+
impl<S> Encode<S> for Symbol {
107+
fn encode(self, w: &mut Writer, s: &mut S) {
108+
self.with(|sym| sym.encode(w, s))
109+
}
110+
}
111+
112+
impl<S: server::Server> DecodeMut<'_, '_, client::HandleStore<server::MarkedTypes<S>>>
113+
for Marked<S::Symbol, Symbol>
114+
{
115+
fn decode(r: &mut Reader<'_>, s: &mut client::HandleStore<server::MarkedTypes<S>>) -> Self {
116+
Mark::mark(S::intern_symbol(<&str>::decode(r, s)))
117+
}
118+
}
119+
120+
impl<S: server::Server> Encode<client::HandleStore<server::MarkedTypes<S>>>
121+
for Marked<S::Symbol, Symbol>
122+
{
123+
fn encode(self, w: &mut Writer, s: &mut client::HandleStore<server::MarkedTypes<S>>) {
124+
S::with_symbol_string(&self.unmark(), |sym| sym.encode(w, s))
125+
}
126+
}
127+
128+
impl<S> DecodeMut<'_, '_, S> for Symbol {
129+
fn decode(r: &mut Reader<'_>, s: &mut S) -> Self {
130+
Symbol::new(<&str>::decode(r, s))
131+
}
132+
}
133+
134+
thread_local! {
135+
static INTERNER: RefCell<Interner> = RefCell::new(Interner {
136+
arena: arena::Arena::new(),
137+
names: fxhash::FxHashMap::default(),
138+
strings: Vec::new(),
139+
// Start with a base of 1 to make sure that `NonZeroU32` works.
140+
sym_base: NonZeroU32::new(1).unwrap(),
141+
});
142+
}
143+
144+
/// Basic interner for a `Symbol`, inspired by the one in `rustc_span`.
145+
struct Interner {
146+
arena: arena::Arena,
147+
// SAFETY: These `'static` lifetimes are actually references to data owned
148+
// by the Arena. This is safe, as we never return them as static references
149+
// from `Interner`.
150+
names: fxhash::FxHashMap<&'static str, Symbol>,
151+
strings: Vec<&'static str>,
152+
// The offset to apply to symbol names stored in the interner. This is used
153+
// to ensure that symbol names are not re-used after the interner is
154+
// cleared.
155+
sym_base: NonZeroU32,
156+
}
157+
158+
impl Interner {
159+
fn intern(&mut self, string: &str) -> Symbol {
160+
if let Some(&name) = self.names.get(string) {
161+
return name;
162+
}
163+
164+
let name = Symbol(
165+
self.sym_base
166+
.checked_add(self.strings.len() as u32)
167+
.expect("`proc_macro` symbol name overflow"),
168+
);
169+
170+
let string: &str = self.arena.alloc_str(string);
171+
172+
// SAFETY: we can extend the arena allocation to `'static` because we
173+
// only access these while the arena is still alive.
174+
let string: &'static str = unsafe { &*(string as *const str) };
175+
self.strings.push(string);
176+
self.names.insert(string, name);
177+
name
178+
}
179+
180+
/// Read a symbol's value from the store while it is held.
181+
fn get(&self, symbol: Symbol) -> &str {
182+
// NOTE: Subtract out the offset which was added to make the symbol
183+
// nonzero and prevent symbol name re-use.
184+
let name = symbol
185+
.0
186+
.get()
187+
.checked_sub(self.sym_base.get())
188+
.expect("use-after-free of `proc_macro` symbol");
189+
self.strings[name as usize]
190+
}
191+
192+
/// Clear all symbols from the store, invalidating them such that `get` will
193+
/// panic if they are accessed in the future.
194+
fn clear(&mut self) {
195+
// NOTE: Be careful not to panic here, as we may be called on the client
196+
// when a `catch_unwind` isn't installed.
197+
self.sym_base = self.sym_base.saturating_add(self.strings.len() as u32);
198+
self.names.clear();
199+
self.strings.clear();
200+
201+
// SAFETY: This is cleared after the names and strings tables are
202+
// cleared out, so no references into the arena should remain.
203+
self.arena = arena::Arena::new();
204+
}
205+
}

‎library/proc_macro/src/lib.rs

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,14 @@
2424
#![feature(staged_api)]
2525
#![feature(allow_internal_unstable)]
2626
#![feature(decl_macro)]
27+
#![feature(local_key_cell_methods)]
28+
#![feature(maybe_uninit_write_slice)]
2729
#![feature(negative_impls)]
30+
#![feature(new_uninit)]
2831
#![feature(restricted_std)]
2932
#![feature(rustc_attrs)]
3033
#![feature(min_specialization)]
34+
#![feature(strict_provenance)]
3135
#![recursion_limit = "256"]
3236

3337
#[unstable(feature = "proc_macro_internals", issue = "27812")]
@@ -214,7 +218,7 @@ fn tree_to_bridge_tree(
214218
) -> bridge::TokenTree<
215219
bridge::client::TokenStream,
216220
bridge::client::Span,
217-
bridge::client::Ident,
221+
bridge::client::Symbol,
218222
bridge::client::Literal,
219223
> {
220224
match tree {
@@ -240,7 +244,7 @@ struct ConcatTreesHelper {
240244
bridge::TokenTree<
241245
bridge::client::TokenStream,
242246
bridge::client::Span,
243-
bridge::client::Ident,
247+
bridge::client::Symbol,
244248
bridge::client::Literal,
245249
>,
246250
>,
@@ -367,7 +371,7 @@ pub mod token_stream {
367371
bridge::TokenTree<
368372
bridge::client::TokenStream,
369373
bridge::client::Span,
370-
bridge::client::Ident,
374+
bridge::client::Symbol,
371375
bridge::client::Literal,
372376
>,
373377
>,
@@ -1048,7 +1052,7 @@ impl PartialEq<Punct> for char {
10481052
/// An identifier (`ident`).
10491053
#[derive(Clone)]
10501054
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
1051-
pub struct Ident(bridge::client::Ident);
1055+
pub struct Ident(bridge::Ident<bridge::client::Span, bridge::client::Symbol>);
10521056

10531057
impl Ident {
10541058
/// Creates a new `Ident` with the given `string` as well as the specified
@@ -1072,7 +1076,11 @@ impl Ident {
10721076
/// tokens, requires a `Span` to be specified at construction.
10731077
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
10741078
pub fn new(string: &str, span: Span) -> Ident {
1075-
Ident(bridge::client::Ident::new(string, span.0, false))
1079+
Ident(bridge::Ident {
1080+
sym: bridge::client::Symbol::new_ident(string, false),
1081+
is_raw: false,
1082+
span: span.0,
1083+
})
10761084
}
10771085

10781086
/// Same as `Ident::new`, but creates a raw identifier (`r#ident`).
@@ -1081,38 +1089,45 @@ impl Ident {
10811089
/// (e.g. `self`, `super`) are not supported, and will cause a panic.
10821090
#[stable(feature = "proc_macro_raw_ident", since = "1.47.0")]
10831091
pub fn new_raw(string: &str, span: Span) -> Ident {
1084-
Ident(bridge::client::Ident::new(string, span.0, true))
1092+
Ident(bridge::Ident {
1093+
sym: bridge::client::Symbol::new_ident(string, true),
1094+
is_raw: true,
1095+
span: span.0,
1096+
})
10851097
}
10861098

10871099
/// Returns the span of this `Ident`, encompassing the entire string returned
1088-
/// by [`to_string`](Self::to_string).
1100+
/// by [`to_string`](ToString::to_string).
10891101
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
10901102
pub fn span(&self) -> Span {
1091-
Span(self.0.span())
1103+
Span(self.0.span)
10921104
}
10931105

10941106
/// Configures the span of this `Ident`, possibly changing its hygiene context.
10951107
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
10961108
pub fn set_span(&mut self, span: Span) {
1097-
self.0 = self.0.with_span(span.0);
1109+
self.0.span = span.0;
10981110
}
10991111
}
11001112

1101-
// N.B., the bridge only provides `to_string`, implement `fmt::Display`
1102-
// based on it (the reverse of the usual relationship between the two).
1103-
#[stable(feature = "proc_macro_lib", since = "1.15.0")]
1113+
/// Converts the identifier to a string that should be losslessly convertible
1114+
/// back into the same identifier.
1115+
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
11041116
impl ToString for Ident {
11051117
fn to_string(&self) -> String {
1106-
TokenStream::from(TokenTree::from(self.clone())).to_string()
1118+
self.0.sym.with(|sym| if self.0.is_raw { ["r#", sym].concat() } else { sym.to_owned() })
11071119
}
11081120
}
11091121

1110-
/// Prints the identifier as a string that should be losslessly convertible
1111-
/// back into the same identifier.
1122+
/// Prints the identifier as a string that should be losslessly convertible back
1123+
/// into the same identifier.
11121124
#[stable(feature = "proc_macro_lib2", since = "1.29.0")]
11131125
impl fmt::Display for Ident {
11141126
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1115-
f.write_str(&self.to_string())
1127+
if self.0.is_raw {
1128+
f.write_str("r#")?;
1129+
}
1130+
fmt::Display::fmt(&self.0.sym, f)
11161131
}
11171132
}
11181133

‎src/test/ui/proc-macro/invalid-punct-ident-2.rs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,9 @@
11
// aux-build:invalid-punct-ident.rs
2-
// rustc-env:RUST_BACKTRACE=0
3-
4-
// FIXME https://github.com/rust-lang/rust/issues/59998
5-
// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> ""
6-
// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> ""
7-
// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> ""
8-
// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> ""
9-
// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> ""
10-
// normalize-stderr-test "note: compiler flags.*\n\n" -> ""
11-
// normalize-stderr-test "note: rustc.*running on.*\n\n" -> ""
12-
// normalize-stderr-test "query stack during panic:\n" -> ""
13-
// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> ""
14-
// normalize-stderr-test "end of query stack\n" -> ""
2+
// ignore-stage1
3+
// only-linux
4+
//
5+
// FIXME: This should be a normal (stage1, all platforms) test in
6+
// src/test/ui/proc-macro once issue #59998 is fixed.
157

168
#[macro_use]
179
extern crate invalid_punct_ident;

‎src/test/ui/proc-macro/invalid-punct-ident-2.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error: proc macro panicked
2-
--> $DIR/invalid-punct-ident-2.rs:19:1
2+
--> $DIR/invalid-punct-ident-2.rs:11:1
33
|
44
LL | invalid_ident!();
55
| ^^^^^^^^^^^^^^^^

‎src/test/ui/proc-macro/invalid-punct-ident-3.rs

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,9 @@
11
// aux-build:invalid-punct-ident.rs
2-
// rustc-env:RUST_BACKTRACE=0
3-
4-
// FIXME https://github.com/rust-lang/rust/issues/59998
5-
// normalize-stderr-test "thread.*panicked.*proc_macro_server.rs.*\n" -> ""
6-
// normalize-stderr-test "note:.*RUST_BACKTRACE=1.*\n" -> ""
7-
// normalize-stderr-test "\nerror: internal compiler error.*\n\n" -> ""
8-
// normalize-stderr-test "note:.*unexpectedly panicked.*\n\n" -> ""
9-
// normalize-stderr-test "note: we would appreciate a bug report.*\n\n" -> ""
10-
// normalize-stderr-test "note: compiler flags.*\n\n" -> ""
11-
// normalize-stderr-test "note: rustc.*running on.*\n\n" -> ""
12-
// normalize-stderr-test "query stack during panic:\n" -> ""
13-
// normalize-stderr-test "we're just showing a limited slice of the query stack\n" -> ""
14-
// normalize-stderr-test "end of query stack\n" -> ""
2+
// ignore-stage1
3+
// only-linux
4+
//
5+
// FIXME: This should be a normal (stage1, all platforms) test in
6+
// src/test/ui/proc-macro once issue #59998 is fixed.
157

168
#[macro_use]
179
extern crate invalid_punct_ident;

‎src/test/ui/proc-macro/invalid-punct-ident-3.stderr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
error: proc macro panicked
2-
--> $DIR/invalid-punct-ident-3.rs:19:1
2+
--> $DIR/invalid-punct-ident-3.rs:11:1
33
|
44
LL | invalid_raw_ident!();
55
| ^^^^^^^^^^^^^^^^^^^^

0 commit comments

Comments
 (0)
Please sign in to comment.