Skip to content

Commit aa98b25

Browse files
committed
auto merge of #16477 : pnkfelix/rust/fsk-quotstx, r=brson
quote_expr macro: embed Ident using special encoding that retains hygiene state. Fix #15750, #15962
2 parents 28b5e45 + 9434920 commit aa98b25

File tree

9 files changed

+474
-31
lines changed

9 files changed

+474
-31
lines changed

src/libsyntax/ast.rs

+6
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@ impl Ident {
5555
pub fn as_str<'a>(&'a self) -> &'a str {
5656
self.name.as_str()
5757
}
58+
59+
pub fn encode_with_hygiene(&self) -> String {
60+
format!("\x00name_{:u},ctxt_{:u}\x00",
61+
self.name.uint(),
62+
self.ctxt)
63+
}
5864
}
5965

6066
impl Show for Ident {

src/libsyntax/ext/quote.rs

+96-3
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,39 @@ pub mod rt {
9797
fn to_source(&self) -> String;
9898
}
9999

100+
// FIXME (Issue #16472): This should go away after ToTokens impls
101+
// are revised to go directly to token-trees.
102+
trait ToSourceWithHygiene : ToSource {
103+
// Takes a thing and generates a string containing rust code
104+
// for it, encoding Idents as special byte sequences to
105+
// maintain hygiene across serialization and deserialization.
106+
fn to_source_with_hygiene(&self) -> String;
107+
}
108+
100109
macro_rules! impl_to_source(
101110
(Gc<$t:ty>, $pp:ident) => (
102111
impl ToSource for Gc<$t> {
103112
fn to_source(&self) -> String {
104113
pprust::$pp(&**self)
105114
}
106115
}
116+
impl ToSourceWithHygiene for Gc<$t> {
117+
fn to_source_with_hygiene(&self) -> String {
118+
pprust::with_hygiene::$pp(&**self)
119+
}
120+
}
107121
);
108122
($t:ty, $pp:ident) => (
109123
impl ToSource for $t {
110124
fn to_source(&self) -> String {
111125
pprust::$pp(self)
112126
}
113127
}
128+
impl ToSourceWithHygiene for $t {
129+
fn to_source_with_hygiene(&self) -> String {
130+
pprust::with_hygiene::$pp(self)
131+
}
132+
}
114133
);
115134
)
116135

@@ -122,13 +141,28 @@ pub mod rt {
122141
.to_string()
123142
}
124143

144+
fn slice_to_source_with_hygiene<'a, T: ToSourceWithHygiene>(
145+
sep: &'static str, xs: &'a [T]) -> String {
146+
xs.iter()
147+
.map(|i| i.to_source_with_hygiene())
148+
.collect::<Vec<String>>()
149+
.connect(sep)
150+
.to_string()
151+
}
152+
125153
macro_rules! impl_to_source_slice(
126154
($t:ty, $sep:expr) => (
127155
impl<'a> ToSource for &'a [$t] {
128156
fn to_source(&self) -> String {
129157
slice_to_source($sep, *self)
130158
}
131159
}
160+
161+
impl<'a> ToSourceWithHygiene for &'a [$t] {
162+
fn to_source_with_hygiene(&self) -> String {
163+
slice_to_source_with_hygiene($sep, *self)
164+
}
165+
}
132166
)
133167
)
134168

@@ -138,6 +172,12 @@ pub mod rt {
138172
}
139173
}
140174

175+
impl ToSourceWithHygiene for ast::Ident {
176+
fn to_source_with_hygiene(&self) -> String {
177+
self.encode_with_hygiene()
178+
}
179+
}
180+
141181
impl_to_source!(ast::Ty, ty_to_string)
142182
impl_to_source!(ast::Block, block_to_string)
143183
impl_to_source!(ast::Arg, arg_to_string)
@@ -156,6 +196,11 @@ pub mod rt {
156196
pprust::attribute_to_string(&dummy_spanned(*self))
157197
}
158198
}
199+
impl ToSourceWithHygiene for ast::Attribute_ {
200+
fn to_source_with_hygiene(&self) -> String {
201+
self.to_source()
202+
}
203+
}
159204

160205
impl<'a> ToSource for &'a str {
161206
fn to_source(&self) -> String {
@@ -164,26 +209,46 @@ pub mod rt {
164209
pprust::lit_to_string(&lit)
165210
}
166211
}
212+
impl<'a> ToSourceWithHygiene for &'a str {
213+
fn to_source_with_hygiene(&self) -> String {
214+
self.to_source()
215+
}
216+
}
167217

168218
impl ToSource for () {
169219
fn to_source(&self) -> String {
170220
"()".to_string()
171221
}
172222
}
223+
impl ToSourceWithHygiene for () {
224+
fn to_source_with_hygiene(&self) -> String {
225+
self.to_source()
226+
}
227+
}
173228

174229
impl ToSource for bool {
175230
fn to_source(&self) -> String {
176231
let lit = dummy_spanned(ast::LitBool(*self));
177232
pprust::lit_to_string(&lit)
178233
}
179234
}
235+
impl ToSourceWithHygiene for bool {
236+
fn to_source_with_hygiene(&self) -> String {
237+
self.to_source()
238+
}
239+
}
180240

181241
impl ToSource for char {
182242
fn to_source(&self) -> String {
183243
let lit = dummy_spanned(ast::LitChar(*self));
184244
pprust::lit_to_string(&lit)
185245
}
186246
}
247+
impl ToSourceWithHygiene for char {
248+
fn to_source_with_hygiene(&self) -> String {
249+
self.to_source()
250+
}
251+
}
187252

188253
macro_rules! impl_to_source_int(
189254
(signed, $t:ty, $tag:ident) => (
@@ -194,6 +259,11 @@ pub mod rt {
194259
pprust::lit_to_string(&dummy_spanned(lit))
195260
}
196261
}
262+
impl ToSourceWithHygiene for $t {
263+
fn to_source_with_hygiene(&self) -> String {
264+
self.to_source()
265+
}
266+
}
197267
);
198268
(unsigned, $t:ty, $tag:ident) => (
199269
impl ToSource for $t {
@@ -202,6 +272,11 @@ pub mod rt {
202272
pprust::lit_to_string(&dummy_spanned(lit))
203273
}
204274
}
275+
impl ToSourceWithHygiene for $t {
276+
fn to_source_with_hygiene(&self) -> String {
277+
self.to_source()
278+
}
279+
}
205280
);
206281
)
207282

@@ -223,7 +298,7 @@ pub mod rt {
223298
($t:ty) => (
224299
impl ToTokens for $t {
225300
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<TokenTree> {
226-
cx.parse_tts(self.to_source())
301+
cx.parse_tts_with_hygiene(self.to_source_with_hygiene())
227302
}
228303
}
229304
)
@@ -233,7 +308,7 @@ pub mod rt {
233308
($t:ty) => (
234309
impl<'a> ToTokens for $t {
235310
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<TokenTree> {
236-
cx.parse_tts(self.to_source())
311+
cx.parse_tts_with_hygiene(self.to_source_with_hygiene())
237312
}
238313
}
239314
)
@@ -272,7 +347,13 @@ pub mod rt {
272347
fn parse_item(&self, s: String) -> Gc<ast::Item>;
273348
fn parse_expr(&self, s: String) -> Gc<ast::Expr>;
274349
fn parse_stmt(&self, s: String) -> Gc<ast::Stmt>;
275-
fn parse_tts(&self, s: String) -> Vec<ast::TokenTree> ;
350+
fn parse_tts(&self, s: String) -> Vec<ast::TokenTree>;
351+
}
352+
353+
trait ExtParseUtilsWithHygiene {
354+
// FIXME (Issue #16472): This should go away after ToTokens impls
355+
// are revised to go directly to token-trees.
356+
fn parse_tts_with_hygiene(&self, s: String) -> Vec<ast::TokenTree>;
276357
}
277358

278359
impl<'a> ExtParseUtils for ExtCtxt<'a> {
@@ -315,6 +396,18 @@ pub mod rt {
315396
}
316397
}
317398

399+
impl<'a> ExtParseUtilsWithHygiene for ExtCtxt<'a> {
400+
401+
fn parse_tts_with_hygiene(&self, s: String) -> Vec<ast::TokenTree> {
402+
use parse::with_hygiene::parse_tts_from_source_str;
403+
parse_tts_from_source_str("<quote expansion>".to_string(),
404+
s,
405+
self.cfg(),
406+
self.parse_sess())
407+
}
408+
409+
}
410+
318411
}
319412

320413
pub fn expand_quote_tokens(cx: &mut ExtCtxt,

src/libsyntax/parse/lexer/mod.rs

+105
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ use parse::token;
1717
use parse::token::{str_to_ident};
1818

1919
use std::char;
20+
use std::fmt;
2021
use std::mem::replace;
22+
use std::num;
2123
use std::rc::Rc;
2224
use std::str;
2325

@@ -55,6 +57,11 @@ pub struct StringReader<'a> {
5557
/* cached: */
5658
pub peek_tok: token::Token,
5759
pub peek_span: Span,
60+
61+
// FIXME (Issue #16472): This field should go away after ToTokens impls
62+
// are revised to go directly to token-trees.
63+
/// Is \x00name_<name>,ctxt_<ctxt>\x00 interpreted as an encoded ast::Ident?
64+
read_embedded_ident: bool,
5865
}
5966

6067
impl<'a> Reader for StringReader<'a> {
@@ -106,6 +113,17 @@ impl<'a> Reader for TtReader<'a> {
106113
}
107114
}
108115

116+
// FIXME (Issue #16472): This function should go away after
117+
// ToTokens impls are revised to go directly to token-trees.
118+
pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler,
119+
filemap: Rc<codemap::FileMap>)
120+
-> StringReader<'b> {
121+
let mut sr = StringReader::new_raw(span_diagnostic, filemap);
122+
sr.read_embedded_ident = true;
123+
sr.advance_token();
124+
sr
125+
}
126+
109127
impl<'a> StringReader<'a> {
110128
/// For comments.rs, which hackily pokes into pos and curr
111129
pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler,
@@ -120,6 +138,7 @@ impl<'a> StringReader<'a> {
120138
/* dummy values; not read */
121139
peek_tok: token::EOF,
122140
peek_span: codemap::DUMMY_SP,
141+
read_embedded_ident: false,
123142
};
124143
sr.bump();
125144
sr
@@ -512,6 +531,81 @@ impl<'a> StringReader<'a> {
512531
})
513532
}
514533

534+
// FIXME (Issue #16472): The scan_embedded_hygienic_ident function
535+
// should go away after we revise the syntax::ext::quote::ToTokens
536+
// impls to go directly to token-trees instead of thing -> string
537+
// -> token-trees. (The function is currently used to resolve
538+
// Issues #15750 and #15962.)
539+
//
540+
// Since this function is only used for certain internal macros,
541+
// and the functionality it provides is not exposed to end user
542+
// programs, pnkfelix deliberately chose to write it in a way that
543+
// favors rustc debugging effectiveness over runtime efficiency.
544+
545+
/// Scan through input of form \x00name_NNNNNN,ctxt_CCCCCCC\x00
546+
/// where: `NNNNNN` is a string of characters forming an integer
547+
/// (the name) and `CCCCCCC` is a string of characters forming an
548+
/// integer (the ctxt), separated by a comma and delimited by a
549+
/// `\x00` marker.
550+
#[inline(never)]
551+
fn scan_embedded_hygienic_ident(&mut self) -> ast::Ident {
552+
fn bump_expecting_char<'a,D:fmt::Show>(r: &mut StringReader<'a>,
553+
c: char,
554+
described_c: D,
555+
where: &str) {
556+
match r.curr {
557+
Some(r_c) if r_c == c => r.bump(),
558+
Some(r_c) => fail!("expected {}, hit {}, {}", described_c, r_c, where),
559+
None => fail!("expected {}, hit EOF, {}", described_c, where),
560+
}
561+
}
562+
563+
let where = "while scanning embedded hygienic ident";
564+
565+
// skip over the leading `\x00`
566+
bump_expecting_char(self, '\x00', "nul-byte", where);
567+
568+
// skip over the "name_"
569+
for c in "name_".chars() {
570+
bump_expecting_char(self, c, c, where);
571+
}
572+
573+
let start_bpos = self.last_pos;
574+
let base = 10;
575+
576+
// find the integer representing the name
577+
self.scan_digits(base);
578+
let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
579+
num::from_str_radix(s, 10).unwrap_or_else(|| {
580+
fail!("expected digits representing a name, got `{}`, {}, range [{},{}]",
581+
s, where, start_bpos, self.last_pos);
582+
})
583+
});
584+
585+
// skip over the `,`
586+
bump_expecting_char(self, ',', "comma", where);
587+
588+
// skip over the "ctxt_"
589+
for c in "ctxt_".chars() {
590+
bump_expecting_char(self, c, c, where);
591+
}
592+
593+
// find the integer representing the ctxt
594+
let start_bpos = self.last_pos;
595+
self.scan_digits(base);
596+
let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
597+
num::from_str_radix(s, 10).unwrap_or_else(|| {
598+
fail!("expected digits representing a ctxt, got `{}`, {}", s, where);
599+
})
600+
});
601+
602+
// skip over the `\x00`
603+
bump_expecting_char(self, '\x00', "nul-byte", where);
604+
605+
ast::Ident { name: ast::Name(encoded_name),
606+
ctxt: encoded_ctxt, }
607+
}
608+
515609
/// Scan through any digits (base `radix`) or underscores, and return how
516610
/// many digits there were.
517611
fn scan_digits(&mut self, radix: uint) -> uint {
@@ -839,6 +933,17 @@ impl<'a> StringReader<'a> {
839933
return self.scan_number(c.unwrap());
840934
}
841935

936+
if self.read_embedded_ident {
937+
match (c.unwrap(), self.nextch(), self.nextnextch()) {
938+
('\x00', Some('n'), Some('a')) => {
939+
let ast_ident = self.scan_embedded_hygienic_ident();
940+
let is_mod_name = self.curr_is(':') && self.nextch_is(':');
941+
return token::IDENT(ast_ident, is_mod_name);
942+
}
943+
_ => {}
944+
}
945+
}
946+
842947
match c.expect("next_token_inner called at EOF") {
843948
// One-byte tokens.
844949
';' => { self.bump(); return token::SEMI; }

0 commit comments

Comments
 (0)