Skip to content

Commit c5ba2d2

Browse files
committed
Rollup merge of rust-lang#45125 - bleibig:grammar-update, r=alexcrichton
Update grammar to parse current Rust syntax. Mainly addressing rust-lang#32723. This PR updates the bison grammar so that it can parse the current Rust syntax, except for feature-gated syntax additions. It has been tested with all the tests in run-pass. The grammar in this repo doesn't have build logic anymore, but you can test it out in https://github.com/bleibig/rust-grammar, which has all of what's in this PR. If you are interested in having build logic and grammar tests again, I can look into implementing that as well. I'm aware from the discussion in rust-lang#30942 that it is still somewhat undecided what an official Rust grammar should be. With this PR we can go back to having an up-to-date flex/bison-based grammar, but the rustypop grammar looks interesting as well.
2 parents d7f1a26 + 8240b87 commit c5ba2d2

File tree

3 files changed

+271
-186
lines changed

3 files changed

+271
-186
lines changed

src/grammar/lexer.l

+45-28
Original file line number | Diff line number | Diff line change
@@ -85,43 +85,60 @@ ident [a-zA-Z\x80-\xff_][a-zA-Z0-9\x80-\xff_]*
8585
<blockcomment>(.|\n) { }
8686

8787
_ { return UNDERSCORE; }
88+
abstract { return ABSTRACT; }
89+
alignof { return ALIGNOF; }
8890
as { return AS; }
91+
become { return BECOME; }
8992
box { return BOX; }
9093
break { return BREAK; }
94+
catch { return CATCH; }
9195
const { return CONST; }
9296
continue { return CONTINUE; }
9397
crate { return CRATE; }
98+
default { return DEFAULT; }
99+
do { return DO; }
94100
else { return ELSE; }
95101
enum { return ENUM; }
96102
extern { return EXTERN; }
97103
false { return FALSE; }
104+
final { return FINAL; }
98105
fn { return FN; }
99106
for { return FOR; }
100107
if { return IF; }
101108
impl { return IMPL; }
102109
in { return IN; }
103110
let { return LET; }
104111
loop { return LOOP; }
112+
macro { return MACRO; }
105113
match { return MATCH; }
106114
mod { return MOD; }
107115
move { return MOVE; }
108116
mut { return MUT; }
117+
offsetof { return OFFSETOF; }
118+
override { return OVERRIDE; }
109119
priv { return PRIV; }
110120
proc { return PROC; }
121+
pure { return PURE; }
111122
pub { return PUB; }
112123
ref { return REF; }
113124
return { return RETURN; }
114125
self { return SELF; }
126+
sizeof { return SIZEOF; }
115127
static { return STATIC; }
116128
struct { return STRUCT; }
129+
super { return SUPER; }
117130
trait { return TRAIT; }
118131
true { return TRUE; }
119132
type { return TYPE; }
120133
typeof { return TYPEOF; }
134+
union { return UNION; }
121135
unsafe { return UNSAFE; }
136+
unsized { return UNSIZED; }
122137
use { return USE; }
138+
virtual { return VIRTUAL; }
123139
where { return WHERE; }
124140
while { return WHILE; }
141+
yield { return YIELD; }
125142

126143
{ident} { return IDENT; }
127144

@@ -189,25 +206,25 @@ while { return WHILE; }
189206
\>\>= { return SHREQ; }
190207
\> { return '>'; }
191208

192-
\x27 { BEGIN(ltorchar); yymore(); }
193-
<ltorchar>static { BEGIN(INITIAL); return STATIC_LIFETIME; }
194-
<ltorchar>{ident} { BEGIN(INITIAL); return LIFETIME; }
195-
<ltorchar>\\[nrt\\\x27\x220]\x27 { BEGIN(suffix); return LIT_CHAR; }
196-
<ltorchar>\\x[0-9a-fA-F]{2}\x27 { BEGIN(suffix); return LIT_CHAR; }
197-
<ltorchar>\\u\{[0-9a-fA-F]?{6}\}\x27 { BEGIN(suffix); return LIT_CHAR; }
198-
<ltorchar>.\x27 { BEGIN(suffix); return LIT_CHAR; }
199-
<ltorchar>[\x80-\xff]{2,4}\x27 { BEGIN(suffix); return LIT_CHAR; }
200-
<ltorchar><<EOF>> { BEGIN(INITIAL); return -1; }
209+
\x27 { BEGIN(ltorchar); yymore(); }
210+
<ltorchar>static { BEGIN(INITIAL); return STATIC_LIFETIME; }
211+
<ltorchar>{ident} { BEGIN(INITIAL); return LIFETIME; }
212+
<ltorchar>\\[nrt\\\x27\x220]\x27 { BEGIN(suffix); return LIT_CHAR; }
213+
<ltorchar>\\x[0-9a-fA-F]{2}\x27 { BEGIN(suffix); return LIT_CHAR; }
214+
<ltorchar>\\u\{([0-9a-fA-F]_*){1,6}\}\x27 { BEGIN(suffix); return LIT_CHAR; }
215+
<ltorchar>.\x27 { BEGIN(suffix); return LIT_CHAR; }
216+
<ltorchar>[\x80-\xff]{2,4}\x27 { BEGIN(suffix); return LIT_CHAR; }
217+
<ltorchar><<EOF>> { BEGIN(INITIAL); return -1; }
201218

202219
b\x22 { BEGIN(bytestr); yymore(); }
203220
<bytestr>\x22 { BEGIN(suffix); return LIT_BYTE_STR; }
204221

205-
<bytestr><<EOF>> { return -1; }
206-
<bytestr>\\[n\nrt\\\x27\x220] { yymore(); }
207-
<bytestr>\\x[0-9a-fA-F]{2} { yymore(); }
208-
<bytestr>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
209-
<bytestr>\\[^n\nrt\\\x27\x220] { return -1; }
210-
<bytestr>(.|\n) { yymore(); }
222+
<bytestr><<EOF>> { return -1; }
223+
<bytestr>\\[n\nrt\\\x27\x220] { yymore(); }
224+
<bytestr>\\x[0-9a-fA-F]{2} { yymore(); }
225+
<bytestr>\\u\{([0-9a-fA-F]_*){1,6}\} { yymore(); }
226+
<bytestr>\\[^n\nrt\\\x27\x220] { return -1; }
227+
<bytestr>(.|\n) { yymore(); }
211228

212229
br\x22 { BEGIN(rawbytestr_nohash); yymore(); }
213230
<rawbytestr_nohash>\x22 { BEGIN(suffix); return LIT_BYTE_STR_RAW; }
@@ -252,13 +269,13 @@ br/# {
252269
}
253270
<rawbytestr><<EOF>> { return -1; }
254271

255-
b\x27 { BEGIN(byte); yymore(); }
256-
<byte>\\[nrt\\\x27\x220]\x27 { BEGIN(INITIAL); return LIT_BYTE; }
257-
<byte>\\x[0-9a-fA-F]{2}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
258-
<byte>\\u[0-9a-fA-F]{4}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
259-
<byte>\\U[0-9a-fA-F]{8}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
260-
<byte>.\x27 { BEGIN(INITIAL); return LIT_BYTE; }
261-
<byte><<EOF>> { BEGIN(INITIAL); return -1; }
272+
b\x27 { BEGIN(byte); yymore(); }
273+
<byte>\\[nrt\\\x27\x220]\x27 { BEGIN(INITIAL); return LIT_BYTE; }
274+
<byte>\\x[0-9a-fA-F]{2}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
275+
<byte>\\u([0-9a-fA-F]_*){4}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
276+
<byte>\\U([0-9a-fA-F]_*){8}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
277+
<byte>.\x27 { BEGIN(INITIAL); return LIT_BYTE; }
278+
<byte><<EOF>> { BEGIN(INITIAL); return -1; }
262279

263280
r\x22 { BEGIN(rawstr); yymore(); }
264281
<rawstr>\x22 { BEGIN(suffix); return LIT_STR_RAW; }
@@ -310,12 +327,12 @@ r/# {
310327
\x22 { BEGIN(str); yymore(); }
311328
<str>\x22 { BEGIN(suffix); return LIT_STR; }
312329

313-
<str><<EOF>> { return -1; }
314-
<str>\\[n\nr\rt\\\x27\x220] { yymore(); }
315-
<str>\\x[0-9a-fA-F]{2} { yymore(); }
316-
<str>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
317-
<str>\\[^n\nrt\\\x27\x220] { return -1; }
318-
<str>(.|\n) { yymore(); }
330+
<str><<EOF>> { return -1; }
331+
<str>\\[n\nr\rt\\\x27\x220] { yymore(); }
332+
<str>\\x[0-9a-fA-F]{2} { yymore(); }
333+
<str>\\u\{([0-9a-fA-F]_*){1,6}\} { yymore(); }
334+
<str>\\[^n\nrt\\\x27\x220] { return -1; }
335+
<str>(.|\n) { yymore(); }
319336

320337
\<- { return LARROW; }
321338
-\> { return RARROW; }

0 commit comments

Comments
 (0)