Skip to content

Commit 5509ae3

Browse files
authored
Auto merge of #37290 - petrochenkov:pnp, r=jseyfried
syntax: Tweak path parsing logic. Associated paths starting with `<<` are parsed in patterns. Paths like `self::foo::bar` are interpreted as paths and not as `self` arguments in methods (cc @matklad). Now, I believe, *all* paths are consistently parsed greedily in case of ambiguity. Detection of `&'a mut self::` requires a pretty large (but still fixed) lookahead, so I had to increase the size of the parser's lookahead buffer. Curiously, if `lookahead_distance >= lookahead_buffer_size` was used previously, the parser hung forever; I fixed this as well, and now it ICEs instead. r? @jseyfried
2 parents da5b646 + fea630e commit 5509ae3

11 files changed

+134
-74
lines changed

src/libsyntax/parse/lexer/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ pub struct TokenAndSpan {
7474
pub sp: Span,
7575
}
7676

77+
// Placeholder value for a `TokenAndSpan`: an `Underscore` token with the dummy span.
// The parser's lookahead buffer uses it for slots that do not hold a pending token.
impl Default for TokenAndSpan {
78+
fn default() -> Self {
79+
TokenAndSpan { tok: token::Underscore, sp: syntax_pos::DUMMY_SP }
80+
}
81+
}
82+
7783
pub struct StringReader<'a> {
7884
pub span_diagnostic: &'a Handler,
7985
/// The absolute offset within the codemap of the next character to read

src/libsyntax/parse/parser.rs

+58-53
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,22 @@ enum PrevTokenKind {
245245
Other,
246246
}
247247

248+
// Simple fixed-size circular buffer that keeps the next few tokens read ahead of the parser.
// `start == end` means the buffer is empty.
249+
#[derive(Default)]
250+
struct LookaheadBuffer {
251+
// Token slots; a slot with no pending token holds `TokenAndSpan::default()`.
buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY],
252+
// Index of the oldest buffered token, i.e. the one taken when the parser advances.
start: usize,
253+
// Index of the slot that the next token read from the lexer is stored into.
end: usize,
254+
}
255+
256+
// Number of slots in the lookahead ring. Because `start == end` denotes an empty buffer,
// at most `LOOKAHEAD_BUFFER_CAPACITY - 1` tokens can be buffered at once, which is why
// `look_ahead` only accepts distances strictly below this value.
const LOOKAHEAD_BUFFER_CAPACITY: usize = 8;
257+
258+
impl LookaheadBuffer {
259+
// Returns the number of tokens currently buffered.
// The capacity is added before subtracting `start` so the unsigned arithmetic cannot
// underflow when `end` has wrapped around and is numerically smaller than `start`.
fn len(&self) -> usize {
260+
(LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY
261+
}
262+
}
263+
248264
/* ident is handled by common.rs */
249265

250266
pub struct Parser<'a> {
@@ -258,9 +274,7 @@ pub struct Parser<'a> {
258274
pub cfg: CrateConfig,
259275
/// the previous token kind
260276
prev_token_kind: PrevTokenKind,
261-
pub buffer: [TokenAndSpan; 4],
262-
pub buffer_start: isize,
263-
pub buffer_end: isize,
277+
lookahead_buffer: LookaheadBuffer,
264278
pub tokens_consumed: usize,
265279
pub restrictions: Restrictions,
266280
pub quote_depth: usize, // not (yet) related to the quasiquoter
@@ -356,10 +370,6 @@ impl<'a> Parser<'a> {
356370
_ => PathBuf::from(sess.codemap().span_to_filename(span)),
357371
};
358372
directory.pop();
359-
let placeholder = TokenAndSpan {
360-
tok: token::Underscore,
361-
sp: span,
362-
};
363373

364374
Parser {
365375
reader: rdr,
@@ -369,14 +379,7 @@ impl<'a> Parser<'a> {
369379
span: span,
370380
prev_span: span,
371381
prev_token_kind: PrevTokenKind::Other,
372-
buffer: [
373-
placeholder.clone(),
374-
placeholder.clone(),
375-
placeholder.clone(),
376-
placeholder.clone(),
377-
],
378-
buffer_start: 0,
379-
buffer_end: 0,
382+
lookahead_buffer: Default::default(),
380383
tokens_consumed: 0,
381384
restrictions: Restrictions::empty(),
382385
quote_depth: 0,
@@ -937,19 +940,13 @@ impl<'a> Parser<'a> {
937940
_ => PrevTokenKind::Other,
938941
};
939942

940-
let next = if self.buffer_start == self.buffer_end {
943+
let next = if self.lookahead_buffer.start == self.lookahead_buffer.end {
941944
self.reader.real_token()
942945
} else {
943946
// Avoid token copies with `replace`.
944-
let buffer_start = self.buffer_start as usize;
945-
let next_index = (buffer_start + 1) & 3;
946-
self.buffer_start = next_index as isize;
947-
948-
let placeholder = TokenAndSpan {
949-
tok: token::Underscore,
950-
sp: self.span,
951-
};
952-
mem::replace(&mut self.buffer[buffer_start], placeholder)
947+
let old_start = self.lookahead_buffer.start;
948+
self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY;
949+
mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default())
953950
};
954951
self.span = next.sp;
955952
self.token = next.tok;
@@ -982,21 +979,22 @@ impl<'a> Parser<'a> {
982979
self.expected_tokens.clear();
983980
}
984981

985-
pub fn buffer_length(&mut self) -> isize {
986-
if self.buffer_start <= self.buffer_end {
987-
return self.buffer_end - self.buffer_start;
988-
}
989-
return (4 - self.buffer_start) + self.buffer_end;
990-
}
991-
pub fn look_ahead<R, F>(&mut self, distance: usize, f: F) -> R where
982+
pub fn look_ahead<R, F>(&mut self, dist: usize, f: F) -> R where
992983
F: FnOnce(&token::Token) -> R,
993984
{
994-
let dist = distance as isize;
995-
while self.buffer_length() < dist {
996-
self.buffer[self.buffer_end as usize] = self.reader.real_token();
997-
self.buffer_end = (self.buffer_end + 1) & 3;
985+
if dist == 0 {
986+
f(&self.token)
987+
} else if dist < LOOKAHEAD_BUFFER_CAPACITY {
988+
while self.lookahead_buffer.len() < dist {
989+
self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.reader.real_token();
990+
self.lookahead_buffer.end =
991+
(self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY;
992+
}
993+
let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY;
994+
f(&self.lookahead_buffer.buffer[index].tok)
995+
} else {
996+
self.bug("lookahead distance is too large");
998997
}
999-
f(&self.buffer[((self.buffer_start + dist - 1) & 3) as usize].tok)
1000998
}
1001999
pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
10021000
self.sess.span_diagnostic.struct_span_fatal(self.span, m)
@@ -1118,7 +1116,6 @@ impl<'a> Parser<'a> {
11181116
Ok(ast::TyKind::ImplTrait(bounds))
11191117
}
11201118

1121-
11221119
pub fn parse_ty_path(&mut self) -> PResult<'a, TyKind> {
11231120
Ok(TyKind::Path(None, self.parse_path(PathStyle::Type)?))
11241121
}
@@ -3623,7 +3620,7 @@ impl<'a> Parser<'a> {
36233620
// Parse box pat
36243621
let subpat = self.parse_pat()?;
36253622
pat = PatKind::Box(subpat);
3626-
} else if self.token.is_ident() && self.token.is_path_start() &&
3623+
} else if self.token.is_ident() && !self.token.is_any_keyword() &&
36273624
self.look_ahead(1, |t| match *t {
36283625
token::OpenDelim(token::Paren) | token::OpenDelim(token::Brace) |
36293626
token::DotDotDot | token::ModSep | token::Not => false,
@@ -3874,6 +3871,11 @@ impl<'a> Parser<'a> {
38743871
})
38753872
}
38763873

3874+
// Returns `true` when the current token is the `union` keyword and the token right after
// it is an identifier that is not itself a keyword, i.e. the input looks like the start
// of a union item rather than a path or expression beginning with `union`.
// Takes `&mut self` because `look_ahead` may pull more tokens into the lookahead buffer.
fn is_union_item(&mut self) -> bool {
3875+
self.token.is_keyword(keywords::Union) &&
3876+
self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword())
3877+
}
3878+
38773879
fn parse_stmt_without_recovery(&mut self,
38783880
macro_legacy_warnings: bool)
38793881
-> PResult<'a, Option<Stmt>> {
@@ -3888,10 +3890,10 @@ impl<'a> Parser<'a> {
38883890
node: StmtKind::Local(self.parse_local(attrs.into())?),
38893891
span: mk_sp(lo, self.prev_span.hi),
38903892
}
3891-
} else if self.token.is_path_start() && self.token != token::Lt && {
3892-
!self.check_keyword(keywords::Union) ||
3893-
self.look_ahead(1, |t| *t == token::Not || *t == token::ModSep)
3894-
} {
3893+
// Starts like a simple path, but not a union item.
3894+
} else if self.token.is_path_start() &&
3895+
!self.token.is_qpath_start() &&
3896+
!self.is_union_item() {
38953897
let pth = self.parse_path(PathStyle::Expr)?;
38963898

38973899
if !self.eat(&token::Not) {
@@ -4602,6 +4604,10 @@ impl<'a> Parser<'a> {
46024604
token::Ident(ident) => { this.bump(); codemap::respan(this.prev_span, ident) }
46034605
_ => unreachable!()
46044606
};
4607+
let isolated_self = |this: &mut Self, n| {
4608+
this.look_ahead(n, |t| t.is_keyword(keywords::SelfValue)) &&
4609+
this.look_ahead(n + 1, |t| t != &token::ModSep)
4610+
};
46054611

46064612
// Parse optional self parameter of a method.
46074613
// Only a limited set of initial token sequences is considered self parameters, anything
@@ -4614,22 +4620,22 @@ impl<'a> Parser<'a> {
46144620
// &'lt self
46154621
// &'lt mut self
46164622
// &not_self
4617-
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
4623+
if isolated_self(self, 1) {
46184624
self.bump();
46194625
(SelfKind::Region(None, Mutability::Immutable), expect_ident(self))
46204626
} else if self.look_ahead(1, |t| t.is_keyword(keywords::Mut)) &&
4621-
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
4627+
isolated_self(self, 2) {
46224628
self.bump();
46234629
self.bump();
46244630
(SelfKind::Region(None, Mutability::Mutable), expect_ident(self))
46254631
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
4626-
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
4632+
isolated_self(self, 2) {
46274633
self.bump();
46284634
let lt = self.parse_lifetime()?;
46294635
(SelfKind::Region(Some(lt), Mutability::Immutable), expect_ident(self))
46304636
} else if self.look_ahead(1, |t| t.is_lifetime()) &&
46314637
self.look_ahead(2, |t| t.is_keyword(keywords::Mut)) &&
4632-
self.look_ahead(3, |t| t.is_keyword(keywords::SelfValue)) {
4638+
isolated_self(self, 3) {
46334639
self.bump();
46344640
let lt = self.parse_lifetime()?;
46354641
self.bump();
@@ -4644,12 +4650,12 @@ impl<'a> Parser<'a> {
46444650
// *mut self
46454651
// *not_self
46464652
// Emit special error for `self` cases.
4647-
if self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
4653+
if isolated_self(self, 1) {
46484654
self.bump();
46494655
self.span_err(self.span, "cannot pass `self` by raw pointer");
46504656
(SelfKind::Value(Mutability::Immutable), expect_ident(self))
46514657
} else if self.look_ahead(1, |t| t.is_mutability()) &&
4652-
self.look_ahead(2, |t| t.is_keyword(keywords::SelfValue)) {
4658+
isolated_self(self, 2) {
46534659
self.bump();
46544660
self.bump();
46554661
self.span_err(self.span, "cannot pass `self` by raw pointer");
@@ -4659,7 +4665,7 @@ impl<'a> Parser<'a> {
46594665
}
46604666
}
46614667
token::Ident(..) => {
4662-
if self.token.is_keyword(keywords::SelfValue) {
4668+
if isolated_self(self, 0) {
46634669
// self
46644670
// self: TYPE
46654671
let eself_ident = expect_ident(self);
@@ -4670,7 +4676,7 @@ impl<'a> Parser<'a> {
46704676
(SelfKind::Value(Mutability::Immutable), eself_ident)
46714677
}
46724678
} else if self.token.is_keyword(keywords::Mut) &&
4673-
self.look_ahead(1, |t| t.is_keyword(keywords::SelfValue)) {
4679+
isolated_self(self, 1) {
46744680
// mut self
46754681
// mut self: TYPE
46764682
self.bump();
@@ -5961,8 +5967,7 @@ impl<'a> Parser<'a> {
59615967
maybe_append(attrs, extra_attrs));
59625968
return Ok(Some(item));
59635969
}
5964-
if self.check_keyword(keywords::Union) &&
5965-
self.look_ahead(1, |t| t.is_ident() && !t.is_any_keyword()) {
5970+
if self.is_union_item() {
59665971
// UNION ITEM
59675972
self.bump();
59685973
let (ident, item_, extra_attrs) = self.parse_item_union()?;

src/libsyntax/parse/token.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,8 @@ impl Token {
159159
/// Returns `true` if the token can appear at the start of an expression.
160160
pub fn can_begin_expr(&self) -> bool {
161161
match *self {
162-
OpenDelim(_) => true,
162+
OpenDelim(..) => true,
163163
Ident(..) => true,
164-
Underscore => true,
165-
Tilde => true,
166164
Literal(..) => true,
167165
Not => true,
168166
BinOp(Minus) => true,
@@ -172,6 +170,7 @@ impl Token {
172170
OrOr => true, // in lambda syntax
173171
AndAnd => true, // double borrow
174172
DotDot | DotDotDot => true, // range notation
173+
Lt | BinOp(Shl) => true, // associated path
175174
ModSep => true,
176175
Interpolated(NtExpr(..)) => true,
177176
Interpolated(NtIdent(..)) => true,
@@ -236,8 +235,12 @@ impl Token {
236235
self.is_keyword(keywords::Const)
237236
}
238237

238+
/// Returns `true` if the token can open an associated path: either `<` or
/// `<<` (the latter arrives as a single `BinOp(Shl)` token).
pub fn is_qpath_start(&self) -> bool {
239+
self == &Lt || self == &BinOp(Shl)
240+
}
241+
239242
pub fn is_path_start(&self) -> bool {
240-
self == &ModSep || self == &Lt || self.is_path() ||
243+
self == &ModSep || self.is_qpath_start() || self.is_path() ||
241244
self.is_path_segment_keyword() || self.is_ident() && !self.is_any_keyword()
242245
}
243246

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Check that associated paths starting with `<<` are successfully parsed.
12+
13+
fn main() {
14+
let _: <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
15+
let _ = <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
16+
let <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
17+
let 0 ... <<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
18+
//~^ ERROR only char and numeric types are allowed in range patterns
19+
<<A>::B>::C; //~ ERROR type name `A` is undefined or not in scope
20+
}

src/test/parse-fail/keyword-self-as-identifier.rs renamed to src/test/compile-fail/keyword-self-as-identifier.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// compile-flags: -Z parse-only
12-
1311
fn main() {
14-
let Self = "foo"; //~ error: expected identifier, found keyword `Self`
12+
let Self = "foo"; //~ ERROR unresolved unit struct/variant or constant `Self`
1513
}

src/test/parse-fail/keyword-super-as-identifier.rs renamed to src/test/compile-fail/keyword-super-as-identifier.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// compile-flags: -Z parse-only
12-
1311
fn main() {
14-
let super = "foo"; //~ error: expected identifier, found keyword `super`
12+
let super = "foo"; //~ ERROR unresolved unit struct/variant or constant `super`
1513
}

src/test/parse-fail/keyword-super.rs renamed to src/test/compile-fail/keyword-super.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
// compile-flags: -Z parse-only
12-
1311
fn main() {
14-
let super: isize; //~ ERROR expected identifier, found keyword `super`
12+
let super: isize; //~ ERROR unresolved unit struct/variant or constant `super`
1513
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Check that `self::foo` is parsed as a general pattern and not a self argument.
12+
13+
struct S;
14+
15+
impl S {
16+
fn f(self::S: S) {}
17+
fn g(&self::S: &S) {}
18+
fn h(&mut self::S: &mut S) {}
19+
fn i(&'a self::S: &S) {} //~ ERROR unexpected lifetime `'a` in pattern
20+
//~^ ERROR expected one of `)` or `mut`, found `'a`
21+
}
22+
23+
fn main() {}

src/test/compile-fail/self_type_keyword-2.rs

+11-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,14 @@
1010

1111
use self::Self as Foo; //~ ERROR unresolved import `self::Self`
1212

13-
pub fn main() {}
13+
pub fn main() {
14+
let Self = 5;
15+
//~^ ERROR unresolved unit struct/variant or constant `Self`
16+
17+
match 15 {
18+
Self => (),
19+
//~^ ERROR unresolved unit struct/variant or constant `Self`
20+
Foo { x: Self } => (),
21+
//~^ ERROR unresolved unit struct/variant or constant `Self`
22+
}
23+
}

0 commit comments

Comments
 (0)