
Commit 14ef167

[WIP] Check future proofing of macros with multiple arms using FIRST sets.
1 parent a120ae7 commit 14ef167
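
For intuition, here is a hypothetical macro of the kind this check targets (the name and the arms are illustrative, not taken from the commit): both arms can begin with an identifier, i.e. the FIRST sets of `expr` and `ty` overlap, and neither fragment is guaranteed to match a single token tree.

macro_rules! ambiguous {
    // hypothetical example: an input such as `ambiguous!(foo)` could in
    // principle reach either arm; today the `expr` arm wins, but future
    // grammar changes could alter which inputs reach the second arm.
    ($e:expr) => { /* ... */ };
    ($t:ty) => { /* ... */ };
}

As I read the check_matcher_firsts code added below, such a macro would be reported with "macro is not future-proof".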

File tree: 1 file changed (+371, -1 lines)

src/libsyntax/ext/tt/macro_rules.rs

Lines changed: 371 additions & 1 deletion
@@ -309,11 +309,25 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt,
                     (**tt).clone()
                 }
                 _ => cx.span_bug(def.span, "wrong-structured lhs")
-            }).collect()
+            }).collect::<Vec<_>>()
         }
         _ => cx.span_bug(def.span, "wrong-structured lhs")
     };
 
+    'a: for (i, lhs) in lhses.iter().enumerate() {
+        for lhs_ in lhses[i + 1 ..].iter() {
+            if !check_lhs_firsts(cx, lhs, lhs_) {
+                cx.struct_span_err(def.span, "macro is not future-proof")
+                  .span_help(lhs.get_span(), "parsing of this arm is ambiguous...")
+                  .span_help(lhs_.get_span(), "with the parsing of this arm.")
+                  .help("the behaviour of this macro might change in the future")
+                  .emit();
+                valid = false;
+                break 'a;
+            }
+        }
+    }
+
     let rhses = match **argument_map.get(&rhs_nm.name).unwrap() {
         MatchedSeq(ref s, _) => {
             s.iter().map(|m| match **m {
@@ -339,6 +353,362 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt,
     NormalTT(exp, Some(def.span), def.allow_internal_unstable)
 }
 
+fn check_lhs_firsts(cx: &ExtCtxt, lhs: &TokenTree, lhs_: &TokenTree) -> bool {
+    match (lhs, lhs_) {
+        (&TokenTree::Delimited(_, ref tta),
+         &TokenTree::Delimited(_, ref ttb)) =>
+            check_matcher_firsts(cx, &tta.tts, &ttb.tts),
+        _ => cx.span_bug(lhs.get_span(), "malformed macro lhs")
+    }
+}
+
+fn match_same_input(ma: &TokenTree, mb: &TokenTree) -> bool {
+    match (ma, mb) {
+        (&TokenTree::Token(_, MatchNt(_, nta)),
+         &TokenTree::Token(_, MatchNt(_, ntb))) =>
+            nta == ntb,
+        // FIXME: must we descend into Interpolated TTs here?
+        (&TokenTree::Token(_, ref toka),
+         &TokenTree::Token(_, ref tokb)) =>
+            toka == tokb,
+        (&TokenTree::Delimited(_, ref delima),
+         &TokenTree::Delimited(_, ref delimb)) => {
+            delima.delim == delimb.delim &&
+            delima.tts.iter().zip(delimb.tts.iter())
+                .all(|(ref t1, ref t2)| match_same_input(t1, t2))
+        }
+        (&TokenTree::Sequence(_, ref seqa),
+         &TokenTree::Sequence(_, ref seqb)) => {
+            seqa.separator == seqb.separator &&
+            seqa.op == seqb.op &&
+            seqa.tts.iter().zip(seqb.tts.iter())
+                .all(|(ref t1, ref t2)| match_same_input(t1, t2))
+        }
+        _ => false
+    }
+}
+
+// assumes that tok != MatchNt
+fn nt_first_set_contains(nt: ast::Ident, tok: &Token) -> bool {
+    use parse::token::BinOpToken::*;
+    use parse::token::DelimToken::*;
+    match &nt.name.as_str() as &str {
+        "tt" => true,
+        "ident" => match *tok {
+            Ident(_) => true,
+            _ => false
+        },
+        "meta" => match *tok {
+            Ident(_) => true,
+            _ => false
+        },
+        "path" => match *tok {
+            ModSep |
+            Ident(_) => true,
+            _ => false
+        },
+        "ty" => match *tok {
+            AndAnd |
+            BinOp(And) |
+            OpenDelim(Paren) |
+            OpenDelim(Bracket) |
+            BinOp(Star) |
+            ModSep |
+            BinOp(Shl) |
+            Lt |
+            Underscore |
+            Ident(_) => true,
+            _ => false
+        },
+        "expr" => match *tok {
+            BinOp(And) |
+            AndAnd |
+            Not |
+            BinOp(Star) |
+            BinOp(Minus) |
+            OpenDelim(_) |
+            DotDot |
+            ModSep |
+            BinOp(Shl) |
+            Lt |
+            Lifetime(_) |
+            BinOp(Or) |
+            OrOr |
+            Ident(_) |
+            Literal(..) => true,
+            _ => false
+        },
+        "pat" => match *tok {
+            AndAnd |
+            BinOp(And) |
+            OpenDelim(Paren) |
+            OpenDelim(Bracket) |
+            BinOp(Minus) |
+            ModSep |
+            BinOp(Shl) |
+            Lt |
+            Underscore |
+            Ident(_) |
+            Literal(..) => true,
+            _ => false
+        },
+        "stmt" => match *tok {
+            BinOp(And) |
+            AndAnd |
+            Not |
+            BinOp(Star) |
+            BinOp(Minus) |
+            Pound |
+            OpenDelim(_) |
+            DotDot |
+            ModSep |
+            Semi |
+            BinOp(Shl) |
+            Lt |
+            Lifetime(_) |
+            BinOp(Or) |
+            OrOr |
+            Ident(_) |
+            Literal(..) => true,
+            _ => false
+        },
+        "block" => match *tok {
+            OpenDelim(Brace) => true,
+            _ => false
+        },
+        "item" => match *tok {
+            ModSep |
+            Ident(_) => true,
+            _ => false
+        },
+        _ => panic!("unknown NT")
+    }
+}
+
+fn nt_first_disjoints(nt1: ast::Ident, nt2: ast::Ident) -> bool {
+    use parse::token::DelimToken::*;
+    match (&nt1.name.as_str() as &str, &nt2.name.as_str() as &str) {
+        ("block", _) => !nt_first_set_contains(nt2, &OpenDelim(Brace)),
+        (_, "block") => !nt_first_set_contains(nt1, &OpenDelim(Brace)),
+        // all the others can contain Ident
+        _ => false
+    }
+}
+
+fn first_set_contains(set: &TokenSet, tok: &Token) -> bool {
+    for &(_, ref t) in set.tokens.iter() {
+        match (t, tok) {
+            (&MatchNt(_, nt1), &MatchNt(_, nt2)) =>
+                if !nt_first_disjoints(nt1, nt2) { return true },
+            (&MatchNt(_, nt), tok) | (tok, &MatchNt(_, nt)) =>
+                if nt_first_set_contains(nt, tok) { return true },
+            (t1, t2) => if t1 == t2 { return true }
+        }
+    }
+    return false
+}
+
+fn token_of(tt: &TokenTree) -> Token {
+    use tokenstream::TokenTree::*;
+    match tt {
+        &Delimited(_, ref delim) => OpenDelim(delim.delim.clone()),
+        &Token(_, ref tok) => tok.clone(),
+        &Sequence(..) => panic!("unexpected seq")
+    }
+}
+
+#[allow(unused_variables)]
+fn first_sets_disjoints(ma: &TokenTree, mb: &TokenTree,
+                        first_a: &FirstSets, first_b: &FirstSets) -> bool {
+    use tokenstream::TokenTree::*;
+    match (ma, mb) {
+        (&Token(_, MatchNt(_, nta)),
+         &Token(_, MatchNt(_, ntb))) => nt_first_disjoints(nta, ntb),
+
+        (&Token(_, MatchNt(_, nt)), &Token(_, ref tok)) |
+        (&Token(_, ref tok), &Token(_, MatchNt(_, nt))) =>
+            !nt_first_set_contains(nt, tok),
+
+        (&Token(_, MatchNt(_, nt)), &Delimited(_, ref delim)) |
+        (&Delimited(_, ref delim), &Token(_, MatchNt(_, nt))) =>
+            !nt_first_set_contains(nt, &OpenDelim(delim.delim.clone())),
+
+        (&Sequence(ref spa, _), &Sequence(ref spb, _)) => {
+            match (first_a.first.get(spa), first_b.first.get(spb)) {
+                (Some(&Some(ref seta)), Some(&Some(ref setb))) => {
+                    for &(_, ref tok) in setb.tokens.iter() {
+                        if first_set_contains(seta, tok) {
+                            return false
+                        }
+                    }
+                    true
+                }
+                _ => panic!("no FIRST set for sequence")
+            }
+        }
+
+        (&Sequence(ref sp, _), ref tok) => {
+            match first_a.first.get(sp) {
+                Some(&Some(ref set)) => !first_set_contains(set, &token_of(tok)),
+                _ => panic!("no FIRST set for sequence")
+            }
+        }
+
+        (ref tok, &Sequence(ref sp, _)) => {
+            match first_b.first.get(sp) {
+                Some(&Some(ref set)) => !first_set_contains(set, &token_of(tok)),
+                _ => panic!("no FIRST set for sequence")
+            }
+        }
+
+        (&Token(_, ref t1), &Token(_, ref t2)) =>
+            t1 != t2,
+
+        (&Token(_, ref t), &Delimited(_, ref delim)) |
+        (&Delimited(_, ref delim), &Token(_, ref t)) =>
+            t != &OpenDelim(delim.delim.clone()),
+
+        (&Delimited(_, ref d1), &Delimited(_, ref d2)) =>
+            d1.delim != d2.delim
+    }
+}
+
+fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> bool {
+    let mut need_disambiguation = false;
+
+    // first compute the FIRST sets. FIRST sets for tokens, delimited TTs and NT
+    // matchers are fixed; this computes the FIRST sets for all sequence TTs
+    // that appear in the matcher. Note that if a sequence starts with a matcher,
+    // e.g. $e:expr, its FIRST set will be the singleton { MatchNt(expr) }.
+    // This is okay because none of our matchable NTs can be empty.
+    let firsts_a = FirstSets::new(ma);
+    let firsts_b = FirstSets::new(mb);
+
+    // analyse until one of these cases happens:
+    // * we find an obvious disambiguation, that is, a proof that any input that
+    //   matches A will never match B, or vice versa
+    // * we find a case that is too complex to handle and reject it
+    // * we reach the end of the macro
+    for (ta, tb) in ma.iter().zip(mb.iter()) {
+        if match_same_input(ta, tb) {
+            continue;
+        }
+
+        if first_sets_disjoints(&ta, &tb, &firsts_a, &firsts_b) {
+            // accept the macro
+            return true
+        }
+
+        // if A or B is a repeated sequence or an NT matcher that is not tt,
+        // ident, or block (that is, if A or B could match several token
+        // trees), we cannot know where we should continue the analysis.
+        match (ta, tb) {
+            (&TokenTree::Sequence(_, _), _) |
+            (_, &TokenTree::Sequence(_, _)) => return false,
+
+            (&TokenTree::Token(_, MatchNt(_, nta)),
+             &TokenTree::Token(_, MatchNt(_, ntb))) =>
+                if !(nt_is_single_tt(nta) && nt_is_single_tt(ntb)) {
+                    return false
+                },
+
+            (&TokenTree::Token(_, MatchNt(_, nt)), _) |
+            (_, &TokenTree::Token(_, MatchNt(_, nt))) =>
+                if !nt_is_single_tt(nt) { return false },
+
+            _ => ()
+        }
+
+        // A and B always both match a single TT
+        match (ta, tb) {
+            (&TokenTree::Sequence(_, _), _) |
+            (_, &TokenTree::Sequence(_, _)) =>
+                // cannot happen: sequences are not always a single TT and were
+                // rejected above
+                cx.bug("unexpected seq"),
+
+            (&TokenTree::Token(_, MatchNt(_, nt)), _) |
+            (_, &TokenTree::Token(_, MatchNt(_, nt)))
+                if nt.name.as_str() == "tt" =>
+                // this is okay for now: either A will always have priority,
+                // or B will always be unreachable. but keep searching for
+                // errors further on
+                continue,
+
+            (&TokenTree::Token(_, MatchNt(_, _)),
+             &TokenTree::Token(_, MatchNt(_, _))) =>
+                // this case cannot happen. the only NTs that are a single TT
+                // and that are not tt are ident and block, which do not share
+                // any FIRST token.
+                cx.bug("unexpected NT vs. NT"),
+
+            (&TokenTree::Token(_, MatchNt(_, nt)), &TokenTree::Token(_, Ident(_))) |
+            (&TokenTree::Token(_, Ident(_)), &TokenTree::Token(_, MatchNt(_, nt))) =>
+                if nt.name.as_str() == "ident" {
+                    // NT ident vs. token ident. it's the same as with tt:
+                    // either A is included, or B is unreachable
+                    continue
+                } else {
+                    // the only possible NT here is ident, because the only token
+                    // in the FIRST set of block is {, and { is not seen as a
+                    // token but as the beginning of a Delim.
+                    // the only possible token is thus a hardcoded identifier or
+                    // keyword, so the only possible case of NT vs. Token is
+                    // the case above.
+                    cx.bug("unexpected NT vs. Token")
+                },
+
+            (&TokenTree::Token(_, MatchNt(_, nt)), &TokenTree::Delimited(_, ref delim)) |
+            (&TokenTree::Delimited(_, ref delim), &TokenTree::Token(_, MatchNt(_, nt))) =>
+                if nt.name.as_str() == "block"
+                    && delim.delim == token::DelimToken::Brace {
+                    // we cannot say much here. we cannot look inside. we
+                    // can just hope we will find an obvious disambiguation later
+                    need_disambiguation = true;
+                    continue
+                } else {
+                    // again, the other possibilities do not share any FIRST token
+                    cx.bug("unexpected NT vs. Delim")
+                },
+
+            (&TokenTree::Delimited(..), &TokenTree::Delimited(..)) => {
+                // they have the same delim. as above.
+                // FIXME: we could search for disambiguation *inside* the
+                // delimited TTs
+                need_disambiguation = true;
+                continue
+            }
+
+            // cannot happen. either they're the same token or their FIRST sets
+            // are disjoint.
+            (&TokenTree::Token(..), &TokenTree::Token(..)) |
+            (&TokenTree::Token(..), &TokenTree::Delimited(..)) |
+            (&TokenTree::Delimited(..), &TokenTree::Token(..)) =>
+                cx.bug("unexpected Token vs. Token")
+        }
+    }
+
+    // now we are at the end of one arm:
+    if need_disambiguation {
+        // we couldn't find any disambiguation. we cannot say anything about
+        // those arms, so reject conservatively.
+        // FIXME: if we are not at the end of the other arm, and the other
+        // arm cannot derive empty, I think we could accept...?
+        false
+    } else {
+        // either A is strictly included in B and the other inputs that match B
+        // will never match A, or B is included in or equal to A, which means
+        // it's unreachable. this is not our problem. accept.
+        true
+    }
+}
+
+fn nt_is_single_tt(nt: ast::Ident) -> bool {
+    match &nt.name.as_str() as &str {
+        "block" | "ident" | "tt" => true,
+        _ => false
+    }
+}
+
 fn check_lhs_nt_follows(cx: &mut ExtCtxt, lhs: &TokenTree) -> bool {
     // lhs is going to be like TokenTree::Delimited(...), where the
     // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens.
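
Conversely, a sketch of arms the check should accept as written (again a hypothetical example, not part of the commit): the arms begin with distinct hardcoded tokens, so first_sets_disjoints succeeds on the first position and check_matcher_firsts returns true immediately.

macro_rules! disjoint {
    // hypothetical example: `expr` and `ty` here are ordinary identifier
    // tokens, not fragment specifiers; they differ, so the two arms can
    // never compete for the same input.
    (expr $e:expr) => { /* ... */ };
    (ty $t:ty) => { /* ... */ };
}

Pairs such as `($b:block) => ...` versus `({ $($t:tt)* }) => ...` instead take the conservative path: need_disambiguation is set, no later disambiguation can be found, and the macro is flagged as not future-proof.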
