@@ -21,6 +21,8 @@ use super::tokenizer::*;
2121use std:: error:: Error ;
2222use std:: fmt;
2323
24+ use crate :: { cst, cst:: SyntaxKind as SK } ;
25+
2426#[ derive( Debug , Clone , PartialEq ) ]
2527pub enum ParserError {
2628 TokenizerError ( String ) ,
@@ -38,6 +40,7 @@ macro_rules! parser_err {
3840pub struct Marker {
3941 /// position in the token stream (`parser.index`)
4042 index : usize ,
43+ builder_checkpoint : rowan:: Checkpoint ,
4144}
4245
4346#[ derive( PartialEq ) ]
@@ -79,12 +82,63 @@ pub struct Parser {
7982 tokens : Vec < Token > ,
8083 /// The index of the first unprocessed token in `self.tokens`
8184 index : usize ,
85+ builder : rowan:: GreenNodeBuilder < ' static > ,
86+
87+ // TBD: the parser currently provides an API to move around the token
88+ // stream without restrictions (`next_token`/`prev_token`), while the
89+ // `builder` does not. To work around this, we keep a list of "pending"
90+ // tokens which have already been processed via `next_token`, but may
91+ // be put back via `prev_token`.
92+ pending : Vec < ( cst:: SyntaxKind , rowan:: SmolStr ) > ,
93+ }
94+
/// `ret!(expr => via self.complete(m, SK::FOO, ..))` runs the following steps:
96+ /// 1) Evaluates `expr`, possibly advancing the parser's position in the token stream;
97+ /// 2) Closes the current branch of the CST, identified by `m`, and sets its kind to
98+ /// `SyntaxKind::FOO`;
99+ /// 3) Returns the value of `expr`, which should be the typed AST node, corresponding
100+ /// to the recently closed branch.
101+ /// The weird syntax prevents rustfmt from making each call to this macro take up 5 lines.
macro_rules! ret {
    { $e:expr => via $self:ident.complete($m:ident, $syntax_kind:expr, ..) } => {{
        // Evaluate the expression first (this may advance the parser),
        // then close the CST branch identified by `$m` with the given
        // kind, passing the expression's value through unchanged.
        let rv = $e;
        $self.complete($m, $syntax_kind, rv)
    }};
}
83110
84111impl Parser {
85112 /// Parse the specified tokens
86113 pub fn new ( tokens : Vec < Token > ) -> Self {
87- Parser { tokens, index : 0 }
114+ let mut parser = Parser {
115+ tokens,
116+ index : 0 ,
117+ builder : rowan:: GreenNodeBuilder :: new ( ) ,
118+ pending : vec ! [ ] ,
119+ } ;
120+ parser. builder . start_node ( SK :: ROOT . into ( ) ) ;
121+ parser
122+ }
123+
124+ pub fn syntax ( mut self ) -> cst:: SyntaxNode {
125+ if self . peek_token ( ) . is_some ( ) {
126+ // Not at end-of-file: either some extraneous tokens left after
127+ // successfully parsing something, or we've bailed with an error.
128+ //
129+ // TBD: ideally we wouldn't abandon the "current" branch of the
130+ // CST on error, instead `.complete()`-ing it as usual, but that's
131+ // in conflict with having to return the typed AST, which can't
132+ // lack certain bits.
133+ self . builder . start_node ( SK :: ERR . into ( ) ) ;
134+ while self . next_token ( ) . is_some ( ) { }
135+ self . builder . finish_node ( ) ;
136+ } else {
137+ // TBD: this is required until all parser methods end with `ret!`.
138+ self . flush_pending_buffer ( ) ;
139+ }
140+ self . builder . finish_node ( ) ;
141+ cst:: SyntaxNode :: new_root ( self . builder . finish ( ) )
88142 }
89143
90144 /// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
@@ -749,11 +803,44 @@ impl Parser {
749803 }
750804
751805 pub fn start ( & mut self ) -> Marker {
752- Marker { index : self . index }
806+ self . flush_pending_buffer ( ) ;
807+ Marker {
808+ index : self . index ,
809+ builder_checkpoint : self . builder . checkpoint ( ) ,
810+ }
811+ }
812+
813+ fn start_if < F > ( & mut self , f : F ) -> Option < Marker >
814+ where
815+ F : FnOnce ( & mut Parser ) -> bool ,
816+ {
817+ self . flush_pending_buffer ( ) ;
818+ let m = self . start ( ) ;
819+ if f ( self ) {
820+ Some ( m)
821+ } else {
822+ None
823+ }
753824 }
754825
755826 pub fn reset ( & mut self , m : Marker ) {
756827 self . index = m. index ;
828+ self . pending . truncate ( 0 ) ;
829+ // TBD: rowan's builder does not allow reverting to a checkpoint
830+ }
831+
832+ pub fn complete < T > ( & mut self , m : Marker , kind : cst:: SyntaxKind , rv : T ) -> T {
833+ self . flush_pending_buffer ( ) ;
834+ self . builder
835+ . start_node_at ( m. builder_checkpoint , kind. into ( ) ) ;
836+ self . builder . finish_node ( ) ;
837+ rv
838+ }
839+
840+ pub fn flush_pending_buffer ( & mut self ) {
841+ for ( kind, s) in self . pending . drain ( ..) {
842+ self . builder . token ( kind. into ( ) , s) ;
843+ }
757844 }
758845
759846 /// Return the first non-whitespace token that has not yet been processed
@@ -783,9 +870,10 @@ impl Parser {
783870 /// (or None if reached end-of-file) and mark it as processed. OK to call
784871 /// repeatedly after reaching EOF.
785872 pub fn next_token ( & mut self ) -> Option < Token > {
873+ self . flush_pending_buffer ( ) ;
874+
786875 loop {
787- self . index += 1 ;
788- match self . tokens . get ( self . index - 1 ) {
876+ match self . next_token_no_skip ( ) {
789877 Some ( Token :: Whitespace ( _) ) => continue ,
790878 token => return token. cloned ( ) ,
791879 }
@@ -795,7 +883,12 @@ impl Parser {
795883 /// Return the first unprocessed token, possibly whitespace.
796884 pub fn next_token_no_skip ( & mut self ) -> Option < & Token > {
797885 self . index += 1 ;
798- self . tokens . get ( self . index - 1 )
886+ #[ allow( clippy:: let_and_return) ]
887+ let token = self . tokens . get ( self . index - 1 ) ;
888+ if let Some ( t) = token {
889+ self . pending . push ( ( t. kind ( ) , t. to_string ( ) . into ( ) ) ) ;
890+ }
891+ token
799892 }
800893
801894 /// Push back the last one non-whitespace token. Must be called after
@@ -805,9 +898,23 @@ impl Parser {
805898 loop {
806899 assert ! ( self . index > 0 ) ;
807900 self . index -= 1 ;
901+
902+ if !self . pending . is_empty ( ) {
903+ self . pending . pop ( ) ;
904+ } else {
905+ assert ! ( self . index >= self . tokens. len( ) ) ; // past EOF
906+ }
907+
808908 if let Some ( Token :: Whitespace ( _) ) = self . tokens . get ( self . index ) {
809909 continue ;
810910 }
911+
912+ // There may be only one non-whitespace token `pending` as by
913+ // convention, backtracking (i.e. going more than one token back)
914+ // is done via `start`/`reset` instead.
915+ for tok in & self . pending {
916+ assert ! ( tok. 0 == SK :: Whitespace ) ;
917+ }
811918 return ;
812919 }
813920 }
@@ -832,6 +939,10 @@ impl Parser {
832939 match self . peek_token ( ) {
833940 Some ( Token :: Word ( ref k) ) if expected. eq_ignore_ascii_case ( & k. keyword ) => {
834941 self . next_token ( ) ;
942+ // TBD: a hack to change the "kind" of the token just processed
943+ let mut p = self . pending . pop ( ) . unwrap ( ) ;
944+ p. 0 = SK :: KW . into ( ) ;
945+ self . pending . push ( p) ;
835946 true
836947 }
837948 _ => false ,
0 commit comments