@@ -21,6 +21,8 @@ use super::tokenizer::*;
21
21
use std:: error:: Error ;
22
22
use std:: fmt;
23
23
24
+ use crate :: { cst, cst:: SyntaxKind as SK } ;
25
+
24
26
#[ derive( Debug , Clone , PartialEq ) ]
25
27
pub enum ParserError {
26
28
TokenizerError ( String ) ,
@@ -38,6 +40,7 @@ macro_rules! parser_err {
38
40
/// A saved position in the parse, created by `Parser::start`.
///
/// Pairs the token-stream position with a CST-builder checkpoint so that a
/// branch of the tree can later be closed retroactively via
/// `Parser::complete`, or the parse rewound via `Parser::reset`.
pub struct Marker {
    /// position in the token stream (`parser.index`)
    index: usize,
    /// checkpoint in the parser's `GreenNodeBuilder`, allowing a node to be
    /// wrapped around everything emitted since this marker was created
    builder_checkpoint: rowan::Checkpoint,
}
42
45
43
46
#[ derive( PartialEq ) ]
@@ -79,12 +82,63 @@ pub struct Parser {
79
82
tokens : Vec < Token > ,
80
83
/// The index of the first unprocessed token in `self.tokens`
81
84
index : usize ,
85
+ builder : rowan:: GreenNodeBuilder < ' static > ,
86
+
87
+ // TBD: the parser currently provides an API to move around the token
88
+ // stream without restrictions (`next_token`/`prev_token`), while the
89
+ // `builder` does not. To work around this, we keep a list of "pending"
90
+ // tokens which have already been processed via `next_token`, but may
91
+ // be put back via `prev_token`.
92
+ pending : Vec < ( cst:: SyntaxKind , rowan:: SmolStr ) > ,
93
+ }
94
+
95
/// `ret!(expr => via self.complete(m, SK::FOO, ..)` runs the following steps:
/// 1) Evaluates `expr`, possibly advancing the parser's position in the token stream;
/// 2) Closes the current branch of the CST, identified by `m`, and sets its kind to
///    `SyntaxKind::FOO`;
/// 3) Returns the value of `expr`, which should be the typed AST node, corresponding
///    to the recently closed branch.
/// The weird syntax prevents rustfmt from making each call to this macro take up 5 lines.
macro_rules! ret {
    { $e: expr => via $self: ident. complete( $m: ident, $syntax_kind: expr, ..) } => {
        {
            // Evaluate first: `$e` may consume tokens, and `complete` must
            // run only after the branch's tokens have been processed.
            let rv = $e;
            $self. complete( $m, $syntax_kind, rv)
        }
    } ;
}
83
110
84
111
impl Parser {
85
112
/// Parse the specified tokens
86
113
pub fn new ( tokens : Vec < Token > ) -> Self {
87
- Parser { tokens, index : 0 }
114
+ let mut parser = Parser {
115
+ tokens,
116
+ index : 0 ,
117
+ builder : rowan:: GreenNodeBuilder :: new ( ) ,
118
+ pending : vec ! [ ] ,
119
+ } ;
120
+ parser. builder . start_node ( SK :: ROOT . into ( ) ) ;
121
+ parser
122
+ }
123
+
124
/// Consumes the parser and returns the root of the concrete syntax tree
/// built while parsing. Any tokens left unprocessed are swept into a
/// trailing `ERR` node so the CST still covers the whole input.
pub fn syntax(mut self) -> cst::SyntaxNode {
    if self.peek_token().is_some() {
        // Not at end-of-file: either some extraneous tokens left after
        // successfully parsing something, or we've bailed with an error.
        //
        // TBD: ideally we wouldn't abandon the "current" branch of the
        // CST on error, instead `.complete()`-ing it as usual, but that's
        // in conflict with having to return the typed AST, which can't
        // lack certain bits.
        self.builder.start_node(SK::ERR.into());
        // Draining via `next_token` routes the leftovers through `pending`
        // and into the builder.
        while self.next_token().is_some() {}
        self.builder.finish_node();
    } else {
        // TBD: this is required until all parser methods end with `ret!`.
        self.flush_pending_buffer();
    }
    // Close the ROOT node opened in `new()`.
    self.builder.finish_node();
    cst::SyntaxNode::new_root(self.builder.finish())
}
89
143
90
144
/// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
@@ -749,11 +803,44 @@ impl Parser {
749
803
}
750
804
751
805
pub fn start ( & mut self ) -> Marker {
752
- Marker { index : self . index }
806
+ self . flush_pending_buffer ( ) ;
807
+ Marker {
808
+ index : self . index ,
809
+ builder_checkpoint : self . builder . checkpoint ( ) ,
810
+ }
811
+ }
812
+
813
+ fn start_if < F > ( & mut self , f : F ) -> Option < Marker >
814
+ where
815
+ F : FnOnce ( & mut Parser ) -> bool ,
816
+ {
817
+ self . flush_pending_buffer ( ) ;
818
+ let m = self . start ( ) ;
819
+ if f ( self ) {
820
+ Some ( m)
821
+ } else {
822
+ None
823
+ }
753
824
}
754
825
755
826
/// Rewinds the token stream to where `m` was created and discards any
/// consumed-but-unflushed tokens, so they are not fed to the CST builder.
pub fn reset(&mut self, m: Marker) {
    self.index = m.index;
    // `clear()` is the idiomatic equivalent of `truncate(0)`.
    self.pending.clear();
    // TBD: rowan's builder does not allow reverting to a checkpoint
}
831
+
832
/// Closes the CST branch opened at marker `m`, giving it `kind`, and
/// returns `rv` (the typed AST value) unchanged for caller convenience —
/// this is what lets `ret!` thread a value through.
pub fn complete<T>(&mut self, m: Marker, kind: cst::SyntaxKind, rv: T) -> T {
    // Flush first so every token consumed since `m` ends up inside the
    // node being closed.
    self.flush_pending_buffer();
    self.builder
        .start_node_at(m.builder_checkpoint, kind.into());
    self.builder.finish_node();
    rv
}
839
+
840
+ pub fn flush_pending_buffer ( & mut self ) {
841
+ for ( kind, s) in self . pending . drain ( ..) {
842
+ self . builder . token ( kind. into ( ) , s) ;
843
+ }
757
844
}
758
845
759
846
/// Return the first non-whitespace token that has not yet been processed
@@ -783,9 +870,10 @@ impl Parser {
783
870
/// (or None if reached end-of-file) and mark it as processed. OK to call
784
871
/// repeatedly after reaching EOF.
785
872
pub fn next_token ( & mut self ) -> Option < Token > {
873
+ self . flush_pending_buffer ( ) ;
874
+
786
875
loop {
787
- self . index += 1 ;
788
- match self . tokens . get ( self . index - 1 ) {
876
+ match self . next_token_no_skip ( ) {
789
877
Some ( Token :: Whitespace ( _) ) => continue ,
790
878
token => return token. cloned ( ) ,
791
879
}
@@ -795,7 +883,12 @@ impl Parser {
795
883
/// Return the first unprocessed token, possibly whitespace.
pub fn next_token_no_skip(&mut self) -> Option<&Token> {
    self.index += 1;
    #[allow(clippy::let_and_return)]
    let token = self.tokens.get(self.index - 1);
    if let Some(t) = token {
        // Record the consumed token so it can later be emitted to the CST
        // builder (via `flush_pending_buffer`) or put back by `prev_token`.
        self.pending.push((t.kind(), t.to_string().into()));
    }
    token
}
800
893
801
894
/// Push back the last one non-whitespace token. Must be called after
@@ -805,9 +898,23 @@ impl Parser {
805
898
loop {
806
899
assert ! ( self . index > 0 ) ;
807
900
self . index -= 1 ;
901
+
902
+ if !self . pending . is_empty ( ) {
903
+ self . pending . pop ( ) ;
904
+ } else {
905
+ assert ! ( self . index >= self . tokens. len( ) ) ; // past EOF
906
+ }
907
+
808
908
if let Some ( Token :: Whitespace ( _) ) = self . tokens . get ( self . index ) {
809
909
continue ;
810
910
}
911
+
912
+ // There may be only one non-whitespace token `pending` as by
913
+ // convention, backtracking (i.e. going more than one token back)
914
+ // is done via `start`/`reset` instead.
915
+ for tok in & self . pending {
916
+ assert ! ( tok. 0 == SK :: Whitespace ) ;
917
+ }
811
918
return ;
812
919
}
813
920
}
@@ -832,6 +939,10 @@ impl Parser {
832
939
match self . peek_token ( ) {
833
940
Some ( Token :: Word ( ref k) ) if expected. eq_ignore_ascii_case ( & k. keyword ) => {
834
941
self . next_token ( ) ;
942
+ // TBD: a hack to change the "kind" of the token just processed
943
+ let mut p = self . pending . pop ( ) . unwrap ( ) ;
944
+ p. 0 = SK :: KW . into ( ) ;
945
+ self . pending . push ( p) ;
835
946
true
836
947
}
837
948
_ => false ,
0 commit comments