Skip to content

Commit 75c8326

Browse files
committed
Add some structure to the CST (TBD)
The naming scheme for `SyntaxKind`s and the CST structure is only an example; see the README for the discussion. Note also the 'TBD' markers in the patch.
1 parent d977eaa commit 75c8326

File tree

2 files changed

+93
-48
lines changed

2 files changed

+93
-48
lines changed

src/cst.rs

+19
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,25 @@ pub enum SyntaxKind {
108108
ROOT,
109109
ERR,
110110
KW,
111+
IDENT,
112+
OBJECT_NAME,
113+
QUERY,
114+
CTES,
115+
BODY,
116+
SELECT,
117+
PROJECTION,
118+
SELECT_ITEM_WILDCARD, SELECT_ITEM_QWILDCARD, SELECT_ITEM_EXPR_WITH_ALIAS, SELECT_ITEM_UNNAMED,
119+
FROM,
120+
WHERE,
121+
ORDER_BY,
122+
123+
JoinConstraint__On, JoinConstraint__Using,
124+
125+
EXPR_PREFIX, // TBD
126+
EXPR,
127+
EXPR_NESTED,
128+
EXPR_SUBQUERY,
129+
BIN_EXPR,
111130

112131
// Sentinel value
113132
LAST

src/parser.rs

+74-48
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ macro_rules! parser_err {
4040
}
4141

4242
/// The parser state
43+
#[derive(Clone)]
4344
pub struct Marker {
4445
/// position in the token stream (`parser.index`)
4546
index: usize,
@@ -238,6 +239,7 @@ impl Parser {
238239
/// the `precedence` is 0 (representing the lowest binding power).
239240
pub fn parse_subexpr(&mut self, precedence: u8) -> Result<Expr, ParserError> {
240241
debug!("parsing expr");
242+
let m = self.start();
241243
let mut expr = self.parse_prefix()?;
242244
debug!("prefix: {:?}", expr);
243245
loop {
@@ -259,13 +261,14 @@ impl Parser {
259261
// | |< current token (returned by `peek_token()`;
260262
// `precedence` has `next_precedence`)
261263
//
262-
expr = self.parse_infix(expr, next_precedence)?;
264+
expr = self.parse_infix(m.clone(), expr, next_precedence)?;
263265
}
264266
Ok(expr)
265267
}
266268

267269
/// Parse an expression prefix
268270
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
271+
let m = self.start();
269272
let tok = self
270273
.next_token()
271274
.ok_or_else(|| ParserError::ParserError("Unexpected EOF".to_string()))?;
@@ -341,11 +344,14 @@ impl Parser {
341344
Token::LParen => {
342345
let expr = if self.parse_keyword("SELECT") || self.parse_keyword("WITH") {
343346
self.prev_token();
344-
Expr::Subquery(Box::new(self.parse_query()?))
347+
let expr = Expr::Subquery(Box::new(self.parse_query()?));
348+
self.expect_token(&Token::RParen)?;
349+
ret!(expr => via self.complete(m, SK::EXPR_SUBQUERY, ..))
345350
} else {
346-
Expr::Nested(Box::new(self.parse_expr()?))
351+
let expr = Expr::Nested(Box::new(self.parse_expr()?));
352+
self.expect_token(&Token::RParen)?;
353+
ret!(expr => via self.complete(m, SK::EXPR_NESTED, ..))
347354
};
348-
self.expect_token(&Token::RParen)?;
349355
Ok(expr)
350356
}
351357
unexpected => self.expected("an expression", Some(unexpected)),
@@ -651,7 +657,12 @@ impl Parser {
651657
}
652658

653659
/// Parse an operator following an expression
654-
pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> {
660+
pub fn parse_infix(
661+
&mut self,
662+
m: Marker,
663+
expr: Expr,
664+
precedence: u8,
665+
) -> Result<Expr, ParserError> {
655666
debug!("parsing infix");
656667
let tok = self.next_token().unwrap(); // safe as EOF's precedence is the lowest
657668

@@ -688,11 +699,12 @@ impl Parser {
688699
};
689700

690701
if let Some(op) = regular_binary_operator {
691-
Ok(Expr::BinaryOp {
692-
left: Box::new(expr),
693-
op,
694-
right: Box::new(self.parse_subexpr(precedence)?),
695-
})
702+
ret!(Ok(Expr::BinaryOp {
703+
left: Box::new(expr),
704+
op,
705+
right: Box::new(self.parse_subexpr(precedence)?),
706+
})
707+
=> via self.complete(m, SK::BIN_EXPR, ..)) // TBD OTHER
696708
} else if let Token::Word(ref k) = tok {
697709
match k.keyword.as_ref() {
698710
"IS" => {
@@ -1669,21 +1681,26 @@ impl Parser {
16691681
/// Parse a possibly qualified, possibly quoted identifier, e.g.
16701682
/// `foo` or `myschema."table"`
16711683
pub fn parse_object_name(&mut self) -> Result<ObjectName, ParserError> {
1684+
let m = self.start();
16721685
let mut idents = vec![];
16731686
loop {
16741687
idents.push(self.parse_identifier()?);
16751688
if !self.consume_token(&Token::Period) {
16761689
break;
16771690
}
16781691
}
1679-
Ok(ObjectName(idents))
1692+
ret!(Ok(ObjectName(idents))
1693+
=> via self.complete(m, SK::OBJECT_NAME, ..))
16801694
}
16811695

16821696
/// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
16831697
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
1698+
let m = self.start();
16841699
match self.next_token() {
1685-
Some(Token::Word(w)) => Ok(w.to_ident()),
1686-
unexpected => self.expected("identifier", unexpected),
1700+
Some(Token::Word(w)) => ret!(Ok(w.to_ident())
1701+
=> via self.complete(m, SK::IDENT, ..)),
1702+
unexpected => ret!(self.expected("identifier", unexpected)
1703+
=> via self.complete(m, SK::ERR, ..)),
16871704
}
16881705
}
16891706

@@ -1750,47 +1767,44 @@ impl Parser {
17501767
/// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't
17511768
/// expect the initial keyword to be already consumed
17521769
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
1753-
let ctes = if self.parse_keyword("WITH") {
1770+
let m = self.start();
1771+
let ctes = if let Some(m) = self.start_if(|p| p.parse_keyword("WITH")) {
17541772
// TODO: optional RECURSIVE
1755-
self.parse_comma_separated(Parser::parse_cte)?
1773+
ret!(self.parse_comma_separated(Parser::parse_cte)?
1774+
=> via self.complete(m, SK::CTES, ..))
17561775
} else {
17571776
vec![]
17581777
};
17591778

17601779
let body = self.parse_query_body(0)?;
17611780

1762-
let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) {
1763-
self.parse_comma_separated(Parser::parse_order_by_expr)?
1781+
let order_by = if let Some(m) = self.start_if(|p| p.parse_keywords(vec!["ORDER", "BY"])) {
1782+
ret!(self.parse_comma_separated(Parser::parse_order_by_expr)?
1783+
=> via self.complete(m, SK::ORDER_BY, ..))
17641784
} else {
17651785
vec![]
17661786
};
17671787

1768-
let limit = if self.parse_keyword("LIMIT") {
1769-
self.parse_limit()?
1788+
let limit = if let Some(_m) = self.start_if(|p| p.parse_keyword("LIMIT")) {
1789+
self.parse_limit()? // TBD
17701790
} else {
17711791
None
17721792
};
17731793

1774-
let offset = if self.parse_keyword("OFFSET") {
1775-
Some(self.parse_offset()?)
1794+
let offset = if let Some(_m) = self.start_if(|p| p.parse_keyword("OFFSET")) {
1795+
Some(self.parse_offset()?) // TBD
17761796
} else {
17771797
None
17781798
};
17791799

1780-
let fetch = if self.parse_keyword("FETCH") {
1781-
Some(self.parse_fetch()?)
1800+
let fetch = if let Some(_m) = self.start_if(|p| p.parse_keyword("FETCH")) {
1801+
Some(self.parse_fetch()?) // TBD
17821802
} else {
17831803
None
17841804
};
17851805

1786-
Ok(Query {
1787-
ctes,
1788-
body,
1789-
limit,
1790-
order_by,
1791-
offset,
1792-
fetch,
1793-
})
1806+
ret!(Ok(Query { ctes, body, limit, order_by, offset, fetch })
1807+
=> via self.complete(m, SK::QUERY, ..))
17941808
}
17951809

17961810
/// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`)
@@ -1817,15 +1831,16 @@ impl Parser {
18171831
fn parse_query_body(&mut self, precedence: u8) -> Result<SetExpr, ParserError> {
18181832
// We parse the expression using a Pratt parser, as in `parse_expr()`.
18191833
// Start by parsing a restricted SELECT or a `(subquery)`:
1820-
let mut expr = if self.parse_keyword("SELECT") {
1821-
SetExpr::Select(Box::new(self.parse_select()?))
1834+
let mut expr = if let Some(m) = self.start_if(|parser| parser.parse_keyword("SELECT")) {
1835+
ret!(SetExpr::Select(Box::new(self.parse_select()?))
1836+
=> via self.complete(m, SK::SELECT, ..))
18221837
} else if self.consume_token(&Token::LParen) {
18231838
// CTEs are not allowed here, but the parser currently accepts them
18241839
let subquery = self.parse_query()?;
18251840
self.expect_token(&Token::RParen)?;
1826-
SetExpr::Query(Box::new(subquery))
1841+
SetExpr::Query(Box::new(subquery)) // TBD
18271842
} else if self.parse_keyword("VALUES") {
1828-
SetExpr::Values(self.parse_values()?)
1843+
SetExpr::Values(self.parse_values()?) // TBD
18291844
} else {
18301845
return self.expected(
18311846
"SELECT, VALUES, or a subquery in the query body",
@@ -1850,6 +1865,7 @@ impl Parser {
18501865
}
18511866
self.next_token(); // skip past the set operator
18521867
expr = SetExpr::SetOperation {
1868+
// TBD ret!
18531869
left: Box::new(expr),
18541870
op: op.unwrap(),
18551871
all: self.parse_keyword("ALL"),
@@ -1880,21 +1896,24 @@ impl Parser {
18801896
None
18811897
};
18821898

1883-
let projection = self.parse_comma_separated(Parser::parse_select_item)?;
1899+
let m = self.start();
1900+
let projection = ret!(self.parse_comma_separated(Parser::parse_select_item)?
1901+
=> via self.complete(m, SK::PROJECTION, ..));
18841902

18851903
// Note that for keywords to be properly handled here, they need to be
18861904
// added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`,
18871905
// otherwise they may be parsed as an alias as part of the `projection`
18881906
// or `from`.
18891907

1890-
let from = if self.parse_keyword("FROM") {
1891-
self.parse_comma_separated(Parser::parse_table_and_joins)?
1908+
let from = if let Some(m) = self.start_if(|parser| parser.parse_keyword("FROM")) {
1909+
ret!(self.parse_comma_separated(Parser::parse_table_and_joins)?
1910+
=> via self.complete(m, SK::FROM, ..))
18921911
} else {
18931912
vec![]
18941913
};
18951914

1896-
let selection = if self.parse_keyword("WHERE") {
1897-
Some(self.parse_expr()?)
1915+
let selection = if let Some(m) = self.start_if(|parser| parser.parse_keyword("WHERE")) {
1916+
ret!(Some(self.parse_expr()?) => via self.complete(m, SK::WHERE, ..))
18981917
} else {
18991918
None
19001919
};
@@ -2162,12 +2181,14 @@ impl Parser {
21622181
fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
21632182
if natural {
21642183
Ok(JoinConstraint::Natural)
2165-
} else if self.parse_keyword("ON") {
2184+
} else if let Some(m) = self.start_if(|parser| parser.parse_keyword("ON")) {
21662185
let constraint = self.parse_expr()?;
2167-
Ok(JoinConstraint::On(constraint))
2168-
} else if self.parse_keyword("USING") {
2186+
ret!(Ok(JoinConstraint::On(constraint))
2187+
=> via self.complete(m, SK::JoinConstraint__On, ..))
2188+
} else if let Some(m) = self.start_if(|parser| parser.parse_keyword("USING")) {
21692189
let columns = self.parse_parenthesized_column_list(Mandatory)?;
2170-
Ok(JoinConstraint::Using(columns))
2190+
ret!(Ok(JoinConstraint::Using(columns))
2191+
=> via self.complete(m, SK::JoinConstraint__Using, ..))
21712192
} else {
21722193
self.expected("ON, or USING after JOIN", self.peek_token())
21732194
}
@@ -2222,17 +2243,22 @@ impl Parser {
22222243

22232244
/// Parse a comma-delimited list of projections after SELECT
22242245
pub fn parse_select_item(&mut self) -> Result<SelectItem, ParserError> {
2246+
let m = self.start();
22252247
let expr = self.parse_expr()?;
22262248
if let Expr::Wildcard = expr {
2227-
Ok(SelectItem::Wildcard)
2249+
ret!(Ok(SelectItem::Wildcard)
2250+
=> via self.complete(m, SK::SELECT_ITEM_WILDCARD, ..))
22282251
} else if let Expr::QualifiedWildcard(prefix) = expr {
2229-
Ok(SelectItem::QualifiedWildcard(ObjectName(prefix)))
2252+
ret!(Ok(SelectItem::QualifiedWildcard(ObjectName(prefix)))
2253+
=> via self.complete(m, SK::SELECT_ITEM_QWILDCARD, ..))
22302254
} else {
22312255
// `expr` is a regular SQL expression and can be followed by an alias
22322256
if let Some(alias) = self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? {
2233-
Ok(SelectItem::ExprWithAlias { expr, alias })
2257+
ret!(Ok(SelectItem::ExprWithAlias { expr, alias })
2258+
=> via self.complete(m, SK::SELECT_ITEM_EXPR_WITH_ALIAS, ..))
22342259
} else {
2235-
Ok(SelectItem::UnnamedExpr(expr))
2260+
ret!(Ok(SelectItem::UnnamedExpr(expr))
2261+
=> via self.complete(m, SK::SELECT_ITEM_UNNAMED, ..))
22362262
}
22372263
}
22382264
}

0 commit comments

Comments
 (0)