Skip to content

Commit c261afc

Browse files
committed
feat: Parsing SQL with missing sample values
1 parent 0810819 commit c261afc

23 files changed

+170
-1193
lines changed

build_wasm.sh

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22

33
set -eux
44

5-
cd crates/postgresql-cst-parser-wasm
6-
rm -rf ./pkg
7-
wasm-pack build --release --target web
8-
cp pkg/*.js pkg/*.ts pkg/*.wasm ../../docs/js
9-
cd ../../docs
5+
cd demo
6+
npm run copy-wasm
7+
npm run build
8+
cd ../docs
109
python3 -m http.server 8000

crates/postgresql-cst-parser/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@ license-file = "../../LICENSE"
1313
regex = "1.10.2"
1414
cstree = { version = "0.12.0", features = ["derive"] }
1515
miniz_oxide = "0.7.1"
16+
17+
[features]
18+
default = ["remove-empty-node"]
19+
remove-empty-node = []

crates/postgresql-cst-parser/src/cst.rs

Lines changed: 73 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::collections::{HashMap, HashSet};
2+
13
use cstree::{
24
build::GreenNodeBuilder, green::GreenNode, interning::Resolver, RawSyntaxKind, Syntax,
35
};
@@ -56,7 +58,16 @@ impl Parser {
5658
&mut self,
5759
node: &Node,
5860
peekable: &mut std::iter::Peekable<std::vec::IntoIter<(SyntaxKind, usize, usize, &str)>>,
61+
complement_node_or_token: &HashSet<usize>,
5962
) {
63+
if cfg!(feature = "remove-empty-node") {
64+
if node.start_byte_pos == node.end_byte_pos
65+
&& !complement_node_or_token.contains(&node.start_byte_pos)
66+
{
67+
return;
68+
}
69+
}
70+
6071
while let Some((kind, start, _, text)) = peekable.peek() {
6172
// TODO: Consider whether the presence or absence of an equals sign changes the position of comments. Determine which option is preferable
6273
if *start >= node.start_byte_pos {
@@ -74,7 +85,7 @@ impl Parser {
7485
self.builder.start_node(kind);
7586
node.children
7687
.iter()
77-
.for_each(|c| self.parse_rec(c, peekable));
88+
.for_each(|c| self.parse_rec(c, peekable, complement_node_or_token));
7889
self.builder.finish_node();
7990
}
8091
}
@@ -83,13 +94,14 @@ impl Parser {
8394
mut self,
8495
nodes: &Vec<&Node>,
8596
extras: Vec<(SyntaxKind, usize, usize, &str)>,
97+
complement_node_or_token: &HashSet<usize>,
8698
) -> (GreenNode, impl Resolver) {
8799
let mut peekable = extras.into_iter().peekable();
88100

89101
self.builder.start_node(SyntaxKind::Root);
90102

91103
for node in nodes {
92-
self.parse_rec(node, &mut peekable);
104+
self.parse_rec(node, &mut peekable, complement_node_or_token);
93105
}
94106

95107
while let Some((kind, _, _, text)) = peekable.peek() {
@@ -177,30 +189,54 @@ fn init_tokens(tokens: &mut [Token]) {
177189
}
178190

179191
#[inline]
180-
fn is_bind_variable_comment(s: impl AsRef<str>) -> bool {
192+
fn is_replacement_value_comment(s: impl AsRef<str>) -> bool {
181193
let s = s.as_ref();
182-
s.starts_with("/*") && s.ends_with("*/") && !s.contains('\n')
194+
s.starts_with("/*#") && s.ends_with("*/") && !s.contains('\n')
183195
}
184196

185197
#[inline]
186-
fn is_missing_bind_variable(
198+
fn is_missing_from_replacement_value(
199+
stack: &[(u32, Node)],
187200
extras: &[(SyntaxKind, usize, usize, &str)],
188201
action_table: &[i16],
189202
state: u32,
190203
) -> bool {
191204
match extras.last() {
192-
Some((_, _, _, s)) => {
193-
dbg!(s, is_bind_variable_comment(s));
205+
Some((_, _, _, s))
206+
if is_replacement_value_comment(s)
207+
&& stack.last().unwrap().1.component_id == SyntaxKind::FROM as u32 =>
208+
{
209+
let action_index =
210+
(state * num_terminal_symbol()) as usize + SyntaxKind::IDENT as usize;
211+
212+
let a = action_table[action_index];
213+
a != 0x7FFF
194214
}
195-
_ => (),
215+
_ => false,
196216
}
217+
}
218+
219+
#[inline]
220+
fn is_bind_variable_comment(s: impl AsRef<str>) -> bool {
221+
let s = s.as_ref();
222+
s.starts_with("/*")
223+
&& s.ends_with("*/")
224+
&& !s.contains('\n')
225+
&& !matches!(s.chars().nth(2).unwrap(), '$' | '#')
226+
}
227+
228+
#[inline]
229+
fn is_missing_bind_variable(
230+
extras: &[(SyntaxKind, usize, usize, &str)],
231+
action_table: &[i16],
232+
state: u32,
233+
) -> bool {
197234
match extras.last() {
198235
Some((_, _, _, s)) if is_bind_variable_comment(s) => {
199236
let action_index =
200237
(state * num_terminal_symbol()) as usize + SyntaxKind::SCONST as usize;
201238

202239
let a = action_table[action_index];
203-
dbg!(a);
204240
a != 0x7FFF
205241
}
206242
_ => false,
@@ -266,6 +302,7 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
266302

267303
let mut last_pos = 0;
268304
let mut extras: Vec<(SyntaxKind, usize, usize, &str)> = Vec::new();
305+
let mut complement_node_or_token = HashSet::new();
269306

270307
loop {
271308
let state = stack.last().unwrap().0;
@@ -313,44 +350,38 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
313350
v if v < 0 => Action::Reduce((-v - 1) as usize),
314351
_ => Action::Accept,
315352
};
316-
// dbg!(&action);
317-
318-
dbg!(
319-
&token.value,
320-
is_missing_bind_variable(&extras, action_table, state),
321-
);
322-
// if action == Action::Error {
323-
// dbg!(
324-
// token.start_byte_pos,
325-
// &token.value,
326-
// is_missing_bind_variable(&extras, action_table, state),
327-
// extras.last()
328-
// );
329-
if is_missing_bind_variable(&extras, action_table, state) {
330-
let last_extra = extras.last().unwrap();
331-
if token.start_byte_pos != last_extra.2 {
353+
354+
if Some(token.start_byte_pos) != extras.last().map(|e| e.2) {
355+
if is_missing_from_replacement_value(&stack, &extras, action_table, state) {
356+
let last_extra = extras.last().unwrap();
357+
token = Token {
358+
start_byte_pos: last_extra.2,
359+
end_byte_pos: last_extra.2,
360+
kind: TokenKind::IDENT, // とりあえず識別子としておく
361+
value: String::new(),
362+
};
363+
cid = SyntaxKind::IDENT as u32;
364+
complement_node_or_token.insert(token.start_byte_pos);
365+
366+
action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
367+
0x7FFF => Action::Error,
368+
v if v > 0 => Action::Shift((v - 1) as usize),
369+
v if v < 0 => Action::Reduce((-v - 1) as usize),
370+
_ => Action::Accept,
371+
};
372+
insert_dummy_token = true;
373+
}
374+
375+
if is_missing_bind_variable(&extras, action_table, state) {
376+
let last_extra = extras.last().unwrap();
332377
token = Token {
333378
start_byte_pos: last_extra.2,
334379
end_byte_pos: last_extra.2,
335380
kind: TokenKind::SCONST, // とりあえず文字列としておく
336381
value: String::new(),
337382
};
338383
cid = SyntaxKind::SCONST as u32;
339-
340-
// let node = Node {
341-
// token: Some(Token {
342-
// start_byte_pos: token.start_byte_pos,
343-
// end_byte_pos: token.start_byte_pos,
344-
// kind: TokenKind::SCONST, // とりあえず文字列としておく
345-
// value: String::new(),
346-
// }),
347-
// component_id: SyntaxKind::SCONST as u32,
348-
// children: Vec::new(),
349-
// start_byte_pos: token.start_byte_pos,
350-
// end_byte_pos: token.end_byte_pos,
351-
// };
352-
353-
// stack.push((next_state as u32, node));
384+
complement_node_or_token.insert(token.start_byte_pos);
354385

355386
action = match action_table[(state * num_terminal_symbol() + cid) as usize] {
356387
0x7FFF => Action::Error,
@@ -361,8 +392,6 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
361392
insert_dummy_token = true;
362393
}
363394
}
364-
// dbg!(&action, &token);
365-
// }
366395

367396
match action {
368397
Action::Shift(next_state) => {
@@ -494,7 +523,7 @@ pub fn parse(input: &str) -> Result<ResolvedNode, ParseError> {
494523
builder: GreenNodeBuilder::new(),
495524
};
496525
let root: Vec<&Node> = stack[1..].iter().map(|s| &s.1).collect();
497-
let (ast, resolver) = parser.parse(&root, extras);
526+
let (ast, resolver) = parser.parse(&root, extras, &complement_node_or_token);
498527

499528
Ok(SyntaxNode::new_root_with_resolver(ast, resolver))
500529
}
@@ -512,7 +541,7 @@ select
512541
, /*fuga*/ * 1
513542
, /*fuga*/ || 'hoge'
514543
from
515-
tbl t
544+
/*#tbl*/ t
516545
where
517546
/*val*/ = 1;
518547
"#;

0 commit comments

Comments
 (0)