Skip to content

Commit b5fbe42

Browse files
committed
Support parser backtracking in the GreenNodeBuilder, part 1
Copy GreenNodeBuilder from rowan with the minimal changes to make it work as part of our crate.
1 parent 07ed839 commit b5fbe42

File tree

3 files changed

+220
-3
lines changed

3 files changed

+220
-3
lines changed

src/builder.rs

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// Licensed under the Apache License, Version 2.0 (the "License");
2+
// you may not use this file except in compliance with the License.
3+
// You may obtain a copy of the License at
4+
//
5+
// http://www.apache.org/licenses/LICENSE-2.0
6+
//
7+
// Unless required by applicable law or agreed to in writing, software
8+
// distributed under the License is distributed on an "AS IS" BASIS,
9+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
// See the License for the specific language governing permissions and
11+
// limitations under the License.
12+
13+
//! Concrete syntax (green) tree builder
14+
//!
15+
//! Based on the code from rowan:
16+
//! <https://github.com/rust-analyzer/rowan/blob/v0.10.0/src/green/builder.rs>
17+
//!
18+
//! The deviations are marked with `CHANGED(sqlparser)`.
19+
20+
// CHANGED(sqlparser): parts of the imported code may be unused
21+
#![allow(dead_code)]
22+
23+
use rowan::{GreenNode, GreenToken, NodeOrToken, SmolStr, SyntaxKind};
24+
25+
// CHANGED(sqlparser): Use HashSet from std instead of FxHashSet to avoid the
26+
// extra dependency
27+
use std::collections::HashSet;
28+
// CHANGED(sqlparser): Redefine `GreenElement`, as it's not public in rowan
29+
pub type GreenElement = NodeOrToken<GreenNode, GreenToken>;
30+
31+
/// Interning cache for green tree elements.
///
/// Holds the green nodes and tokens that were already built, so that
/// `NodeCache::node` / `NodeCache::token` can return a clone of an existing,
/// equal element instead of keeping a second copy around.
#[derive(Default, Debug)]
32+
pub struct NodeCache {
33+
// Previously built nodes. Only nodes with at most 3 children are ever
// inserted here (see the size check in `NodeCache::node`).
nodes: HashSet<GreenNode>,
34+
// Previously built tokens; every token goes through this set.
tokens: HashSet<GreenToken>,
35+
}
36+
37+
impl NodeCache {
38+
/// Builds a green node of the given `kind` from `children` and
/// deduplicates it against the cache.
///
/// The node is always constructed first. If it has at most 3 children,
/// the cache is consulted: an equal cached node replaces the new one, and
/// otherwise the new node is added to the cache. Nodes with more than 3
/// children are returned as built and are never cached.
fn node<I>(&mut self, kind: SyntaxKind, children: I) -> GreenNode
39+
where
40+
I: IntoIterator<Item = GreenElement>,
41+
I::IntoIter: ExactSizeIterator,
42+
{
43+
let mut node = GreenNode::new(kind, children);
44+
// Green nodes are fully immutable, so it's ok to deduplicate them.
45+
// This is the same optimization that Roslyn does
46+
// https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
47+
//
48+
// For example, all `#[inline]` in this file share the same green node!
49+
// For `libsyntax/parse/parser.rs`, measurements show that deduping saves
50+
// 17% of the memory for green nodes!
51+
// Future work: make hashing faster by avoiding rehashing of subtrees.
// (The comment above is carried over from rowan unchanged.)
52+
if node.children().len() <= 3 {
53+
match self.nodes.get(&node) {
54+
// An equal node is already cached: hand out a clone of it and
// drop the freshly built one.
Some(existing) => node = existing.clone(),
55+
// `get` just reported that no equal node is present, so `insert`
// must return `true`; the assert guards that invariant.
None => assert!(self.nodes.insert(node.clone())),
56+
}
57+
}
58+
node
59+
}
60+
61+
/// Builds a green token of the given `kind` and `text` and deduplicates
/// it against the cache.
///
/// Unlike `node`, there is no size threshold: an equal cached token is
/// returned if there is one, otherwise the new token is cached.
fn token(&mut self, kind: SyntaxKind, text: SmolStr) -> GreenToken {
62+
let mut token = GreenToken::new(kind, text);
63+
match self.tokens.get(&token) {
64+
Some(existing) => token = existing.clone(),
65+
// Same invariant as in `node`: the lookup missed, so the insert
// cannot find an existing equal entry.
None => assert!(self.tokens.insert(token.clone())),
66+
}
67+
token
68+
}
69+
}
70+
71+
/// A value of type `T` that is either stored inline or reached through a
/// mutable borrow.
///
/// `GreenNodeBuilder` uses it for its cache field: `Owned` when the builder
/// creates its own default `NodeCache`, `Borrowed` when the caller supplies
/// one through `GreenNodeBuilder::with_cache`.
#[derive(Debug)]
72+
enum MaybeOwned<'a, T> {
73+
// The value lives inside this enum.
Owned(T),
74+
// The value lives elsewhere and is mutably borrowed for `'a`.
Borrowed(&'a mut T),
75+
}
76+
77+
// Shared access to the wrapped value, regardless of which variant holds it.
impl<T> std::ops::Deref for MaybeOwned<'_, T> {
78+
type Target = T;
79+
/// Returns a shared reference to the owned or borrowed `T`.
fn deref(&self) -> &T {
80+
match self {
81+
MaybeOwned::Owned(it) => it,
82+
// Here `it` is a reference to the stored `&mut T`; `*it` goes
// through it to reach the underlying `T`.
MaybeOwned::Borrowed(it) => *it,
83+
}
84+
}
85+
}
86+
87+
// Mutable access to the wrapped value, regardless of which variant holds it.
impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
88+
/// Returns a mutable reference to the owned or borrowed `T`.
fn deref_mut(&mut self) -> &mut T {
89+
match self {
90+
MaybeOwned::Owned(it) => it,
91+
// Reborrows the stored `&mut T` for the duration of `&mut self`.
MaybeOwned::Borrowed(it) => *it,
92+
}
93+
}
94+
}
95+
96+
// The default is always the `Owned` variant holding `T::default()`; this is
// what lets `GreenNodeBuilder` derive `Default`.
impl<T: Default> Default for MaybeOwned<'_, T> {
97+
/// Returns `MaybeOwned::Owned` wrapping the default value of `T`.
fn default() -> Self {
98+
MaybeOwned::Owned(T::default())
99+
}
100+
}
101+
102+
/// A checkpoint for maybe wrapping a node. See `GreenNodeBuilder::checkpoint` for details.
///
/// The wrapped `usize` is the length of the builder's list of finished
/// children at the time `GreenNodeBuilder::checkpoint` was called, i.e. the
/// index of the first element that a later `start_node_at` would wrap.
103+
#[derive(Clone, Copy, Debug)]
104+
pub struct Checkpoint(usize);
105+
106+
/// A builder for a green tree.
///
/// Tokens and nodes are appended with `token`, `start_node` (or
/// `start_node_at`) and `finish_node`; `finish` then yields the root
/// `GreenNode`.
107+
#[derive(Default, Debug)]
108+
pub struct GreenNodeBuilder<'cache> {
109+
// Deduplication cache: owned by the builder (`new`) or borrowed from the
// caller (`with_cache`).
cache: MaybeOwned<'cache, NodeCache>,
110+
// Stack of nodes that were started but not finished yet. Each entry is the
// node's kind and the index in `children` where its first child is (or
// will be) stored.
parents: Vec<(SyntaxKind, usize)>,
111+
// Flat list of finished elements. `finish_node` drains the tail that
// belongs to the node being finished and pushes the resulting node back.
children: Vec<GreenElement>,
112+
}
113+
114+
impl GreenNodeBuilder<'_> {
115+
/// Creates a new builder that owns its own, initially empty, `NodeCache`.
116+
pub fn new() -> GreenNodeBuilder<'static> {
117+
GreenNodeBuilder::default()
118+
}
119+
120+
/// Creates a new builder that uses the caller-provided `cache`.
///
/// Reusing a `NodeCache` between different `GreenNodeBuilder`s saves memory:
121+
/// it allows structurally sharing the underlying trees.
122+
pub fn with_cache(cache: &mut NodeCache) -> GreenNodeBuilder<'_> {
123+
GreenNodeBuilder {
124+
cache: MaybeOwned::Borrowed(cache),
125+
parents: Vec::new(),
126+
children: Vec::new(),
127+
}
128+
}
129+
130+
/// Adds a new token to the current branch.
///
/// The token is built (and deduplicated) through the cache and appended
/// to the list of finished children.
131+
#[inline]
132+
pub fn token(&mut self, kind: SyntaxKind, text: SmolStr) {
133+
let token = self.cache.token(kind, text);
134+
self.children.push(token.into());
135+
}
136+
137+
/// Starts a new node and makes it current.
///
/// Everything added from now until the matching `finish_node` becomes a
/// child of this node.
138+
#[inline]
139+
pub fn start_node(&mut self, kind: SyntaxKind) {
140+
// Remember where this node's children will begin in `self.children`.
let len = self.children.len();
141+
self.parents.push((kind, len));
142+
}
143+
144+
/// Finishes the current branch and restores the previous
145+
/// branch as current.
///
/// # Panics
///
/// Panics (bare `unwrap`, no message) if no node is currently started,
/// i.e. there is no matching `start_node` / `start_node_at`.
146+
#[inline]
147+
pub fn finish_node(&mut self) {
148+
let (kind, first_child) = self.parents.pop().unwrap();
149+
// Everything from `first_child` onwards belongs to the node being
// finished; move it out of the flat list...
let children = self.children.drain(first_child..);
150+
let node = self.cache.node(kind, children);
151+
// ...and put the finished node in its place.
self.children.push(node.into());
152+
}
153+
154+
/// Prepare for maybe wrapping the next node.
155+
/// The way wrapping works is that you first of all get a checkpoint,
156+
/// then you place all tokens you want to wrap, and then *maybe* call
157+
/// `start_node_at`.
///
/// The returned `Checkpoint` records the current number of finished
/// children. Note that the hidden setup lines of the example below use
/// `rowan::GreenNodeBuilder`, as in the upstream documentation.
///
158+
/// Example:
159+
/// ```rust
160+
/// # use rowan::{GreenNodeBuilder, SyntaxKind};
161+
/// # const PLUS: SyntaxKind = SyntaxKind(0);
162+
/// # const OPERATION: SyntaxKind = SyntaxKind(1);
163+
/// # struct Parser;
164+
/// # impl Parser {
165+
/// # fn peek(&self) -> Option<SyntaxKind> { None }
166+
/// # fn parse_expr(&mut self) {}
167+
/// # }
168+
/// # let mut builder = GreenNodeBuilder::new();
169+
/// # let mut parser = Parser;
170+
/// let checkpoint = builder.checkpoint();
171+
/// parser.parse_expr();
172+
/// if parser.peek() == Some(PLUS) {
173+
/// // 1 + 2 = Add(1, 2)
174+
/// builder.start_node_at(checkpoint, OPERATION);
175+
/// parser.parse_expr();
176+
/// builder.finish_node();
177+
/// }
178+
/// ```
179+
#[inline]
180+
pub fn checkpoint(&self) -> Checkpoint {
181+
Checkpoint(self.children.len())
182+
}
183+
184+
/// Wrap the previous branch marked by `checkpoint` in a new branch and
185+
/// make it current.
///
/// All elements added since `checkpoint` was taken become children of
/// the new node once the matching `finish_node` is called.
///
/// # Panics
///
/// Panics if `checkpoint` points past the end of the finished children,
/// or if it points before the first child of the node that is currently
/// open.
186+
#[inline]
187+
pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) {
188+
let Checkpoint(checkpoint) = checkpoint;
189+
// The elements recorded by the checkpoint must still be in the list.
assert!(
190+
checkpoint <= self.children.len(),
191+
"checkpoint no longer valid, was finish_node called early?"
192+
);
193+
194+
// The wrapped range must lie entirely inside the innermost open node.
if let Some(&(_, first_child)) = self.parents.last() {
195+
assert!(
196+
checkpoint >= first_child,
197+
"checkpoint no longer valid, was an unmatched start_node_at called?"
198+
);
199+
}
200+
201+
self.parents.push((kind, checkpoint));
202+
}
203+
204+
/// Complete tree building and return the root node. Make sure that
205+
/// every `start_node` / `start_node_at` call is paired with a
206+
/// `finish_node` call!
///
/// # Panics
///
/// Panics if the builder does not hold exactly one finished element, or
/// (without a message) if that element is a token rather than a node.
///
/// NOTE(review): the stack of still-open nodes is not inspected here, so
/// an unfinished node is only detected indirectly through the
/// single-element check.
207+
#[inline]
208+
pub fn finish(mut self) -> GreenNode {
209+
assert_eq!(self.children.len(), 1);
210+
match self.children.pop().unwrap() {
211+
NodeOrToken::Node(node) => node,
212+
// The root of a green tree has to be a node.
NodeOrToken::Token(_) => panic!(),
213+
}
214+
}
215+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#![warn(clippy::all)]
3636

3737
pub mod ast;
38+
mod builder;
3839
pub mod cst;
3940
pub mod dialect;
4041
pub mod parser;

src/parser.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use super::tokenizer::*;
2121
use std::error::Error;
2222
use std::fmt;
2323

24+
use crate::builder;
2425
use crate::{cst, cst::SyntaxKind as SK};
2526

2627
#[derive(Debug, Clone, PartialEq)]
@@ -40,7 +41,7 @@ macro_rules! parser_err {
4041
pub struct Marker {
4142
/// position in the token stream (`parser.index`)
4243
index: usize,
43-
builder_checkpoint: rowan::Checkpoint,
44+
builder_checkpoint: builder::Checkpoint,
4445
}
4546

4647
#[derive(PartialEq)]
@@ -82,7 +83,7 @@ pub struct Parser {
8283
tokens: Vec<Token>,
8384
/// The index of the first unprocessed token in `self.tokens`
8485
index: usize,
85-
builder: rowan::GreenNodeBuilder<'static>,
86+
builder: builder::GreenNodeBuilder<'static>,
8687

8788
// TBD: the parser currently provides an API to move around the token
8889
// stream without restrictions (`next_token`/`prev_token`), while the
@@ -114,7 +115,7 @@ impl Parser {
114115
let mut parser = Parser {
115116
tokens,
116117
index: 0,
117-
builder: rowan::GreenNodeBuilder::new(),
118+
builder: builder::GreenNodeBuilder::new(),
118119
pending: vec![],
119120
};
120121
parser.builder.start_node(SK::ROOT.into());

0 commit comments

Comments
 (0)