|
| 1 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 2 | +// you may not use this file except in compliance with the License. |
| 3 | +// You may obtain a copy of the License at |
| 4 | +// |
| 5 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 6 | +// |
| 7 | +// Unless required by applicable law or agreed to in writing, software |
| 8 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 9 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 10 | +// See the License for the specific language governing permissions and |
| 11 | +// limitations under the License. |
| 12 | + |
| 13 | +//! Concrete syntax (green) tree builder |
| 14 | +//! |
| 15 | +//! Based on the code from rowan: |
| 16 | +//! https://github.com/rust-analyzer/rowan/blob/v0.10.0/src/green/builder.rs |
| 17 | +//! |
| 18 | +//! The deviations are marked with `CHANGED(sqlparser)`. |
| 19 | +
|
| 20 | +// CHANGED(sqlparser): parts of the imported code may be unused |
| 21 | +#![allow(dead_code)] |
| 22 | + |
| 23 | +use rowan::{GreenNode, GreenToken, NodeOrToken, SmolStr, SyntaxKind}; |
| 24 | + |
| 25 | +// CHANGED(sqlparser): Use HashSet from std instead of FxHashSet to avoid the |
| 26 | +// extra dependency |
| 27 | +use std::collections::HashSet; |
| 28 | +// CHANGED(sqlparser): Redefine `GreenElement`, as it's not public in rowan |
| 29 | +pub type GreenElement = NodeOrToken<GreenNode, GreenToken>; |
| 30 | + |
| 31 | +#[derive(Default, Debug)] |
| 32 | +pub struct NodeCache { |
| 33 | + nodes: HashSet<GreenNode>, |
| 34 | + tokens: HashSet<GreenToken>, |
| 35 | +} |
| 36 | + |
| 37 | +impl NodeCache { |
| 38 | + fn node<I>(&mut self, kind: SyntaxKind, children: I) -> GreenNode |
| 39 | + where |
| 40 | + I: IntoIterator<Item = GreenElement>, |
| 41 | + I::IntoIter: ExactSizeIterator, |
| 42 | + { |
| 43 | + let mut node = GreenNode::new(kind, children); |
| 44 | + // Green nodes are fully immutable, so it's ok to deduplicate them. |
| 45 | + // This is the same optimization that Roslyn does |
| 46 | + // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees |
| 47 | + // |
| 48 | + // For example, all `#[inline]` in this file share the same green node! |
| 49 | + // For `libsyntax/parse/parser.rs`, measurements show that deduping saves |
| 50 | + // 17% of the memory for green nodes! |
| 51 | + // Future work: make hashing faster by avoiding rehashing of subtrees. |
| 52 | + if node.children().len() <= 3 { |
| 53 | + match self.nodes.get(&node) { |
| 54 | + Some(existing) => node = existing.clone(), |
| 55 | + None => assert!(self.nodes.insert(node.clone())), |
| 56 | + } |
| 57 | + } |
| 58 | + node |
| 59 | + } |
| 60 | + |
| 61 | + fn token(&mut self, kind: SyntaxKind, text: SmolStr) -> GreenToken { |
| 62 | + let mut token = GreenToken::new(kind, text); |
| 63 | + match self.tokens.get(&token) { |
| 64 | + Some(existing) => token = existing.clone(), |
| 65 | + None => assert!(self.tokens.insert(token.clone())), |
| 66 | + } |
| 67 | + token |
| 68 | + } |
| 69 | +} |
| 70 | + |
/// A `T` that is either stored inline or reached through an exclusive borrow.
///
/// Both variants dereference to the same `T`, so code working with a
/// `MaybeOwned` does not need to know which one it holds.
// CHANGED(sqlparser): the trait impls below use `Self::` paths and explicit
// reborrows; the behavior is the same as in rowan.
#[derive(Debug)]
enum MaybeOwned<'a, T> {
    /// The value lives inside the enum itself.
    Owned(T),
    /// The value lives elsewhere and is mutably borrowed for `'a`.
    Borrowed(&'a mut T),
}

impl<T> std::ops::Deref for MaybeOwned<'_, T> {
    type Target = T;

    /// Gives shared access to the wrapped value, wherever it is stored.
    fn deref(&self) -> &T {
        match self {
            Self::Owned(value) => value,
            Self::Borrowed(reference) => &**reference,
        }
    }
}

impl<T> std::ops::DerefMut for MaybeOwned<'_, T> {
    /// Gives exclusive access to the wrapped value, wherever it is stored.
    fn deref_mut(&mut self) -> &mut T {
        match self {
            Self::Owned(value) => value,
            Self::Borrowed(reference) => &mut **reference,
        }
    }
}

impl<T> Default for MaybeOwned<'_, T>
where
    T: Default,
{
    /// Produces the owned variant holding `T::default()`.
    fn default() -> Self {
        Self::Owned(T::default())
    }
}
| 101 | + |
/// A position marker used to wrap already-built children in a node after the
/// fact.
///
/// It records how many children the builder held at the moment it was taken.
/// See `GreenNodeBuilder::checkpoint` for details and an example.
#[derive(Debug, Clone, Copy)]
pub struct Checkpoint(usize);
| 105 | + |
| 106 | +/// A builder for a green tree. |
| 107 | +#[derive(Default, Debug)] |
| 108 | +pub struct GreenNodeBuilder<'cache> { |
| 109 | + cache: MaybeOwned<'cache, NodeCache>, |
| 110 | + parents: Vec<(SyntaxKind, usize)>, |
| 111 | + children: Vec<GreenElement>, |
| 112 | +} |
| 113 | + |
| 114 | +impl GreenNodeBuilder<'_> { |
| 115 | + /// Creates new builder. |
| 116 | + pub fn new() -> GreenNodeBuilder<'static> { |
| 117 | + GreenNodeBuilder::default() |
| 118 | + } |
| 119 | + |
| 120 | + /// Reusing `NodeCache` between different `GreenNodeBuilder`s saves memory. |
| 121 | + /// It allows to structurally share underlying trees. |
| 122 | + pub fn with_cache(cache: &mut NodeCache) -> GreenNodeBuilder<'_> { |
| 123 | + GreenNodeBuilder { |
| 124 | + cache: MaybeOwned::Borrowed(cache), |
| 125 | + parents: Vec::new(), |
| 126 | + children: Vec::new(), |
| 127 | + } |
| 128 | + } |
| 129 | + |
| 130 | + /// Adds new token to the current branch. |
| 131 | + #[inline] |
| 132 | + pub fn token(&mut self, kind: SyntaxKind, text: SmolStr) { |
| 133 | + let token = self.cache.token(kind, text); |
| 134 | + self.children.push(token.into()); |
| 135 | + } |
| 136 | + |
| 137 | + /// Start new node and make it current. |
| 138 | + #[inline] |
| 139 | + pub fn start_node(&mut self, kind: SyntaxKind) { |
| 140 | + let len = self.children.len(); |
| 141 | + self.parents.push((kind, len)); |
| 142 | + } |
| 143 | + |
| 144 | + /// Finish current branch and restore previous |
| 145 | + /// branch as current. |
| 146 | + #[inline] |
| 147 | + pub fn finish_node(&mut self) { |
| 148 | + let (kind, first_child) = self.parents.pop().unwrap(); |
| 149 | + let children = self.children.drain(first_child..); |
| 150 | + let node = self.cache.node(kind, children); |
| 151 | + self.children.push(node.into()); |
| 152 | + } |
| 153 | + |
| 154 | + /// Prepare for maybe wrapping the next node. |
| 155 | + /// The way wrapping works is that you first of all get a checkpoint, |
| 156 | + /// then you place all tokens you want to wrap, and then *maybe* call |
| 157 | + /// `start_node_at`. |
| 158 | + /// Example: |
| 159 | + /// ```rust |
| 160 | + /// # use rowan::{GreenNodeBuilder, SyntaxKind}; |
| 161 | + /// # const PLUS: SyntaxKind = SyntaxKind(0); |
| 162 | + /// # const OPERATION: SyntaxKind = SyntaxKind(1); |
| 163 | + /// # struct Parser; |
| 164 | + /// # impl Parser { |
| 165 | + /// # fn peek(&self) -> Option<SyntaxKind> { None } |
| 166 | + /// # fn parse_expr(&mut self) {} |
| 167 | + /// # } |
| 168 | + /// # let mut builder = GreenNodeBuilder::new(); |
| 169 | + /// # let mut parser = Parser; |
| 170 | + /// let checkpoint = builder.checkpoint(); |
| 171 | + /// parser.parse_expr(); |
| 172 | + /// if parser.peek() == Some(PLUS) { |
| 173 | + /// // 1 + 2 = Add(1, 2) |
| 174 | + /// builder.start_node_at(checkpoint, OPERATION); |
| 175 | + /// parser.parse_expr(); |
| 176 | + /// builder.finish_node(); |
| 177 | + /// } |
| 178 | + /// ``` |
| 179 | + #[inline] |
| 180 | + pub fn checkpoint(&self) -> Checkpoint { |
| 181 | + Checkpoint(self.children.len()) |
| 182 | + } |
| 183 | + |
| 184 | + /// Wrap the previous branch marked by `checkpoint` in a new branch and |
| 185 | + /// make it current. |
| 186 | + #[inline] |
| 187 | + pub fn start_node_at(&mut self, checkpoint: Checkpoint, kind: SyntaxKind) { |
| 188 | + let Checkpoint(checkpoint) = checkpoint; |
| 189 | + assert!( |
| 190 | + checkpoint <= self.children.len(), |
| 191 | + "checkpoint no longer valid, was finish_node called early?" |
| 192 | + ); |
| 193 | + |
| 194 | + if let Some(&(_, first_child)) = self.parents.last() { |
| 195 | + assert!( |
| 196 | + checkpoint >= first_child, |
| 197 | + "checkpoint no longer valid, was an unmatched start_node_at called?" |
| 198 | + ); |
| 199 | + } |
| 200 | + |
| 201 | + self.parents.push((kind, checkpoint)); |
| 202 | + } |
| 203 | + |
| 204 | + /// Complete tree building. Make sure that |
| 205 | + /// `start_node_at` and `finish_node` calls |
| 206 | + /// are paired! |
| 207 | + #[inline] |
| 208 | + pub fn finish(mut self) -> GreenNode { |
| 209 | + assert_eq!(self.children.len(), 1); |
| 210 | + match self.children.pop().unwrap() { |
| 211 | + NodeOrToken::Node(node) => node, |
| 212 | + NodeOrToken::Token(_) => panic!(), |
| 213 | + } |
| 214 | + } |
| 215 | +} |
0 commit comments