Skip to content

Commit 790e5dd

Browse files
committed
Refactor: Don't use external scanner to parse text tokens
1 parent f08e7d4 commit 790e5dd

17 files changed

+798
-284
lines changed

Cargo.lock

+59
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
[package]
22
name = "tree-sitter-comment"
3-
description = "comment grammar for the tree-sitter parsing library"
4-
version = "0.0.1"
3+
description = "Grammar for code tags like TODO:, FIXME(user): for the tree-sitter parsing library"
4+
version = "0.1.0"
55
keywords = ["incremental", "parsing", "comment"]
66
categories = ["parsing", "text-editors"]
77
repository = "https://github.com/stsewd/tree-sitter-comment"
8+
homepage = "https://stsewd.dev/tree-sitter-comment/"
89
edition = "2018"
10+
license = "MIT"
11+
readme = "README.md"
912

1013
build = "bindings/rust/build.rs"
1114
include = [
@@ -19,7 +22,7 @@ include = [
1922
path = "bindings/rust/lib.rs"
2023

2124
[dependencies]
22-
tree-sitter = "0.19.2"
25+
tree-sitter = "0.20"
2326

2427
[build-dependencies]
2528
cc = "1.0"

bindings/rust/build.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ fn main() {
22
let src_dir = std::path::Path::new("src");
33

44
let mut c_config = cc::Build::new();
5-
c_config.include(&src_dir);
5+
c_config.include(src_dir);
66
c_config
77
.flag_if_supported("-Wno-unused-parameter")
88
.flag_if_supported("-Wno-unused-but-set-variable")
@@ -13,11 +13,9 @@ fn main() {
1313
// If your language uses an external scanner written in C,
1414
// then include this block of code:
1515

16-
/*
1716
let scanner_path = src_dir.join("scanner.c");
1817
c_config.file(&scanner_path);
1918
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
20-
*/
2119

2220
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
2321
c_config.compile("parser");

bindings/rust/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//! ```
77
//! let code = "";
88
//! let mut parser = tree_sitter::Parser::new();
9-
//! parser.set_language(tree_sitter_javascript::language()).expect("Error loading comment grammar");
9+
//! parser.set_language(tree_sitter_comment::language()).expect("Error loading comment grammar");
1010
//! let tree = parser.parse(code, None).unwrap();
1111
//! ```
1212
//!

docs/js/tree-sitter-comment.wasm

1.06 KB
Binary file not shown.

grammar.js

+53-17
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,76 @@
1-
const WHITE_SPACE = choice(" ", "\t", "\v", "\f");
21
const NEWLINE = /\r?\n/;
2+
const STOP_CHARS = [
3+
"/",
4+
"'",
5+
'"',
6+
"<",
7+
"(",
8+
"[",
9+
"{",
10+
".",
11+
",",
12+
":",
13+
";",
14+
"!",
15+
"?",
16+
"\\",
17+
"}",
18+
"]",
19+
")",
20+
">",
21+
// This must be last, so that it isn't interpreted as a range.
22+
"-",
23+
];
324

425
module.exports = grammar({
526
name: "comment",
627

7-
externals: $ => [
8-
$.name,
9-
$._text,
10-
],
28+
externals: ($) => [$.name, $.invalid_token],
1129

12-
extras: $ => [
13-
$.__newline,
14-
$.__whitespace,
15-
],
30+
extras: ($) => [$.__newline, /\s/],
1631

1732
rules: {
18-
source: $ => repeat(
33+
source: ($) => repeat(
1934
choice(
2035
$.tag,
2136
alias($._text, "text"),
2237
),
2338
),
2439

25-
tag: $ => seq(
40+
tag: ($) => seq(
2641
$.name,
2742
optional($._user),
28-
':',
43+
":",
2944
),
3045

31-
_user: $ => seq(
32-
'(',
46+
_user: ($) => seq(
47+
"(",
3348
alias(/[^()]+/, $.user),
34-
')',
49+
")",
3550
),
3651

37-
__newline: $ => NEWLINE,
38-
__whitespace: $ => token(WHITE_SPACE),
52+
_text: ($) => choice($._stop_char, notmatching(STOP_CHARS)),
53+
_stop_char: ($) => choice(...STOP_CHARS),
54+
55+
// HACK: for some reason this needs be assigned to a token,
56+
// otherwise isn't recognized as an extra.
57+
__newline: ($) => NEWLINE,
3958
},
4059
});
60+
61+
/**
62+
* Match any characters that aren't whitespace or that aren't in the given list.
63+
*/
64+
function notmatching(chars) {
65+
chars = chars.join("");
66+
return new RegExp(`[^\r\n\\s${escapeRegExp(chars)}]+`);
67+
}
68+
69+
/**
70+
* Escape a string for use in a regular expression.
71+
*
72+
* Taken from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping.
73+
*/
74+
function escapeRegExp(string) {
75+
return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
76+
}

package-lock.json

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "tree-sitter-comment",
3-
"version": "0.0.1",
4-
"description": "Grammar for code tags like TODO:, FIXME(user):, etc.",
3+
"version": "0.1.0",
4+
"description": "Grammar for code tags like TODO:, FIXME(user): for the tree-sitter parsing library",
55
"main": "bindings/node",
66
"scripts": {
77
"test": "tree-sitter test",

src/grammar.json

+97-27
Original file line numberDiff line numberDiff line change
@@ -69,33 +69,103 @@
6969
}
7070
]
7171
},
72+
"_text": {
73+
"type": "CHOICE",
74+
"members": [
75+
{
76+
"type": "SYMBOL",
77+
"name": "_stop_char"
78+
},
79+
{
80+
"type": "PATTERN",
81+
"value": "[^\\r\\n\\s/'\"<\\(\\[\\{\\.,:;!\\?\\\\\\}\\]\\)>-]+"
82+
}
83+
]
84+
},
85+
"_stop_char": {
86+
"type": "CHOICE",
87+
"members": [
88+
{
89+
"type": "STRING",
90+
"value": "/"
91+
},
92+
{
93+
"type": "STRING",
94+
"value": "'"
95+
},
96+
{
97+
"type": "STRING",
98+
"value": "\""
99+
},
100+
{
101+
"type": "STRING",
102+
"value": "<"
103+
},
104+
{
105+
"type": "STRING",
106+
"value": "("
107+
},
108+
{
109+
"type": "STRING",
110+
"value": "["
111+
},
112+
{
113+
"type": "STRING",
114+
"value": "{"
115+
},
116+
{
117+
"type": "STRING",
118+
"value": "."
119+
},
120+
{
121+
"type": "STRING",
122+
"value": ","
123+
},
124+
{
125+
"type": "STRING",
126+
"value": ":"
127+
},
128+
{
129+
"type": "STRING",
130+
"value": ";"
131+
},
132+
{
133+
"type": "STRING",
134+
"value": "!"
135+
},
136+
{
137+
"type": "STRING",
138+
"value": "?"
139+
},
140+
{
141+
"type": "STRING",
142+
"value": "\\"
143+
},
144+
{
145+
"type": "STRING",
146+
"value": "}"
147+
},
148+
{
149+
"type": "STRING",
150+
"value": "]"
151+
},
152+
{
153+
"type": "STRING",
154+
"value": ")"
155+
},
156+
{
157+
"type": "STRING",
158+
"value": ">"
159+
},
160+
{
161+
"type": "STRING",
162+
"value": "-"
163+
}
164+
]
165+
},
72166
"__newline": {
73167
"type": "PATTERN",
74168
"value": "\\r?\\n"
75-
},
76-
"__whitespace": {
77-
"type": "TOKEN",
78-
"content": {
79-
"type": "CHOICE",
80-
"members": [
81-
{
82-
"type": "STRING",
83-
"value": " "
84-
},
85-
{
86-
"type": "STRING",
87-
"value": "\t"
88-
},
89-
{
90-
"type": "STRING",
91-
"value": "\u000b"
92-
},
93-
{
94-
"type": "STRING",
95-
"value": "\f"
96-
}
97-
]
98-
}
99169
}
100170
},
101171
"extras": [
@@ -104,8 +174,8 @@
104174
"name": "__newline"
105175
},
106176
{
107-
"type": "SYMBOL",
108-
"name": "__whitespace"
177+
"type": "PATTERN",
178+
"value": "\\s"
109179
}
110180
],
111181
"conflicts": [],
@@ -117,7 +187,7 @@
117187
},
118188
{
119189
"type": "SYMBOL",
120-
"name": "_text"
190+
"name": "invalid_token"
121191
}
122192
],
123193
"inline": [],

0 commit comments

Comments
 (0)