Skip to content

Commit fd1e102

Browse files
authored
Don't include dash (-) as end char (#27)
A dash is very common in URLs and is rarely the last character of a sentence, so it should not be treated as an end character. The end-character handling should be smarter, and there are probably similar cases that will need a proper fix, but this covers it for now. Closes #24
1 parent 94c99a6 commit fd1e102

File tree

5 files changed

+32
-69
lines changed

5 files changed

+32
-69
lines changed

docs/js/tree-sitter-comment.wasm

344 Bytes
Binary file not shown.

grammar.js

-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ const END_CHARS = [
1212
"]",
1313
")",
1414
">",
15-
// This must be last, so that it isn't interpreted as a range.
16-
"-",
1715
];
1816

1917
const STOP_CHARS = [

src/grammar.json

+1-5
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
},
103103
"uri": {
104104
"type": "PATTERN",
105-
"value": "https?:\\/\\/([^\\s\\.,:;!\\?\\\\'\"\\}\\]\\)>-]|[\\.,:;!\\?\\\\'\"\\}\\]\\)>-][a-zA-Z0-9]+)*[^\\s\\.,:;!\\?\\\\'\"\\}\\]\\)>-]"
105+
"value": "https?:\\/\\/([^\\s\\.,:;!\\?\\\\'\"\\}\\]\\)>]|[\\.,:;!\\?\\\\'\"\\}\\]\\)>][a-zA-Z0-9]+)*[^\\s\\.,:;!\\?\\\\'\"\\}\\]\\)>]"
106106
},
107107
"_text": {
108108
"type": "CHOICE",
@@ -252,10 +252,6 @@
252252
{
253253
"type": "STRING",
254254
"value": ">"
255-
},
256-
{
257-
"type": "STRING",
258-
"value": "-"
259255
}
260256
]
261257
}

src/parser.c

+31-62
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,20 @@ static const TSStateId ts_primary_state_ids[STATE_COUNT] = {
292292
[15] = 15,
293293
};
294294

295+
/*
 * Character-class predicate used by the lexer states that scan a `uri` token.
 *
 * Returns true when `c` is one of the following characters:
 *
 *     ! " ' ) , . : ; > ? \ ] }
 *
 * and false for every other value, including negative values and 0.
 * Note that the dash ('-') is not a member of this set.
 */
static inline bool sym_uri_character_set_1(int32_t c) {
  switch (c) {
    case '!':
    case '"':
    case '\'':
    case ')':
    case ',':
    case '.':
    case ':':
    case ';':
    case '>':
    case '?':
    case '\\':
    case ']':
    case '}':
      return true;
    default:
      return false;
  }
}
308+
295309
static inline bool aux_sym__text_token1_character_set_1(int32_t c) {
296310
return (c < ','
297311
? (c < '\r'
@@ -376,18 +390,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
376390
lookahead != ')') ADVANCE(10);
377391
END_STATE();
378392
case 4:
379-
if (lookahead == '!' ||
380-
lookahead == '"' ||
381-
lookahead == '\'' ||
382-
lookahead == ')' ||
383-
(',' <= lookahead && lookahead <= '.') ||
384-
lookahead == ':' ||
385-
lookahead == ';' ||
386-
lookahead == '>' ||
387-
lookahead == '?' ||
388-
lookahead == '\\' ||
389-
lookahead == ']' ||
390-
lookahead == '}') ADVANCE(6);
393+
if (sym_uri_character_set_1(lookahead)) ADVANCE(6);
391394
if (('0' <= lookahead && lookahead <= '9') ||
392395
('A' <= lookahead && lookahead <= 'Z') ||
393396
('a' <= lookahead && lookahead <= 'z')) ADVANCE(13);
@@ -398,18 +401,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
398401
lookahead != ' ') ADVANCE(14);
399402
END_STATE();
400403
case 5:
401-
if (lookahead == '!' ||
402-
lookahead == '"' ||
403-
lookahead == '\'' ||
404-
lookahead == ')' ||
405-
(',' <= lookahead && lookahead <= '.') ||
406-
lookahead == ':' ||
407-
lookahead == ';' ||
408-
lookahead == '>' ||
409-
lookahead == '?' ||
410-
lookahead == '\\' ||
411-
lookahead == ']' ||
412-
lookahead == '}') ADVANCE(6);
404+
if (sym_uri_character_set_1(lookahead)) ADVANCE(6);
413405
if (lookahead != 0 &&
414406
lookahead != '\t' &&
415407
lookahead != '\n' &&
@@ -444,18 +436,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
444436
END_STATE();
445437
case 13:
446438
ACCEPT_TOKEN(sym_uri);
447-
if (lookahead == '!' ||
448-
lookahead == '"' ||
449-
lookahead == '\'' ||
450-
lookahead == ')' ||
451-
(',' <= lookahead && lookahead <= '.') ||
452-
lookahead == ':' ||
453-
lookahead == ';' ||
454-
lookahead == '>' ||
455-
lookahead == '?' ||
456-
lookahead == '\\' ||
457-
lookahead == ']' ||
458-
lookahead == '}') ADVANCE(6);
439+
if (sym_uri_character_set_1(lookahead)) ADVANCE(6);
459440
if (('0' <= lookahead && lookahead <= '9') ||
460441
('A' <= lookahead && lookahead <= 'Z') ||
461442
('a' <= lookahead && lookahead <= 'z')) ADVANCE(13);
@@ -467,18 +448,7 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) {
467448
END_STATE();
468449
case 14:
469450
ACCEPT_TOKEN(sym_uri);
470-
if (lookahead == '!' ||
471-
lookahead == '"' ||
472-
lookahead == '\'' ||
473-
lookahead == ')' ||
474-
(',' <= lookahead && lookahead <= '.') ||
475-
lookahead == ':' ||
476-
lookahead == ';' ||
477-
lookahead == '>' ||
478-
lookahead == '?' ||
479-
lookahead == '\\' ||
480-
lookahead == ']' ||
481-
lookahead == '}') ADVANCE(6);
451+
if (sym_uri_character_set_1(lookahead)) ADVANCE(6);
482452
if (lookahead != 0 &&
483453
lookahead != '\t' &&
484454
lookahead != '\n' &&
@@ -867,7 +837,7 @@ static const uint16_t ts_small_parse_table[] = {
867837
aux_sym__full_uri_token1,
868838
STATE(7), 1,
869839
sym__end_char,
870-
ACTIONS(51), 14,
840+
ACTIONS(51), 13,
871841
anon_sym_COLON,
872842
anon_sym_RPAREN,
873843
anon_sym_SQUOTE,
@@ -881,8 +851,7 @@ static const uint16_t ts_small_parse_table[] = {
881851
anon_sym_RBRACE,
882852
anon_sym_RBRACK,
883853
anon_sym_GT,
884-
anon_sym_DASH,
885-
[23] = 4,
854+
[22] = 4,
886855
ACTIONS(3), 1,
887856
aux_sym__full_uri_token1,
888857
ACTIONS(55), 1,
@@ -891,27 +860,27 @@ static const uint16_t ts_small_parse_table[] = {
891860
anon_sym_LPAREN,
892861
STATE(13), 1,
893862
sym__user,
894-
[36] = 2,
863+
[35] = 2,
895864
ACTIONS(3), 1,
896865
aux_sym__full_uri_token1,
897866
ACTIONS(59), 1,
898867
ts_builtin_sym_end,
899-
[43] = 2,
868+
[42] = 2,
900869
ACTIONS(61), 1,
901870
aux_sym__user_token1,
902871
ACTIONS(63), 1,
903872
aux_sym__full_uri_token1,
904-
[50] = 2,
873+
[49] = 2,
905874
ACTIONS(3), 1,
906875
aux_sym__full_uri_token1,
907876
ACTIONS(65), 1,
908877
anon_sym_COLON,
909-
[57] = 2,
878+
[56] = 2,
910879
ACTIONS(3), 1,
911880
aux_sym__full_uri_token1,
912881
ACTIONS(67), 1,
913882
anon_sym_RPAREN,
914-
[64] = 2,
883+
[63] = 2,
915884
ACTIONS(3), 1,
916885
aux_sym__full_uri_token1,
917886
ACTIONS(69), 1,
@@ -920,12 +889,12 @@ static const uint16_t ts_small_parse_table[] = {
920889

921890
static const uint32_t ts_small_parse_table_map[] = {
922891
[SMALL_STATE(9)] = 0,
923-
[SMALL_STATE(10)] = 23,
924-
[SMALL_STATE(11)] = 36,
925-
[SMALL_STATE(12)] = 43,
926-
[SMALL_STATE(13)] = 50,
927-
[SMALL_STATE(14)] = 57,
928-
[SMALL_STATE(15)] = 64,
892+
[SMALL_STATE(10)] = 22,
893+
[SMALL_STATE(11)] = 35,
894+
[SMALL_STATE(12)] = 42,
895+
[SMALL_STATE(13)] = 49,
896+
[SMALL_STATE(14)] = 56,
897+
[SMALL_STATE(15)] = 63,
929898
};
930899

931900
static const TSParseActionEntry ts_parse_actions[] = {

tree-sitter-comment.wasm

344 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)