
Commit a42121d

Use binary search to speed up matching keywords (#191)
1 parent af54eb0 commit a42121d

File tree: 3 files changed, +27 -18 lines changed


src/dialect/keywords.rs

+15 -14

@@ -49,11 +49,11 @@ macro_rules! define_keywords {
     }
 }

+// The following keywords should be sorted to be able to match using binary search
 define_keywords!(
     ABS,
     ACTION,
     ADD,
-    ASC,
     ALL,
     ALLOCATE,
     ALTER,
@@ -65,6 +65,7 @@ define_keywords!(
     ARRAY_AGG,
     ARRAY_MAX_CARDINALITY,
     AS,
+    ASC,
     ASENSITIVE,
     ASYMMETRIC,
     AT,
@@ -93,9 +94,9 @@ define_keywords!(
     CEILING,
     CHAIN,
     CHAR,
-    CHAR_LENGTH,
     CHARACTER,
     CHARACTER_LENGTH,
+    CHAR_LENGTH,
     CHECK,
     CLOB,
     CLOSE,
@@ -158,6 +159,7 @@ define_keywords!(
     ELEMENT,
     ELSE,
     END,
+    END_EXEC = "END-EXEC",
     END_FRAME,
     END_PARTITION,
     EQUALS,
@@ -175,8 +177,8 @@ define_keywords!(
     FALSE,
     FETCH,
     FIELDS,
-    FIRST,
     FILTER,
+    FIRST,
     FIRST_VALUE,
     FLOAT,
     FLOOR,
@@ -255,8 +257,8 @@ define_keywords!(
     NATURAL,
     NCHAR,
     NCLOB,
-    NEXT,
     NEW,
+    NEXT,
     NO,
     NONE,
     NORMALIZE,
@@ -268,8 +270,8 @@ define_keywords!(
     NULLS,
     NUMERIC,
     OBJECT,
-    OCTET_LENGTH,
     OCCURRENCES_REGEX,
+    OCTET_LENGTH,
     OF,
     OFFSET,
     OLD,
@@ -285,12 +287,12 @@ define_keywords!(
     OVERLAPS,
     OVERLAY,
     PARAMETER,
-    PARTITION,
     PARQUET,
+    PARTITION,
     PERCENT,
-    PERCENT_RANK,
     PERCENTILE_CONT,
     PERCENTILE_DISC,
+    PERCENT_RANK,
     PERIOD,
     PORTION,
     POSITION,
@@ -332,8 +334,8 @@ define_keywords!(
     ROLLBACK,
     ROLLUP,
     ROW,
-    ROW_NUMBER,
     ROWS,
+    ROW_NUMBER,
     SAVEPOINT,
     SCHEMA,
     SCOPE,
@@ -390,10 +392,10 @@ define_keywords!(
     TRANSLATION,
     TREAT,
     TRIGGER,
-    TRUNCATE,
     TRIM,
     TRIM_ARRAY,
     TRUE,
+    TRUNCATE,
     UESCAPE,
     UNBOUNDED,
     UNCOMMITTED,
@@ -409,11 +411,11 @@ define_keywords!(
     VALUE,
     VALUES,
     VALUE_OF,
-    VAR_POP,
-    VAR_SAMP,
     VARBINARY,
     VARCHAR,
     VARYING,
+    VAR_POP,
+    VAR_SAMP,
     VERSIONING,
     VIEW,
     WHEN,
@@ -424,11 +426,10 @@ define_keywords!(
     WITH,
     WITHIN,
     WITHOUT,
-    WRITE,
     WORK,
+    WRITE,
     YEAR,
-    ZONE,
-    END_EXEC = "END-EXEC"
+    ZONE
 );

 /// These keywords can't be used as a table alias, so that `FROM table_name alias`
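Note: the reshuffling above follows Rust's byte-wise `&str` ordering, which is what `binary_search` on a `&[&str]` compares by. Because `'_'` (0x5F) sorts after every uppercase ASCII letter, entries such as CHAR_LENGTH must come after CHARACTER_LENGTH, and ROW_NUMBER after ROWS. A minimal standalone sketch (not part of the commit) illustrating the ordering the binary search relies on:

fn main() {
    // &str comparison is byte-wise, and '_' (0x5F) is greater than every
    // uppercase ASCII letter (0x41..=0x5A), so CHAR_LENGTH sorts after
    // CHARACTER_LENGTH and ROW_NUMBER after ROWS.
    assert!("CHAR_LENGTH" > "CHARACTER_LENGTH");
    assert!("ROW_NUMBER" > "ROWS");
    // '-' (0x2D) sorts before both letters and '_', which is why the string
    // "END-EXEC" lands between "END" and "END_FRAME".
    assert!("END-EXEC" < "END_FRAME");

    // binary_search assumes exactly this order; on an unsorted slice it may
    // fail to find an element that is actually present.
    let sorted = ["CHAR", "CHARACTER", "CHARACTER_LENGTH", "CHAR_LENGTH"];
    assert!(sorted.binary_search(&"CHAR_LENGTH").is_ok());
}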

src/tokenizer.rs

+3 -4

@@ -143,10 +143,9 @@ impl Token {
     }
     pub fn make_word(word: &str, quote_style: Option<char>) -> Self {
         let word_uppercase = word.to_uppercase();
-        //TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is
-        // not fast but I want the simplicity for now while I experiment with pluggable
-        // dialects
-        let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str());
+        //TODO: validate use of a hashset (e.g. FnvHashSet) compared to using binary search
+        let is_keyword =
+            quote_style == None && ALL_KEYWORDS.binary_search(&word_uppercase.as_str()).is_ok();
         Token::Word(Word {
             value: word.to_string(),
             quote_style,
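With the keyword list kept sorted, `make_word` can replace the linear scan done by `contains` with `slice::binary_search`, which needs only O(log n) string comparisons per lookup. A self-contained sketch of the same pattern (a toy keyword table, not the crate's real ALL_KEYWORDS):

// Sketch of the lookup pattern make_word now uses: uppercase the word,
// then binary-search a sorted slice of keyword strings.
const KEYWORDS: &[&str] = &["AND", "FROM", "SELECT", "WHERE"]; // must stay sorted

fn is_keyword(word: &str, quote_style: Option<char>) -> bool {
    let upper = word.to_uppercase();
    // binary_search returns Ok(index) on a hit and Err(insert_pos) on a miss,
    // so .is_ok() yields the same boolean that .contains() did, in O(log n).
    quote_style.is_none() && KEYWORDS.binary_search(&upper.as_str()).is_ok()
}

fn main() {
    assert!(is_keyword("select", None));
    assert!(!is_keyword("select", Some('"'))); // quoted identifiers are never keywords
    assert!(!is_keyword("foo", None));
}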

tests/sqlparser_common.rs

+9 -0

@@ -21,6 +21,7 @@
 use matches::assert_matches;

 use sqlparser::ast::*;
+use sqlparser::dialect::keywords::ALL_KEYWORDS;
 use sqlparser::parser::*;
 use sqlparser::test_utils::{all_dialects, expr_from_projection, number, only};

@@ -2851,6 +2852,14 @@ fn parse_drop_index() {
     }
 }

+#[test]
+fn keywords_sorted() {
+    // assert!(ALL_KEYWORDS.is_sorted())
+    let mut copy = Vec::from(ALL_KEYWORDS);
+    copy.sort();
+    assert!(copy == ALL_KEYWORDS)
+}
+
 fn parse_sql_statements(sql: &str) -> Result<Vec<Statement>, ParserError> {
     all_dialects().parse_sql_statements(sql)
 }
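The new test guards the invariant the binary search depends on: it sorts a copy of ALL_KEYWORDS and asserts the copy matches the original. The commented-out line hints at the simpler `is_sorted()`, which was not yet available on stable Rust at the time. An allocation-free variant (a sketch, not part of the commit, reusing the ALL_KEYWORDS import added above) could compare adjacent pairs instead:

#[test]
fn keywords_sorted_windows() {
    // Same sortedness check without copying the slice: every adjacent pair
    // must already be in non-descending order.
    assert!(ALL_KEYWORDS.windows(2).all(|w| w[0] <= w[1]));
}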
