Skip to content

Commit 55fc66f

Browse files
committed
Initial unicode character support for identifiers
Summary: Test Plan: Added a test Reviewers: Subscribers: Tasks: Tags:
1 parent 19f8399 commit 55fc66f

File tree

7 files changed

+25370
-3
lines changed

7 files changed

+25370
-3
lines changed

parser/cpp/prepare-javacc-grammar.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ pwd
33
GRAMMAR_DIR='../grammar'
44
GEN_DIR='target/generated-sources/javacc'
55
mkdir -p $GEN_DIR
6-
cat ./javacc-options.txt $GRAMMAR_DIR/nonreservedwords.txt $GRAMMAR_DIR/reservedwords.txt $GRAMMAR_DIR/sql-spec.txt $GRAMMAR_DIR/presto-extensions.txt $GRAMMAR_DIR/lexical-elements.txt > $GEN_DIR/parser_tmp.jjt
6+
cat ./javacc-options.txt $GRAMMAR_DIR/kw.txt $GRAMMAR_DIR/sql-spec.txt $GRAMMAR_DIR/presto-extensions.txt $GRAMMAR_DIR/nonreservedwords.txt $GRAMMAR_DIR/unicode-identifiers.txt $GRAMMAR_DIR/lexical-elements.txt > $GEN_DIR/parser_tmp.jjt

parser/grammar/compact_char_sets.awk

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
# compact_char_sets.awk
#
# Reads a list of JavaCC character-list entries, one per line, each either a
# single code point ("\uXXXX") or a range ("\uXXXX"-"\uYYYY"), and prints one
# private token definition <#UnicodeIdentifierStart: [...]> in which runs of
# consecutive single code points are collapsed into ranges.
#
# Requires gawk: strtonum() and asort() are gawk extensions.

BEGIN { cnt = 1; }

# Lines that start with "//" are comments in the input; ignore them.
/^\/\// { next; }

# Range entry: "\uXXXX"-"\uYYYY".  Keyed by the numeric value of its start.
/\"\-\"\\u/ {
    gsub("\" .*$", "\"");        # drop everything after the closing quote
    gsub("^[^\"]*", "");         # drop everything before the opening quote
    split($0, a, "-");
    gsub("\\\\u", "", a[1]);
    gsub("\"", "", a[1]);
    s = strtonum("0x" tolower(a[1]));
    key[cnt] = s;
    arr[s] = $0;                 # keep the whole range text verbatim
    cnt++;
    next;
}

# Single entry: "\uXXXX".  Reduce the line to its hex digits.
/\"\\u/ {
    gsub("\" .*$", "");
    # Match a literal backslash followed by 'u'.  The previous pattern used
    # the undefined regexp escape \u, which only worked by accident.
    gsub("^.*\\\\u", "", $0);
    arr[strtonum("0x" tolower($0))] = "\"\\u" $0 "\"";
    key[cnt] = strtonum("0x" tolower($0));
    cnt++;
    next;
}

# A line containing "}" triggers the output of everything collected so far.
/}/ {
    print("TOKEN: {");
    print(" <#UnicodeIdentifierStart: [");
    # asort() returns the element count.  Use it as the loop bound so that
    # probing key[i + 1] can never create a phantom element, and so that the
    # final entry is emitted even when it is not merged into a run.
    n = asort(key);
    for (i = 1; i <= n; i++) {
        if (i > 1) printf(",");
        # Never use data as the printf format string.
        printf("%s", arr[key[i]]);
        start = i;
        # Extend the run only across adjacent *single* code points.  Stopping
        # before an entry that is already a range avoids emitting the
        # malformed "a"-"b"-"c".
        while (i < n && key[i] + 1 == key[i + 1] &&
               index(arr[key[i]], "-") == 0 &&
               index(arr[key[i + 1]], "-") == 0) {
            i++;
        }
        if (start != i) { print("-" arr[key[i]]); } else { print ""; }
    }
    print("]> }");
}

parser/grammar/lexical-elements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ regular_identifier()
8080

8181
| <#identifier_part: <identifier_start> | <identifier_extend> >
8282

83-
| <#identifier_start: ["a"-"z"] // temp
83+
| <#identifier_start: (<UnicodeIdentifierStart>)
8484
/*!! See the Syntax Rules.*/
8585
>
8686

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# Concatenate all the fragments into a .jj file.
22
gendir='../target/generated-sources/javacc'
33
mkdir -p $gendir
4-
cat javacc-options-java.txt nonreservedwords.txt reservedwords.txt sql-spec.txt presto-extensions.txt lexical-elements.txt > $gendir/parser_tmp.jjt
4+
awk -f ./compact_char_sets.awk < unicode-identifiers.txt > $gendir/compact-unicode-identifiers.txt
5+
#cp unicode-identifiers.txt $gendir/compact-unicode-identifiers.txt
6+
# Assemble the grammar; the compacted identifier set is the file the awk step writes.
cat javacc-options-java.txt kw.txt sql-spec.txt presto-extensions.txt nonreservedwords.txt $gendir/compact-unicode-identifiers.txt lexical-elements.txt > $gendir/parser_tmp.jjt

parser/grammar/unicode-identifier-extend.txt

Lines changed: 3316 additions & 0 deletions
Large diffs are not rendered by default.

parser/grammar/unicode-identifier-start.txt

Lines changed: 22010 additions & 0 deletions
Large diffs are not rendered by default.

parser/src/test/java/com/facebook/coresql/parser/TestSqlParser.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public class TestSqlParser
5252
"SELECT f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f(f())))))))))))))))))))))))))))));",
5353
"SELECT abs, 2 as abs;",
5454
"SELECT sqrt(x), power(y, 5), myFunction('a') FROM T;",
55+
"SELECT 1 ఒకట;"
5556
};
5657

5758
private AstNode parse(String sql)

0 commit comments

Comments
 (0)