1
+ #include " toylang/lexical.h"
2
+
3
+ #include " gtest/gtest.h"
4
+
5
// Drives a single toylang::Scanner through four lexicons in turn. Each
// scenario builds a lexicon with toylang::Lexicon::Builder, installs it with
// SetLexicon(), supplies fresh input with SetSource(), and then asserts the
// exact sequence of token ids produced by successive NextToken() calls.
// Expected ids for named tokens are looked up through IdOfToken() on the
// built lexicon; toylang::Token::kError and toylang::Token::kEOF are compared
// directly.
// NOTE(review): this text looks like it was captured from a diff view (leading
// '+', interleaved line numbers, extra spaces inside tokens and string
// literals). Confirm the exact literal contents against the repository.
+ TEST (LexiconTest, Basic) {
6
+ toylang::Scanner scanner;
7
+
8
// Scenario 1: a lexicon with a single literal token.
+ auto abc = toylang::Lexicon::Builder{}
9
+ .DefineToken (" ABC" , toylang::regex::Compile (" abc" ))
10
+ .Build ();
11
+
12
+ scanner.SetLexicon (abc);
13
+ scanner.SetSource (toylang::Source::Create (" abcabcac" ));
14
// Two matches are expected, then exactly one kError, then kEOF.
+ EXPECT_EQ (scanner.NextToken ().id , abc->IdOfToken (" ABC" ));
15
+ EXPECT_EQ (scanner.NextToken ().id , abc->IdOfToken (" ABC" ));
16
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kError );
17
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
18
+
19
// Scenario 2: two literal tokens in the same lexicon.
+ auto abc_asd = toylang::Lexicon::Builder{}
20
+ .DefineToken (" ABC" , toylang::regex::Compile (" abc" ))
21
+ .DefineToken (" ASD" , toylang::regex::Compile (" asd" ))
22
+ .Build ();
23
+
24
// The same scanner object is re-targeted rather than constructing a new one.
+ scanner.SetLexicon (abc_asd);
25
+ scanner.SetSource (toylang::Source::Create (" abcasdas" ));
26
// One token of each kind is expected, then one kError, then kEOF.
+ EXPECT_EQ (scanner.NextToken ().id , abc_asd->IdOfToken (" ABC" ));
27
+ EXPECT_EQ (scanner.NextToken ().id , abc_asd->IdOfToken (" ASD" ));
28
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kError );
29
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
30
+
31
// Scenario 3: one token defined by an alternation, plus a literal token.
+ auto ab_or_cd_asd = toylang::Lexicon::Builder{}
32
+ .DefineToken (" ABCD" , toylang::regex::Compile (" ab|cd" ))
33
+ .DefineToken (" ASD" , toylang::regex::Compile (" asd" ))
34
+ .Build ();
35
+
36
+ scanner.SetLexicon (ab_or_cd_asd);
37
+ scanner.SetSource (toylang::Source::Create (" abasdcd" ));
38
// Both alternatives are expected to report the same token id; no error is
// expected in this scenario.
+ EXPECT_EQ (scanner.NextToken ().id , ab_or_cd_asd->IdOfToken (" ABCD" ));
39
+ EXPECT_EQ (scanner.NextToken ().id , ab_or_cd_asd->IdOfToken (" ASD" ));
40
+ EXPECT_EQ (scanner.NextToken ().id , ab_or_cd_asd->IdOfToken (" ABCD" ));
41
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
42
+
43
// Scenario 4: a pattern combining grouping, alternation and the '+', '?' and
// '*' quantifiers, together with a whitespace token that separates the words.
+ auto abcdefg =
44
+ toylang::Lexicon::Builder{}
45
+ .DefineToken (" ABCDEFG" , toylang::regex::Compile (" a(bc+|de?)f*g" ))
46
+ .DefineToken (" SPACE" , toylang::regex::Compile (" \\ s+" ))
47
+ .Build ();
48
+
49
+ scanner.SetLexicon (abcdefg);
50
+ scanner.SetSource (toylang::Source::Create (" abcg abccg abcfg adfg adefffg" ));
51
// Five words are expected with a SPACE token between consecutive words,
// followed by kEOF.
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" ABCDEFG" ));
52
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" SPACE" ));
53
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" ABCDEFG" ));
54
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" SPACE" ));
55
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" ABCDEFG" ));
56
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" SPACE" ));
57
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" ABCDEFG" ));
58
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" SPACE" ));
59
+ EXPECT_EQ (scanner.NextToken ().id , abcdefg->IdOfToken (" ABCDEFG" ));
60
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
61
+ }
62
+
63
// Checks a token whose pattern is a bracketed character range alternated with
// the \d escape, repeated one or more times, alongside a whitespace token.
// The input holds three words separated by single spaces, so the expected
// sequence is RANGE, SPACE, RANGE, SPACE, RANGE and then kEOF.
// NOTE(review): the input contains only lowercase letters and spaces, so the
// \d branch of the RANGE pattern is not exercised by this test.
+ TEST (LexiconTest, Range) {
64
+ toylang::Scanner scanner;
65
+
66
+ auto range =
67
+ toylang::Lexicon::Builder{}
68
+ .DefineToken (" RANGE" , toylang::regex::Compile (" ([a-z]|\\ d)+" ))
69
+ .DefineToken (" SPACE" , toylang::regex::Compile (" \\ s+" ))
70
+ .Build ();
71
+
72
+ scanner.SetLexicon (range);
73
+ scanner.SetSource (toylang::Source::Create (" abc defg hijklmnopqrs" ));
74
// Words and separators must alternate, ending with kEOF and no error token.
+ EXPECT_EQ (scanner.NextToken ().id , range->IdOfToken (" RANGE" ));
75
+ EXPECT_EQ (scanner.NextToken ().id , range->IdOfToken (" SPACE" ));
76
+ EXPECT_EQ (scanner.NextToken ().id , range->IdOfToken (" RANGE" ));
77
+ EXPECT_EQ (scanner.NextToken ().id , range->IdOfToken (" SPACE" ));
78
+ EXPECT_EQ (scanner.NextToken ().id , range->IdOfToken (" RANGE" ));
79
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
80
+ }
81
+
82
// Checks the \w and \s character-class escapes. The ID token is one or more
// \w characters and SPACE is one or more \s characters. The input mixes
// letters and digits, including a word that begins with a digit, and every
// word is expected to be reported as ID.
+ TEST (LexiconTest, Class) {
83
+ toylang::Scanner scanner;
84
+
85
// Named with a trailing underscore because `class` is a C++ keyword.
+ auto class_ = toylang::Lexicon::Builder{}
86
+ .DefineToken (" ID" , toylang::regex::Compile (" \\ w+" ))
87
+ .DefineToken (" SPACE" , toylang::regex::Compile (" \\ s+" ))
88
+ .Build ();
89
+
90
+ scanner.SetLexicon (class_);
91
+ scanner.SetSource (toylang::Source::Create (" abc df2a 3f" ));
92
// Expected sequence: ID, SPACE, ID, SPACE, ID, then kEOF.
+ EXPECT_EQ (scanner.NextToken ().id , class_->IdOfToken (" ID" ));
93
+ EXPECT_EQ (scanner.NextToken ().id , class_->IdOfToken (" SPACE" ));
94
+ EXPECT_EQ (scanner.NextToken ().id , class_->IdOfToken (" ID" ));
95
+ EXPECT_EQ (scanner.NextToken ().id , class_->IdOfToken (" SPACE" ));
96
+ EXPECT_EQ (scanner.NextToken ().id , class_->IdOfToken (" ID" ));
97
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
98
+ }
99
+
100
// Checks numeric literal scanning with three tokens: NUMBER (optional sign,
// integer part without a leading zero unless it is exactly 0, optional
// fraction, optional exponent), BAD_NUMBER (a number-like prefix followed by
// one or more \l, \u or underscore characters) and SPACE.
// The input is a single string assembled from adjacent string literals.
// NOTE(review): the test name "Numbre" looks like a typo for "Number"; it is
// left unchanged here because renaming alters the registered test name.
// NOTE(review): \l and \u are presumably toylang's lowercase and uppercase
// letter classes; confirm against the regex implementation.
+ TEST (LexiconTest, Numbre) {
101
+ toylang::Scanner scanner;
102
+
103
+ auto number =
104
+ toylang::Lexicon::Builder{}
105
+ .DefineToken (" NUMBER" ,
106
+ toylang::regex::Compile (
107
+ " [+-]?(0|[1-9]\\ d*)(\\ .\\ d+)?([eE][+-]?[0-9]+)?" ))
108
+ .DefineToken (
109
+ " BAD_NUMBER" ,
110
+ toylang::regex::Compile (
111
+ " [+-]?\\ d+(\\ .\\ d+)?([eE][+-]?\\ d+)?(\\ l|\\ u|_)+" ))
112
+ .DefineToken (" SPACE" , toylang::regex::Compile (" \\ s+" ))
113
+ .Build ();
114
+
115
+ scanner.SetLexicon (number);
116
// Twelve number-like items, one per literal piece below.
+ scanner.SetSource (
117
+ toylang::Source::Create (" 123 "
118
+ " 123.456 "
119
+ " 0.456 "
120
+ " +123.456e+789 "
121
+ " -123.456e-789 "
122
+ " 123.456e789 "
123
+ " 123.456e "
124
+ " 123a "
125
+ " 0. "
126
+ " 023 "
127
+ " 123.456e+ "
128
+ " 123.456e- " ));
129
// Six NUMBER tokens are expected first, each followed by SPACE.
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" NUMBER" ));
130
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
131
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" NUMBER" ));
132
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
133
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" NUMBER" ));
134
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
135
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" NUMBER" ));
136
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
137
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" NUMBER" ));
138
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
139
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" NUMBER" ));
140
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
141
// Then two BAD_NUMBER tokens, each followed by SPACE.
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" BAD_NUMBER" ));
142
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
143
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" BAD_NUMBER" ));
144
+ EXPECT_EQ (scanner.NextToken ().id , number->IdOfToken (" SPACE" ));
145
// Finally four consecutive kError tokens with no SPACE in between.
// NOTE(review): presumably one error per remaining input item; how much input
// each error consumes depends on the scanner's recovery, which is not visible
// here. No kEOF is asserted at the end of this test.
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kError );
146
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kError );
147
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kError );
148
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kError );
149
+ }
150
+
151
// Checks comment scanning with three tokens: COMMENT_LINE (two slashes, any
// run of non-newline characters, then a newline), COMMENT_BLOCK (slash-star
// up to star-slash) and SPACE.
// The input holds a one-line block comment, a line comment and a multi-line
// documentation-style block comment; two of them contain non-ASCII text.
// NOTE(review): the expected SPACE tokens depend on the exact whitespace
// inside the source literals, which may have been altered when this text was
// captured; confirm against the repository.
+ TEST (LexiconTest, Comment) {
152
+ toylang::Scanner scanner;
153
+
154
+ auto comment = toylang::Lexicon::Builder{}
155
+ .DefineToken (" COMMENT_LINE" , toylang::regex::Compile (" \\ /\\ /[^\\ n]*\\ n" ))
156
+ .DefineToken (" COMMENT_BLOCK" , toylang::regex::Compile (" \\ /\\ *([^\\ /]|[^\\ *]\\ /)*\\ *\\ /" ))
157
+ .DefineToken (" SPACE" , toylang::regex::Compile (" \\ s+" ))
158
+ .Build ();
159
+
160
+ scanner.SetLexicon (comment);
161
+ scanner.SetSource (toylang::Source::Create (" /* comment */ // comment支持中文\n "
162
+ " /** document comment \n "
163
+ " * 支持中文 \n "
164
+ " */\n " ));
165
+
166
// Expected sequence: block comment, SPACE, line comment, SPACE, block
// comment, SPACE, then kEOF.
+ EXPECT_EQ (scanner.NextToken ().id , comment->IdOfToken (" COMMENT_BLOCK" ));
167
+ EXPECT_EQ (scanner.NextToken ().id , comment->IdOfToken (" SPACE" ));
168
+ EXPECT_EQ (scanner.NextToken ().id , comment->IdOfToken (" COMMENT_LINE" ));
169
+ EXPECT_EQ (scanner.NextToken ().id , comment->IdOfToken (" SPACE" ));
170
+ EXPECT_EQ (scanner.NextToken ().id , comment->IdOfToken (" COMMENT_BLOCK" ));
171
+ EXPECT_EQ (scanner.NextToken ().id , comment->IdOfToken (" SPACE" ));
172
+ EXPECT_EQ (scanner.NextToken ().id , toylang::Token::kEOF );
173
+ }
0 commit comments