3
3
use PHPUnit \Framework \TestCase ;
4
4
5
5
class WP_MySQL_Lexer_Tests extends TestCase {
6
/**
 * Tokenize a valid query and verify each token type in order, then verify
 * that the lexer keeps reporting "no token" after the EOF token was read.
 */
public function test_tokenize_valid_input(): void {
	$lexer = new WP_MySQL_Lexer( 'SELECT id FROM users ' );

	// Token types expected in reading order; the EOF token is still a token.
	$expected_types = array(
		WP_MySQL_Lexer::SELECT_SYMBOL, // SELECT
		WP_MySQL_Lexer::IDENTIFIER,    // id
		WP_MySQL_Lexer::FROM_SYMBOL,   // FROM
		WP_MySQL_Lexer::IDENTIFIER,    // users
		WP_MySQL_Lexer::EOF,           // EOF
	);

	foreach ( $expected_types as $expected_type ) {
		$this->assertTrue( $lexer->next_token() );
		$this->assertSame( $expected_type, $lexer->get_token()->get_type() );
	}

	// No more tokens. Checked twice so that a repeated call after the end
	// is covered as well as the first one.
	for ( $attempt = 0; $attempt < 2; $attempt += 1 ) {
		$this->assertFalse( $lexer->next_token() );
		$this->assertNull( $lexer->get_token() );
	}
}
37
+
38
/**
 * Tokenize a query containing an invalid hex literal and verify that the
 * lexer stops at the invalid input and yields no token from then on.
 */
public function test_tokenize_invalid_input(): void {
	$lexer = new WP_MySQL_Lexer( "SELECT x'ab01xyz' " );

	// SELECT
	$this->assertTrue( $lexer->next_token() );
	$this->assertSame( WP_MySQL_Lexer::SELECT_SYMBOL, $lexer->get_token()->get_type() );

	// First pass: the invalid input itself is rejected.
	// Second and third pass: there are still no more tokens afterwards.
	for ( $attempt = 0; $attempt < 3; $attempt += 1 ) {
		$this->assertFalse( $lexer->next_token() );
		$this->assertNull( $lexer->get_token() );
	}
}
57
+
6
58
/**
 * Test that the whole U+0080 to U+FFFF UTF-8 range is valid in an identifier.
 * The validity is checked against PCRE with the "u" (PCRE_UTF8) modifier set.
 */
public function test_identifier_utf8_range(): void {
	// The upper bound is inclusive so that U+FFFF itself is exercised too.
	for ( $i = 0x80; $i <= 0xffff; $i += 1 ) {
		// The encoding name must be exactly "UTF-8"; mbstring does not
		// accept names with surrounding whitespace.
		$value = mb_chr( $i, 'UTF-8' );

		// next_token() reports success as a boolean; the token itself is
		// fetched separately through get_token().
		$lexer = new WP_MySQL_Lexer( $value );
		$this->assertTrue( $lexer->next_token() );

		$type     = $lexer->get_token()->get_type();
		$is_valid = preg_match( '/^[\x{0080}-\x{ffff}]$/u ', $value );
		if ( $is_valid ) {
			$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $type );
		} else {
			// NOTE(review): presumably reached only when mb_chr() cannot
			// encode the code point and the lexer input is empty, so the
			// first token is EOF. Confirm for surrogate code points.
			$this->assertSame( WP_MySQL_Lexer::EOF, $type );
		}
	}
}
@@ -33,14 +86,19 @@ public function test_identifier_utf8_range(): void {
33
86
public function test_identifier_utf8_two_byte_sequences (): void {
34
87
for ( $ byte_1 = 128 ; $ byte_1 <= 255 ; $ byte_1 += 1 ) {
35
88
for ( $ byte_2 = 128 ; $ byte_2 <= 255 ; $ byte_2 += 1 ) {
36
- $ value = chr ( $ byte_1 ) . chr ( $ byte_2 );
89
+ $ value = chr ( $ byte_1 ) . chr ( $ byte_2 );
90
+
91
+ $ lexer = new WP_MySQL_Lexer ( $ value );
92
+ $ result = $ lexer ->next_token ();
93
+ $ token = $ lexer ->get_token ();
94
+
37
95
$ is_valid = preg_match ( '/^[\x{0080}-\x{ffff}]$/u ' , $ value );
38
- $ lexer = new WP_MySQL_Lexer ( $ value );
39
- $ type = $ lexer ->next_token ()->get_type ();
40
96
if ( $ is_valid ) {
41
- $ this ->assertSame ( WP_MySQL_Lexer::IDENTIFIER , $ type );
97
+ $ this ->assertTrue ( $ result );
98
+ $ this ->assertSame ( WP_MySQL_Lexer::IDENTIFIER , $ token ->get_type () );
42
99
} else {
43
- $ this ->assertSame ( WP_MySQL_Lexer::INVALID_INPUT , $ type );
100
+ $ this ->assertFalse ( $ result );
101
+ $ this ->assertNull ( $ token );
44
102
}
45
103
}
46
104
}
@@ -58,14 +116,19 @@ public function test_identifier_utf8_three_byte_sequences(): void {
58
116
for ( $ byte_1 = 0xE0 ; $ byte_1 <= 0xFF ; $ byte_1 += 1 ) {
59
117
for ( $ byte_2 = 128 ; $ byte_2 <= 255 ; $ byte_2 += 1 ) {
60
118
for ( $ byte_3 = 128 ; $ byte_3 <= 255 ; $ byte_3 += 1 ) {
61
- $ value = chr ( $ byte_1 ) . chr ( $ byte_2 ) . chr ( $ byte_3 );
119
+ $ value = chr ( $ byte_1 ) . chr ( $ byte_2 ) . chr ( $ byte_3 );
120
+
121
+ $ lexer = new WP_MySQL_Lexer ( $ value );
122
+ $ result = $ lexer ->next_token ();
123
+ $ token = $ lexer ->get_token ();
124
+
62
125
$ is_valid = preg_match ( '/^[\x{0080}-\x{ffff}]$/u ' , $ value );
63
- $ lexer = new WP_MySQL_Lexer ( $ value );
64
- $ type = $ lexer ->next_token ()->get_type ();
65
126
if ( $ is_valid ) {
66
- $ this ->assertSame ( WP_MySQL_Lexer::IDENTIFIER , $ type );
127
+ $ this ->assertTrue ( $ result );
128
+ $ this ->assertSame ( WP_MySQL_Lexer::IDENTIFIER , $ token ->get_type () );
67
129
} else {
68
- $ this ->assertSame ( WP_MySQL_Lexer::INVALID_INPUT , $ type );
130
+ $ this ->assertFalse ( $ result );
131
+ $ this ->assertNull ( $ token );
69
132
}
70
133
}
71
134
}
@@ -77,8 +140,8 @@ public function test_identifier_utf8_three_byte_sequences(): void {
77
140
*/
78
141
public function test_integer_types( $input, $expected ): void {
	// Lex the first token of $input and compare its type with $expected.
	$lexer = new WP_MySQL_Lexer( $input );

	// next_token() only reports whether a token was read; the token object
	// has to be fetched with get_token() before its type can be inspected.
	$this->assertTrue( $lexer->next_token() );
	$this->assertSame( $expected, $lexer->get_token()->get_type() );
}
83
146
84
147
public function data_integer_types (): array {
@@ -145,20 +208,20 @@ public function data_identifier_or_number(): array {
145
208
array ( '0b01xyz ' , array ( WP_MySQL_Lexer::IDENTIFIER , WP_MySQL_Lexer::EOF ) ), // identifier
146
209
array ( '0b ' , array ( WP_MySQL_Lexer::IDENTIFIER , WP_MySQL_Lexer::EOF ) ), // identifier
147
210
array ( "b'01' " , array ( WP_MySQL_Lexer::BIN_NUMBER , WP_MySQL_Lexer::EOF ) ),
148
- array ( "b'01xyz' " , array ( WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: IDENTIFIER , WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: EOF ) ),
211
+ array ( "b'01xyz' " , array () ), // invalid input
149
212
array ( "b'' " , array ( WP_MySQL_Lexer::BIN_NUMBER , WP_MySQL_Lexer::EOF ) ),
150
- array ( "b' " , array ( WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: EOF ) ),
151
- array ( "b'01 " , array ( WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: EOF ) ),
213
+ array ( "b' " , array () ), // invalid input
214
+ array ( "b'01 " , array () ), // invalid input
152
215
153
216
// hex
154
217
array ( '0xab01 ' , array ( WP_MySQL_Lexer::HEX_NUMBER , WP_MySQL_Lexer::EOF ) ),
155
218
array ( '0xab01xyz ' , array ( WP_MySQL_Lexer::IDENTIFIER , WP_MySQL_Lexer::EOF ) ), // identifier
156
219
array ( '0x ' , array ( WP_MySQL_Lexer::IDENTIFIER , WP_MySQL_Lexer::EOF ) ), // identifier
157
220
array ( "x'ab01' " , array ( WP_MySQL_Lexer::HEX_NUMBER , WP_MySQL_Lexer::EOF ) ),
158
- array ( "x'ab01xyz' " , array ( WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: IDENTIFIER , WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: EOF ) ),
221
+ array ( "x'ab01xyz' " , array () ), // invalid input
159
222
array ( "x'' " , array ( WP_MySQL_Lexer::HEX_NUMBER , WP_MySQL_Lexer::EOF ) ),
160
- array ( "x' " , array ( WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: EOF ) ),
161
- array ( "x'ab " , array ( WP_MySQL_Lexer:: INVALID_INPUT , WP_MySQL_Lexer:: EOF ) ),
223
+ array ( "x' " , array () ), // invalid input
224
+ array ( "x'ab " , array () ), // invalid input
162
225
163
226
// decimal
164
227
array ( '123.456 ' , array ( WP_MySQL_Lexer::DECIMAL_NUMBER , WP_MySQL_Lexer::EOF ) ),
0 commit comments