Skip to content

Commit 62943d6

Browse files
committed
Add a base generic WP_Parser_Token class, add docs
1 parent c8ea855 commit 62943d6

11 files changed

+95
-54
lines changed

tests/bootstrap.php

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
<?php
22

33
require_once __DIR__ . '/wp-sqlite-schema.php';
4-
require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-token.php';
5-
require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-lexer.php';
64
require_once __DIR__ . '/../wp-includes/parser/class-wp-parser-grammar.php';
75
require_once __DIR__ . '/../wp-includes/parser/class-wp-parser.php';
86
require_once __DIR__ . '/../wp-includes/parser/class-wp-parser-node.php';
7+
require_once __DIR__ . '/../wp-includes/parser/class-wp-parser-token.php';
8+
require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-token.php';
9+
require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-lexer.php';
910
require_once __DIR__ . '/../wp-includes/mysql/class-wp-mysql-parser.php';
1011
require_once __DIR__ . '/../wp-includes/sqlite/class-wp-sqlite-query-rewriter.php';
1112
require_once __DIR__ . '/../wp-includes/sqlite/class-wp-sqlite-lexer.php';

tests/mysql/WP_MySQL_Lexer_Tests.php

+11-11
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,23 @@ public function test_tokenize_valid_input(): void {
88

99
// SELECT
1010
$this->assertTrue( $lexer->next_token() );
11-
$this->assertSame( WP_MySQL_Lexer::SELECT_SYMBOL, $lexer->get_token()->get_type() );
11+
$this->assertSame( WP_MySQL_Lexer::SELECT_SYMBOL, $lexer->get_token()->id );
1212

1313
// id
1414
$this->assertTrue( $lexer->next_token() );
15-
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $lexer->get_token()->get_type() );
15+
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $lexer->get_token()->id );
1616

1717
// FROM
1818
$this->assertTrue( $lexer->next_token() );
19-
$this->assertSame( WP_MySQL_Lexer::FROM_SYMBOL, $lexer->get_token()->get_type() );
19+
$this->assertSame( WP_MySQL_Lexer::FROM_SYMBOL, $lexer->get_token()->id );
2020

2121
// users
2222
$this->assertTrue( $lexer->next_token() );
23-
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $lexer->get_token()->get_type() );
23+
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $lexer->get_token()->id );
2424

2525
// EOF
2626
$this->assertTrue( $lexer->next_token() );
27-
$this->assertSame( WP_MySQL_Lexer::EOF, $lexer->get_token()->get_type() );
27+
$this->assertSame( WP_MySQL_Lexer::EOF, $lexer->get_token()->id );
2828

2929
// No more tokens.
3030
$this->assertFalse( $lexer->next_token() );
@@ -40,7 +40,7 @@ public function test_tokenize_invalid_input(): void {
4040

4141
// SELECT
4242
$this->assertTrue( $lexer->next_token() );
43-
$this->assertSame( WP_MySQL_Lexer::SELECT_SYMBOL, $lexer->get_token()->get_type() );
43+
$this->assertSame( WP_MySQL_Lexer::SELECT_SYMBOL, $lexer->get_token()->id );
4444

4545
// Invalid input.
4646
$this->assertFalse( $lexer->next_token() );
@@ -66,7 +66,7 @@ public function test_identifier_utf8_range(): void {
6666
$lexer = new WP_MySQL_Lexer( $value );
6767
$this->assertTrue( $lexer->next_token() );
6868

69-
$type = $lexer->get_token()->get_type();
69+
$type = $lexer->get_token()->id;
7070
$is_valid = preg_match( '/^[\x{0080}-\x{ffff}]$/u', $value );
7171
if ( $is_valid ) {
7272
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $type );
@@ -95,7 +95,7 @@ public function test_identifier_utf8_two_byte_sequences(): void {
9595
$is_valid = preg_match( '/^[\x{0080}-\x{ffff}]$/u', $value );
9696
if ( $is_valid ) {
9797
$this->assertTrue( $result );
98-
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $token->get_type() );
98+
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $token->id );
9999
} else {
100100
$this->assertFalse( $result );
101101
$this->assertNull( $token );
@@ -125,7 +125,7 @@ public function test_identifier_utf8_three_byte_sequences(): void {
125125
$is_valid = preg_match( '/^[\x{0080}-\x{ffff}]$/u', $value );
126126
if ( $is_valid ) {
127127
$this->assertTrue( $result );
128-
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $token->get_type() );
128+
$this->assertSame( WP_MySQL_Lexer::IDENTIFIER, $token->id );
129129
} else {
130130
$this->assertFalse( $result );
131131
$this->assertNull( $token );
@@ -141,7 +141,7 @@ public function test_identifier_utf8_three_byte_sequences(): void {
141141
public function test_integer_types( $input, $expected ): void {
142142
$lexer = new WP_MySQL_Lexer( $input );
143143
$this->assertTrue( $lexer->next_token() );
144-
$this->assertSame( $expected, $lexer->get_token()->get_type() );
144+
$this->assertSame( $expected, $lexer->get_token()->id );
145145
}
146146

147147
public function data_integer_types(): array {
@@ -185,7 +185,7 @@ public function test_identifier_or_number( $input, $expected ): void {
185185
$lexer = new WP_MySQL_Lexer( $input );
186186
$actual = array_map(
187187
function ( $token ) {
188-
return $token->get_type();
188+
return $token->id;
189189
},
190190
$lexer->remaining_tokens()
191191
);

tests/tools/dump-ast.php

+3-2
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@ function ( $severity, $message, $file, $line ) {
1212
}
1313
);
1414

15-
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-token.php';
16-
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-lexer.php';
1715
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser.php';
1816
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-grammar.php';
1917
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-node.php';
18+
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-token.php';
19+
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-lexer.php';
2020
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-parser.php';
21+
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-token.php';
2122

2223
$grammar_data = include __DIR__ . '/../../wp-includes/mysql/mysql-grammar.php';
2324
$grammar = new WP_Parser_Grammar( $grammar_data );

tests/tools/dump-sqlite-query.php

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser.php';
44
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-grammar.php';
55
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-node.php';
6+
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-token.php';
67
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-lexer.php';
78
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-token.php';
89
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-parser.php';

tests/tools/run-lexer-benchmark.php

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ function ( $severity, $message, $file, $line ) {
1212
}
1313
);
1414

15+
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-token.php';
1516
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-token.php';
1617
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-lexer.php';
1718

tests/tools/run-parser-benchmark.php

+3-2
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@ function ( $severity, $message, $file, $line ) {
1313
}
1414
);
1515

16-
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-token.php';
17-
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-lexer.php';
1816
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-grammar.php';
1917
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-node.php';
18+
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser-token.php';
2019
require_once __DIR__ . '/../../wp-includes/parser/class-wp-parser.php';
20+
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-token.php';
21+
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-lexer.php';
2122
require_once __DIR__ . '/../../wp-includes/mysql/class-wp-mysql-parser.php';
2223

2324
function getStats( $total, $failures, $exceptions ) {
+29-29
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,39 @@
11
<?php
22

33
/**
4-
* @TODO: Consider making this a generic WP_Parser_Token or similar.
5-
* We can also make WP_MySQL_Token extend the generic one.
6-
* @TODO: Document the class.
4+
* MySQL token.
5+
*
6+
* This class represents a MySQL SQL token that is produced by WP_MySQL_Lexer,
7+
* and consumed by WP_MySQL_Parser during the parsing process.
78
*/
8-
class WP_MySQL_Token {
9+
class WP_MySQL_Token extends WP_Parser_Token {
910
/**
10-
* @TODO: Review and document these properties and their visibility.
11+
* Get the name of the token.
12+
*
13+
* This method is intended to be used only for testing and debugging purposes,
14+
* when tokens need to be presented by their names in a human-readable form.
15+
* It should not be used in production code, as it's not performance-optimized.
16+
*
17+
* @return string The token name.
1118
*/
12-
public $type;
13-
public $text;
14-
15-
public function __construct( $type, $text ) {
16-
$this->type = $type;
17-
$this->text = $text;
18-
}
19-
20-
public function get_type() {
21-
return $this->type;
19+
public function get_name(): string {
20+
$name = WP_MySQL_Lexer::get_token_name( $this->id );
21+
if ( null === $name ) {
22+
$name = 'UNKNOWN';
23+
}
24+
return $name;
2225
}
2326

24-
public function get_text() {
25-
return $this->text;
26-
}
27-
28-
public function get_name() {
29-
return WP_MySQL_Lexer::get_token_name( $this->type );
30-
}
31-
32-
public function extract_value() {
33-
return $this->get_text();
34-
}
35-
36-
public function __toString() {
37-
return $this->text . '<' . $this->type . ',' . $this->get_name() . '>';
27+
/**
28+
* Get the token representation as a string.
29+
*
30+
* This method is intended to be used only for testing and debugging purposes,
31+
* when tokens need to be presented in a human-readable form. It should not
32+
* be used in production code, as it's not performance-optimized.
33+
*
34+
* @return string The token value followed by its ID and name, formatted as "value<id,name>".
35+
*/
36+
public function __toString(): string {
37+
return $this->value . '<' . $this->id . ',' . $this->get_name() . '>';
3838
}
3939
}

wp-includes/parser/class-wp-parser-node.php

+4-4
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,9 @@ public function has_child( $rule_name ) {
113113

114114
public function has_token( $token_id = null ) {
115115
foreach ( $this->children as $child ) {
116-
if ( $child instanceof WP_MySQL_Token && (
116+
if ( $child instanceof WP_Parser_Token && (
117117
null === $token_id ||
118-
$child->type === $token_id
118+
$child->id === $token_id
119119
) ) {
120120
return true;
121121
}
@@ -125,9 +125,9 @@ public function has_token( $token_id = null ) {
125125

126126
public function get_token( $token_id = null ) {
127127
foreach ( $this->children as $child ) {
128-
if ( $child instanceof WP_MySQL_Token && (
128+
if ( $child instanceof WP_Parser_Token && (
129129
null === $token_id ||
130-
$child->type === $token_id
130+
$child->id === $token_id
131131
) ) {
132132
return $child;
133133
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?php
2+
3+
/**
4+
* A token, representing a leaf in the parse tree.
5+
*
6+
* This class represents a token that is consumed and recognized by WP_Parser.
7+
* In a parse tree, a token represents a leaf, that is, a node without children.
8+
* It is a simple generic container for a token ID and value, that can be used
9+
* as a base class and extended for specific use cases.
10+
*/
11+
class WP_Parser_Token {
12+
/**
13+
* Token ID represented as an integer constant.
14+
*
15+
* @var int
16+
*/
17+
public $id;
18+
19+
/**
20+
* Token value in its original raw form.
21+
*
22+
* @var string
23+
*/
24+
public $value;
25+
26+
/**
27+
* Constructor.
28+
*
29+
* @param int    $id    Token ID.
30+
* @param string $value Token value.
31+
*/
32+
public function __construct( int $id, string $value ) {
33+
$this->id = $id;
34+
$this->value = $value;
35+
}
36+
}

wp-includes/parser/class-wp-parser.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ private function parse_recursive( $rule_id ) {
3737
return true;
3838
}
3939

40-
if ( $this->tokens[ $this->position ]->type === $rule_id ) {
40+
if ( $this->tokens[ $this->position ]->id === $rule_id ) {
4141
++$this->position;
4242
return $this->tokens[ $this->position - 1 ];
4343
}
@@ -52,7 +52,7 @@ private function parse_recursive( $rule_id ) {
5252
// Bale out from processing the current branch if none of its rules can
5353
// possibly match the current token.
5454
if ( isset( $this->grammar->lookahead_is_match_possible[ $rule_id ] ) ) {
55-
$token_id = $this->tokens[ $this->position ]->type;
55+
$token_id = $this->tokens[ $this->position ]->id;
5656
if (
5757
! isset( $this->grammar->lookahead_is_match_possible[ $rule_id ][ $token_id ] ) &&
5858
! isset( $this->grammar->lookahead_is_match_possible[ $rule_id ][ WP_Parser_Grammar::EMPTY_RULE_ID ] )
@@ -101,7 +101,7 @@ private function parse_recursive( $rule_id ) {
101101
// See: https://github.com/mysql/mysql-workbench/blob/8.0.38/library/parsers/grammars/MySQLParser.g4#L994
102102
// See: https://github.com/antlr/antlr4/issues/488
103103
$la = $this->tokens[ $this->position ] ?? null;
104-
if ( $la && 'selectStatement' === $rule_name && WP_MySQL_Lexer::INTO_SYMBOL === $la->type ) {
104+
if ( $la && 'selectStatement' === $rule_name && WP_MySQL_Lexer::INTO_SYMBOL === $la->id ) {
105105
$branch_matches = false;
106106
}
107107

wp-includes/sqlite-ast/class-wp-sqlite-driver-prototype.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ private function translate_query( $ast ) {
141141

142142
if ( $ast instanceof WP_MySQL_Token ) {
143143
$token = $ast;
144-
switch ( $token->type ) {
144+
switch ( $token->id ) {
145145
case WP_MySQL_Lexer::EOF:
146146
return new WP_SQLite_Expression( array() );
147147

0 commit comments

Comments
 (0)