Skip to content

Commit 68eb288

Browse files
committed
wip
1 parent 90f1f29 commit 68eb288

File tree

3 files changed

+206
-42
lines changed

3 files changed

+206
-42
lines changed

tests/WP_SQLite_Driver_Tests.php

+82-42
Original file line numberDiff line numberDiff line change
@@ -3033,13 +3033,13 @@ public function testTranslatesUtf8SELECT() {
30333033
$this->assertQuery( 'DELETE FROM _options' );
30343034
}
30353035

3036-
public function testTranslateLikeBinaryAndGlob() {
3036+
public function testTranslateLikeBinary() {
30373037
// Create a temporary table for testing
30383038
$this->assertQuery(
3039-
"CREATE TABLE _tmp_table (
3039+
"CREATE TABLE _tmp_table (
30403040
ID INTEGER PRIMARY KEY AUTO_INCREMENT NOT NULL,
3041-
name varchar(20) NOT NULL default ''
3042-
);"
3041+
name varchar(20)
3042+
)"
30433043
);
30443044

30453045
// Insert data into the table
@@ -3052,70 +3052,110 @@ public function testTranslateLikeBinaryAndGlob() {
30523052
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special%chars');" );
30533053
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special_chars');" );
30543054
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('special\\chars');" );
3055+
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('aste*risk');" );
3056+
$this->assertQuery( "INSERT INTO _tmp_table (name) VALUES ('question?mark');" );
30553057

3056-
// Test case-sensitive LIKE BINARY
3058+
// Test exact string
30573059
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first'" );
30583060
$this->assertCount( 1, $result );
30593061
$this->assertEquals( 'first', $result[0]->name );
30603062

3061-
// Test case-sensitive LIKE BINARY with wildcard %
3063+
// Test exact string with no matches
3064+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" );
3065+
$this->assertCount( 0, $result );
3066+
3067+
// Test mixed case
3068+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" );
3069+
$this->assertCount( 0, $result );
3070+
3071+
// Test % wildcard
30623072
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f%'" );
30633073
$this->assertCount( 1, $result );
30643074
$this->assertEquals( 'first', $result[0]->name );
30653075

3066-
// Test case-sensitive LIKE BINARY with wildcard _
3076+
// Test % wildcard with no matches
3077+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x%'" );
3078+
$this->assertCount( 0, $result );
3079+
3080+
// Test "%" character (not a wildcard)
3081+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" );
3082+
$this->assertCount( 1, $result );
3083+
$this->assertEquals( 'special%chars', $result[0]->name );
3084+
3085+
// Test _ wildcard
30673086
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f_rst'" );
30683087
$this->assertCount( 1, $result );
30693088
$this->assertEquals( 'first', $result[0]->name );
30703089

3071-
// Test case-insensitive LIKE
3072-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" );
3073-
$this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST'
3090+
// Test _ wildcard with no matches
3091+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'x_yz'" );
3092+
$this->assertCount( 0, $result );
30743093

3075-
// Test mixed case with LIKE BINARY
3076-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'First'" );
3077-
$this->assertCount( 0, $result );
3094+
// Test "_" character (not a wildcard)
3095+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\_chars'" );
3096+
$this->assertCount( 1, $result );
3097+
$this->assertEquals( 'special_chars', $result[0]->name );
30783098

3079-
// Test no matches with LIKE BINARY
3080-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'third'" );
3081-
$this->assertCount( 0, $result );
3099+
// Test escaping of "*"
3100+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'aste*risk'" );
3101+
$this->assertCount( 1, $result );
3102+
$this->assertEquals( 'aste*risk', $result[0]->name );
30823103

3083-
// Test GLOB equivalent for case-sensitive matching with wildcard
3084-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f*'" );
3085-
$this->assertCount( 1, $result );
3086-
$this->assertEquals( 'first', $result[0]->name );
3104+
// Test escaping of "*" with no matches
3105+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f*'" );
3106+
$this->assertCount( 0, $result );
30873107

3088-
// Test GLOB with single character wildcard
3089-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'f?rst'" );
3090-
$this->assertCount( 1, $result );
3091-
$this->assertEquals( 'first', $result[0]->name );
3092-
3093-
// Test GLOB with no matches
3094-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'S*'" );
3095-
$this->assertCount( 0, $result );
3108+
// Test escaping of "?"
3109+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'question?mark'" );
3110+
$this->assertCount( 1, $result );
3111+
$this->assertEquals( 'question?mark', $result[0]->name );
30963112

3097-
// Test GLOB case sensitivity with LIKE and GLOB
3098-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'first';" );
3099-
$this->assertCount( 1, $result ); // Should only match 'first'
3113+
// Test escaping of "?" with no matches
3114+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'f?rst'" );
3115+
$this->assertCount( 0, $result );
31003116

3101-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name GLOB 'FIRST';" );
3102-
$this->assertCount( 1, $result ); // Should only match 'FIRST'
3117+
// Test escaping of character class
3118+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '[f]irst'" );
3119+
$this->assertCount( 0, $result );
31033120

3104-
// Test NULL comparison with LIKE BINARY
3105-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'first';" );
3106-
$this->assertCount( 1, $result );
3107-
$this->assertEquals( 'first', $result[0]->name );
3108-
3109-
$result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL;' );
3110-
$this->assertCount( 0, $result ); // NULL comparison should return no results
3121+
// Test NULL
3122+
$result = $this->assertQuery( 'SELECT * FROM _tmp_table WHERE name LIKE BINARY NULL' );
3123+
$this->assertCount( 0, $result );
31113124

31123125
// Test pattern with special characters using LIKE BINARY
3113-
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%';" );
3126+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY '%special%'" );
31143127
$this->assertCount( 4, $result );
31153128
$this->assertEquals( '%special%', $result[0]->name );
31163129
$this->assertEquals( 'special%chars', $result[1]->name );
31173130
$this->assertEquals( 'special_chars', $result[2]->name );
3118-
$this->assertEquals( 'specialchars', $result[3]->name );
3131+
$this->assertEquals( 'special\chars', $result[3]->name );
3132+
3133+
// Test escaping - "\t" is a tab character
3134+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\t'" );
3135+
$this->assertCount( 0, $result );
3136+
3137+
// Test escaping - "\\t" is "t" (input resolves to "\t", which LIKE resolves to "t")
3138+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'firs\\\\t'" );
3139+
$this->assertCount( 1, $result );
3140+
$this->assertEquals( 'first', $result[0]->name );
3141+
3142+
// Test escaping - "\%" is a "%" literal
3143+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\%chars'" );
3144+
$this->assertCount( 1, $result );
3145+
$this->assertEquals( 'special%chars', $result[0]->name );
3146+
3147+
// Test escaping - "\\%" is also a "%" literal
3148+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\%chars'" );
3149+
$this->assertCount( 1, $result );
3150+
$this->assertEquals( 'special%chars', $result[0]->name );
3151+
3152+
// Test escaping - "\\\%" is "\" and a wildcard
3153+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE BINARY 'special\\\\\\%chars'" );
3154+
$this->assertCount( 0, $result );
3155+
3156+
// Test LIKE without BINARY
3157+
$result = $this->assertQuery( "SELECT * FROM _tmp_table WHERE name LIKE 'FIRST'" );
3158+
$this->assertCount( 2, $result ); // Should match both 'first' and 'FIRST'
31193159
}
31203160

31213161
public function testOnConflictReplace() {

wp-includes/sqlite-ast/class-wp-sqlite-driver.php

+52
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,14 @@ private function translate( $ast ) {
13131313
throw $this->not_supported_exception(
13141314
sprintf( 'data type: %s', $child->value )
13151315
);
1316+
case 'predicateOperations':
1317+
$token = $ast->get_child_token();
1318+
if ( WP_MySQL_Lexer::LIKE_SYMBOL === $token->id ) {
1319+
return $this->translate_like( $ast );
1320+
} else if ( WP_MySQL_Lexer::REGEXP_SYMBOL === $token->id ) {
1321+
return $this->translate_regexp_functions( $ast );
1322+
}
1323+
return $this->translate_sequence( $ast->get_children() );
13161324
case 'systemVariable':
13171325
// @TODO: Emulate some system variables, or use reasonable defaults.
13181326
// See: https://dev.mysql.com/doc/refman/8.4/en/server-system-variable-reference.html
@@ -1336,6 +1344,43 @@ private function translate( $ast ) {
13361344
}
13371345
}
13381346

1347+
/**
 * Translate a "REGEXP" / "REGEXP BINARY" predicate operation.
 *
 * The BINARY variant is detected by checking whether the second descendant
 * token of the node is the BINARY keyword.
 *
 * @param WP_Parser_Node $node The "predicateOperations" node that starts with REGEXP.
 * @return string The translated fragment, starting with the REGEXP operator.
 */
private function translate_regexp_functions( WP_Parser_Node $node ): string {
	$descendant_tokens = $node->get_descendant_tokens();
	$has_binary        = isset( $descendant_tokens[1] )
		&& WP_MySQL_Lexer::BINARY_SYMBOL === $descendant_tokens[1]->id;

	/*
	 * REGEXP BINARY compares byte-by-byte, so letter casing matters:
	 * lowercase and uppercase letters have different byte codes.
	 *
	 * The REGEXP function can't easily take a second parameter, so the
	 * case-sensitivity flag is smuggled into the pattern itself: a null
	 * byte is prepended, and the REGEXP implementation is expected to
	 * strip it and switch to a case-sensitive comparison. This should be
	 * reasonably safe, because PHP does not allow null bytes in regular
	 * expressions anyway.
	 */
	$operator = $has_binary ? 'REGEXP CHAR(0) || ' : 'REGEXP ';

	return $operator . $this->translate( $node->get_child_node() );
}
1369+
1370+
/**
 * Translate a "LIKE" / "LIKE BINARY" predicate operation.
 *
 * A plain LIKE is passed through as a regular token sequence. LIKE BINARY
 * is rewritten to a GLOB comparison whose pattern is converted at query
 * time by the "_helper_like_to_glob_pattern" user-defined function.
 *
 * @param WP_Parser_Node $node The "predicateOperations" node that starts with LIKE.
 * @return string The translated fragment.
 */
private function translate_like( WP_Parser_Node $node ): string {
	$descendant_tokens = $node->get_descendant_tokens();
	$has_binary        = isset( $descendant_tokens[1] )
		&& WP_MySQL_Lexer::BINARY_SYMBOL === $descendant_tokens[1]->id;

	$child_items = $node->get_children();

	// Without the BINARY keyword there is nothing to rewrite.
	if ( ! $has_binary ) {
		return $this->translate_sequence( $child_items );
	}

	/*
	 * Only the child at index 1 (the pattern expression) is translated here.
	 *
	 * NOTE(review): any children after index 1 (e.g. an ESCAPE clause) are
	 * not part of the output in this branch; confirm that this is intended.
	 */
	$translated_pattern = $this->translate( $child_items[1] );

	return sprintf( "GLOB _helper_like_to_glob_pattern(%s)", $translated_pattern );
}
1383+
13391384
private function translate_token( WP_MySQL_Token $token ) {
13401385
switch ( $token->id ) {
13411386
case WP_MySQL_Lexer::EOF:
@@ -1346,6 +1391,13 @@ private function translate_token( WP_MySQL_Token $token ) {
13461391
return '"' . trim( $token->value, '`"' ) . '"';
13471392
case WP_MySQL_Lexer::AUTO_INCREMENT_SYMBOL:
13481393
return 'AUTOINCREMENT';
1394+
case WP_MySQL_Lexer::BINARY_SYMBOL:
1395+
/*
1396+
* There is no "BINARY expr" equivalent in SQLite. We can look for
1397+
* the BINARY keyword in particular cases (with REGEXP, LIKE, etc.)
1398+
* and then remove it from the translated output here.
1399+
*/
1400+
return null;
13491401
default:
13501402
return $token->value;
13511403
}

wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php

+72
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ public function __construct( $pdo ) {
8585
'utc_time' => 'utc_time',
8686
'utc_timestamp' => 'utc_timestamp',
8787
'version' => 'version',
88+
89+
'_helper_like_to_glob_pattern' => '_helper_like_to_glob_pattern',
8890
);
8991

9092
/**
@@ -759,4 +761,74 @@ public function utc_timestamp() {
759761
public function version() {
760762
return '5.5';
761763
}
764+
765+
/**
 * A helper to convert a LIKE pattern to a GLOB pattern for "LIKE BINARY" support.
 *
 * The conversion applies the following steps, in exactly this order:
 *
 * 1. Escape the GLOB special characters "[", "]", "*", and "?" by wrapping
 *    each of them in a character class ("[[]", "[]]", "[*]", "[?]"). This is
 *    done in a single pass, so that brackets introduced by the escaping are
 *    never escaped again.
 * 2. Walk the pattern byte by byte:
 *    - Replace "%" with "*" and "_" with "?" (when not escaped by "\").
 *    - Copy a backslash together with the byte that follows it verbatim.
 *    - Collapse "\\%" and "\\_" to "\%" and "\_", as they are equivalent.
 * 3. Unescape C-style escape sequences (e.g., "\t" to a tab, "\\" to "\").
 * 4. Unescape LIKE escape sequences: a remaining backslash followed by any
 *    character is replaced by that character (e.g., "\%" to "%").
 *
 * @TODO: Unescaping backslashes for a MySQL LIKE is actually more complex
 *        due to a bug: https://bugs.mysql.com/bug.php?id=84118
 *        We can consider implementing compatibility with the bug, but it
 *        affects all LIKE patterns, not just LIKE BINARY.
 *
 * @param string|null $pattern The LIKE pattern.
 * @return string|null The GLOB pattern, or null when the LIKE pattern is null.
 */
public function _helper_like_to_glob_pattern( $pattern ) {
	if ( null === $pattern ) {
		return null;
	}

	/*
	 * 1. Escape characters that are special in GLOB patterns.
	 *
	 * "[" must be escaped as well; otherwise it would open a character
	 * class and a pattern such as "a[b" could never match its literal text.
	 * A single strtr() pass guarantees that the brackets added for one
	 * character are not picked up when escaping another one.
	 */
	$pattern = strtr(
		(string) $pattern,
		array(
			'[' => '[[]',
			']' => '[]]',
			'*' => '[*]',
			'?' => '[?]',
		)
	);

	// 2. Translate LIKE wildcards, keeping escape sequences intact for now.
	$glob_pattern = '';
	$length       = strlen( $pattern );
	for ( $i = 0; $i < $length; $i += 1 ) {
		$byte1 = $pattern[ $i ];
		if ( '\\' === $byte1 ) {
			// Add the escape character.
			$glob_pattern .= $byte1;

			// Special case: "\\%" and "\\_" are equivalent to "\%" and "\_".
			// In such case, we need to skip the extra backslash.
			$byte2 = $pattern[ $i + 1 ] ?? '';
			$byte3 = $pattern[ $i + 2 ] ?? '';
			if ( '\\' === $byte2 && ( '%' === $byte3 || '_' === $byte3 ) ) {
				$glob_pattern .= $byte3;
				$i            += 2;
				continue;
			}

			// We're in an escape sequence. Add the next byte as it is.
			// For a trailing backslash, there is no next byte to add.
			$glob_pattern .= $byte2;
			$i            += 1;
		} elseif ( '%' === $byte1 ) {
			$glob_pattern .= '*';
		} elseif ( '_' === $byte1 ) {
			$glob_pattern .= '?';
		} else {
			$glob_pattern .= $byte1;
		}
	}

	// 3. Unescape C-style escape sequences.
	$glob_pattern = stripcslashes( $glob_pattern );

	// 4. Unescape LIKE escape sequences. The "s" modifier makes "." match
	//    a newline too, so that an escaped newline is unescaped as well.
	$glob_pattern = preg_replace( '/\\\\(.)/s', '$1', $glob_pattern );

	return $glob_pattern;
}
762834
}

0 commit comments

Comments
 (0)