Skip to content

Commit 496e88c

Browse files
committed
Improve string literal translation
1 parent df179b1 commit 496e88c

File tree

2 files changed

+69
-51
lines changed

2 files changed

+69
-51
lines changed

wp-includes/sqlite-ast/class-wp-sqlite-driver.php

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1617,28 +1617,7 @@ private function translate( $ast ) {
16171617
case 'identifierKeyword':
16181618
return '"' . $this->translate( $ast->get_child() ) . '"';
16191619
case 'textStringLiteral':
1620-
$token = $ast->get_child_token();
1621-
1622-
// 1. Remove bounding quotes.
1623-
$quote = $token->value[0];
1624-
$value = substr( $token->value, 1, -1 );
1625-
1626-
// 2. Unescape quotes within the string.
1627-
$value = str_replace( $quote . $quote, $quote, $value );
1628-
$value = str_replace( '\\' . $quote, $quote, $value );
1629-
1630-
// 3. Translate datetime literals.
1631-
// Process only strings that could possibly represent a datetime
1632-
// literal ("YYYY-MM-DDTHH:MM:SS", "YYYY-MM-DDTHH:MM:SSZ", etc.).
1633-
if ( strlen( $value ) >= 19 && is_numeric( $value[0] ) ) {
1634-
$value = $this->translate_datetime_literal( $value );
1635-
}
1636-
1637-
// 4. Remove null characters.
1638-
$value = str_replace( "\0", '', $value );
1639-
1640-
// 5. Escape and add quotes.
1641-
return "'" . str_replace( "'", "''", $value ) . "'";
1620+
return $this->translate_string_literal( $ast );
16421621
case 'dataType':
16431622
case 'nchar':
16441623
$child = $ast->get_child();
@@ -1779,6 +1758,71 @@ private function translate_sequence( array $nodes, string $separator = ' ' ): ?s
17791758
return implode( $separator, $parts );
17801759
}
17811760

1761+
/**
 * Translate a MySQL string literal node into an SQLite string literal.
 *
 * The literal is read from the node's child token, its MySQL-specific
 * escaping is resolved, and the resulting value is re-quoted using single
 * quotes with embedded single quotes doubled.
 *
 * @param  WP_Parser_Node $node The string literal AST node.
 * @return string               The single-quoted SQLite string literal.
 */
private function translate_string_literal( WP_Parser_Node $node ): string {
	$token = $node->get_child_token();

	/*
	 * 1. Remove bounding quotes.
	 *
	 * The first character of the token is the quote character in use.
	 */
	$quote = $token->value[0];
	$value = substr( $token->value, 1, -1 );

	/*
	 * 2. Normalize escaping of "%" and "_" characters.
	 *
	 * MySQL has unusual handling for "\%" and "\_" in all string literals.
	 * While other sequences follow the C-style escaping ("\?" is "?", etc.),
	 * "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings).
	 *
	 * This means that "\%" behaves like "\\%", and "\_" behaves like "\\_".
	 * To preserve this behavior, we need to add a second backslash in cases
	 * where only one is used. To do so correctly, we need to:
	 *
	 *   1. Skip all double backslash patterns (as "\\" resolves to "\").
	 *   2. Add an extra backslash when "\%" or "\_" follows right after.
	 *
	 * The pattern starts at a position that is not preceded by a backslash,
	 * captures the complete run of backslash pairs in a single group, and
	 * then requires one more backslash followed by "%" or "_". Capturing the
	 * whole run in one group matters: a quantifier applied to a capturing
	 * group keeps only its last iteration, so text matched by earlier
	 * iterations would be dropped from the replacement.
	 *
	 * This may be related to: https://bugs.mysql.com/bug.php?id=84118
	 */
	$value = preg_replace( '/(?<!\\\\)((?:\\\\{2})*)(\\\\[%_])/', '$1\\\\$2', $value );

	/*
	 * 3. Unescape quotes within the string.
	 *
	 * A doubled bounding quote represents a single quote character.
	 */
	$value = str_replace( $quote . $quote, $quote, $value );

	/*
	 * 4. Unescape C-style escape sequences.
	 *
	 * MySQL string literals are represented using C-style encoded strings,
	 * but SQLite doesn't support such escaping.
	 *
	 * NOTE(review): stripcslashes() also decodes octal and hexadecimal
	 * sequences, which may differ from how MySQL treats them; confirm.
	 *
	 * @TODO: Handle NO_BACKSLASH_ESCAPES SQL mode.
	 */
	$value = stripcslashes( $value );

	/*
	 * 5. Translate datetime literals.
	 *
	 * Process only strings that could possibly represent a datetime
	 * literal ("YYYY-MM-DDTHH:MM:SS", "YYYY-MM-DDTHH:MM:SSZ", etc.).
	 * That is, values of at least 19 bytes that start with a digit.
	 */
	if ( strlen( $value ) >= 19 && is_numeric( $value[0] ) ) {
		$value = $this->translate_datetime_literal( $value );
	}

	/*
	 * 6. Remove null characters.
	 *
	 * SQLite doesn't support null characters in strings.
	 */
	$value = str_replace( "\0", '', $value );

	/*
	 * 7. Escape and add quotes.
	 *
	 * Single quotes inside the value are doubled.
	 */
	return "'" . str_replace( "'", "''", $value ) . "'";
}
}
1825+
17821826
private function translate_simple_expr( WP_Parser_Node $node ): string {
17831827
$token = $node->get_child_token();
17841828

wp-includes/sqlite/class-wp-sqlite-pdo-user-defined-functions.php

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -777,33 +777,7 @@ public function _helper_like_to_glob_pattern( $pattern ) {
777777
}
778778

779779
/*
780-
* 1. Normalize escaping of "%" and "_" characters.
781-
*
782-
* MySQL has unusual handling for "\%" and "\_" in all string literals.
783-
* While other sequences follow the C-style escaping ("\?" is "?", etc.),
784-
* "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings).
785-
*
786-
* This means that "\%" behaves like "\\%", and "\_" behaves like "\\_".
787-
* To preserve this behavior, we need to add a second backslash in cases
788-
* where only one is used. To do so correctly, we need to:
789-
*
790-
* 1. Skip all double backslash patterns (as "\\" resolves to "\").
791-
* 2. Add an extra backslash when "\%" or "\_" follows right after.
792-
*
793-
* This may be related to: https://bugs.mysql.com/bug.php?id=84118
794-
*/
795-
$pattern = preg_replace( '/(^|[^\\\\](?:\\\\{2}))*(\\\\[%_])/', '$1\\\\$2', $pattern );
796-
797-
/*
798-
* 2. Unescape C-style escape sequences.
799-
*
800-
* MySQL string literals are represented using C-style encoded strings,
801-
* but the GLOB pattern in SQLite doesn't support such escaping.
802-
*/
803-
$pattern = stripcslashes( $pattern );
804-
805-
/*
806-
* 3. Escape characters that have special meaning in GLOB patterns.
780+
* 1. Escape characters that have special meaning in GLOB patterns.
807781
*
808782
* We need to:
809783
* 1. Escape "]" as "[]]" to avoid interpreting "[...]" as a character class.
@@ -815,7 +789,7 @@ public function _helper_like_to_glob_pattern( $pattern ) {
815789
$pattern = str_replace( '?', '[?]', $pattern );
816790

817791
/*
818-
* 4. Convert LIKE wildcards to GLOB wildcards ("%" -> "*", "_" -> "?").
792+
* 2. Convert LIKE wildcards to GLOB wildcards ("%" -> "*", "_" -> "?").
819793
*
820794
* We need to convert them only when they don't follow any backslashes,
821795
* or when they follow an even number of backslashes (as "\\" is "\").
@@ -824,7 +798,7 @@ public function _helper_like_to_glob_pattern( $pattern ) {
824798
$pattern = preg_replace( '/(^|[^\\\\](?:\\\\{2})*)_/', '$1?', $pattern );
825799

826800
/*
827-
* 5. Unescape LIKE escape sequences.
801+
* 3. Unescape LIKE escape sequences.
828802
*
829803
* While in MySQL LIKE patterns, a backslash is usually used to escape
830804
* special characters ("%", "_", and "\"), it works with all characters.

0 commit comments

Comments
 (0)