Skip to content

Commit 528136f

Browse files
committed
Add detection of posts table character set
1 parent 86bd2a9 commit 528136f

File tree

2 files changed

+80
-10
lines changed

2 files changed

+80
-10
lines changed

features/db-import.feature

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -157,16 +157,25 @@ Feature: Import a WordPress database
157157
Given a WP install
158158

159159
When I run `wp post create --post_title="🍣"`
160-
And I run `wp db export wp_cli_test.sql`
161-
Then the wp_cli_test.sql file should exist
162-
163-
When I run `wp post list`
160+
And I run `wp post list`
164161
Then the return code should be 0
165162
And STDOUT should contain:
166163
"""
167164
🍣
168165
"""
169166

167+
When I try `wp db export wp_cli_test.sql --debug`
168+
Then the return code should be 0
169+
And the wp_cli_test.sql file should exist
170+
And STDERR should contain:
171+
"""
172+
Detected character set of the posts table: utf8mb4
173+
"""
174+
And STDERR should contain:
175+
"""
176+
Setting missing default character set to utf8mb4
177+
"""
178+
170179
When I run `wp db import --dbuser=wp_cli_test --dbpass=password1`
171180
Then STDOUT should be:
172181
"""

src/DB_Command.php

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@
2727
*/
2828
class DB_Command extends WP_CLI_Command {
2929

30+
/**
31+
* Legacy UTF-8 encoding for MySQL.
32+
*
33+
* @var string
34+
*/
35+
const ENCODING_UTF8 = 'utf8';
36+
37+
/**
38+
* Standards-compliant UTF-8 encoding for MySQL.
39+
*
40+
* @var string
41+
*/
42+
const ENCODING_UTF8MB4 = 'utf8mb4';
43+
3044
/**
3145
* A list of incompatible SQL modes.
3246
*
@@ -556,11 +570,19 @@ public function export( $args, $assoc_args ) {
556570

557571
/*
558572
* In case that `--default-character-set` is not given and `DB_CHARSET` is `utf8`,
559-
* use `utf8mb4` as a `default-character-set` to ensure emojis are encoded correctly.
573+
* we try to deduce what the actual character set for the posts table of the
574+
* current database is and use `utf8mb4` as a `default-character-set` if that
575+
* seems like the safer default, to ensure emojis are encoded correctly.
560576
*/
561-
if ( ! isset( $assoc_args['default-character-set'] ) &&
562-
defined( 'DB_CHARSET' ) && 'utf8' === constant( 'DB_CHARSET' ) ) {
563-
$assoc_args['default-character-set'] = 'utf8mb4';
577+
if (
578+
! isset( $assoc_args['default-character-set'] )
579+
&&
580+
( defined( 'DB_CHARSET' ) && self::ENCODING_UTF8 === constant( 'DB_CHARSET' ) )
581+
&&
582+
self::ENCODING_UTF8MB4 === $this->get_posts_table_charset( $assoc_args )
583+
) {
584+
WP_CLI::debug( 'Setting missing default character set to ' . self::ENCODING_UTF8MB4, 'db' );
585+
$assoc_args['default-character-set'] = self::ENCODING_UTF8MB4;
564586
}
565587

566588
$initial_command = sprintf( "{$mysqldump_binary}%s ", $this->get_defaults_flag_string( $assoc_args ) );
@@ -616,6 +638,45 @@ public function export( $args, $assoc_args ) {
616638
}
617639
}
618640

641+
/**
 * Get the current character set of the posts table.
 *
 * Runs a query through the `mysql` binary that looks up, in
 * `information_schema`, the character set belonging to the collation of
 * every table in the `DB_NAME` schema whose name ends in `_posts`.
 *
 * The `LIKE "%\_posts"` pattern can match more than one table (for example
 * several table prefixes sharing one database), in which case `mysql --batch`
 * prints one character set per line. The output is therefore handled line by
 * line: if any matched table uses `utf8mb4`, `utf8mb4` is returned, since it
 * is the value this method already falls back to when detection fails.
 * Otherwise the first reported character set is returned.
 *
 * @param array $assoc_args Associative array of associative arguments.
 * @return string Posts table character set. Empty string if the query
 *                succeeded but produced no rows; `utf8mb4` if the query failed.
 */
private function get_posts_table_charset( $assoc_args ) {
	// NOTE(review): DB_NAME is concatenated into the statement unescaped and
	// wrapped in double quotes; this presumably relies on DB_NAME being a
	// trusted wp-config value and on ANSI_QUOTES being off - confirm.
	$query = 'SELECT CCSA.character_set_name '
		. 'FROM information_schema.`TABLES` T, '
		. 'information_schema.`COLLATION_CHARACTER_SET_APPLICABILITY` CCSA '
		. 'WHERE CCSA.collation_name = T.table_collation '
		. 'AND T.table_schema = "' . DB_NAME . '" '
		. 'AND T.table_name LIKE "%\_posts";';

	list( $stdout, $stderr, $exit_code ) = self::run(
		sprintf(
			'/usr/bin/env mysql%s --no-auto-rehash --batch --skip-column-names',
			$this->get_defaults_flag_string( $assoc_args )
		),
		[ 'execute' => $query ],
		false
	);

	if ( $exit_code ) {
		WP_CLI::warning(
			'Failed to get current character set of the posts table.'
			. ( ! empty( $stderr ) ? " Reason: {$stderr}" : '' )
		);

		// Detection failed: fall back to utf8mb4 as the safe default.
		return self::ENCODING_UTF8MB4;
	}

	// One character set per matched table, one per line; drop blank lines
	// and duplicates so a single logical value can be compared by callers.
	$charsets = array_values(
		array_unique(
			array_filter(
				array_map( 'trim', explode( "\n", (string) $stdout ) ),
				'strlen'
			)
		)
	);

	if ( empty( $charsets ) ) {
		// No matching table was reported; keep the previous empty-string result.
		$charset = '';
	} elseif ( in_array( self::ENCODING_UTF8MB4, $charsets, true ) ) {
		// At least one posts table is utf8mb4, so that is what must be preserved.
		$charset = self::ENCODING_UTF8MB4;
	} else {
		$charset = $charsets[0];
	}

	WP_CLI::debug( "Detected character set of the posts table: {$charset}.", 'db' );

	return $charset;
}
679+
619680
/**
620681
* Imports a database from a file or from STDIN.
621682
*
@@ -1244,7 +1305,7 @@ public function search( $args, $assoc_args ) {
12441305
}
12451306

12461307
$encoding = null;
1247-
if ( 0 === strpos( $wpdb->charset, 'utf8' ) ) {
1308+
if ( 0 === strpos( $wpdb->charset, self::ENCODING_UTF8 ) ) {
12481309
$encoding = 'UTF-8';
12491310
}
12501311

@@ -1616,7 +1677,7 @@ private static function is_text_col( $type ) {
16161677
* @return string|array An escaped string if given a string, or an array of escaped strings if given an array of strings.
16171678
*/
16181679
private static function esc_sql_ident( $idents ) {
1619-
$backtick = function ( $v ) {
1680+
$backtick = static function ( $v ) {
16201681
// Escape any backticks in the identifier by doubling.
16211682
return '`' . str_replace( '`', '``', $v ) . '`';
16221683
};

0 commit comments

Comments
 (0)