Skip to content

Commit 16e42ed

Browse files
Merge pull request #55 from wp-cli/append_next
Lessen context duplication in db search.
2 parents 8728a1f + 44b9d79 commit 16e42ed

File tree

2 files changed

+72
-14
lines changed

2 files changed

+72
-14
lines changed

features/db-search.feature

+38-2
Original file line number | Diff line number | Diff line change
@@ -956,7 +956,7 @@ Feature: Search through the database
956956
"""
957957
And STDOUT should contain:
958958
"""
959-
:1234_XYXYX_2345678_X [...] X_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
959+
:1234_XYXYX_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
960960
"""
961961
And STDERR should be empty
962962

@@ -967,6 +967,42 @@ Feature: Search through the database
967967
"""
968968
And STDOUT should contain:
969969
"""
970-
:1234_XYXYX_2345678_X [...] X_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
970+
:1234_XYXYX_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
971+
"""
972+
And STDERR should be empty
973+
974+
Scenario: Search with large data
975+
Given a WP install
976+
# Note "_utf8 X'CC88'" is a combining umlaut (U+0308). It is written this way because non-ASCII characters get stripped when the command is (eventually) put through `escapeshellarg()` with a default C locale.
977+
# Also restricted by the default MySQL values for the version-dependent size of the InnoDB redo log file (a single transaction may use at most 10% of it) and for the `max_allowed_packet` size (16MB).
978+
And I run `wp db query "INSERT INTO wp_options (option_name, option_value) VALUES ('opt_large', CONCAT(REPEAT('a', 1024 * 1024 * 8 - 9), 'o', _utf8 X'CC88', 'XYXYX'));"`
979+
980+
When I run `wp db search XYXYX --before_context=1 --stats`
981+
Then STDOUT should contain:
982+
"""
983+
Success: Found 1 match
984+
"""
985+
And STDOUT should contain:
986+
"""
987+
:öXYXYX
988+
"""
989+
And STDOUT should not contain:
990+
"""
991+
:aöXYXYX
992+
"""
993+
And STDERR should be empty
994+
995+
When I run `wp db search XYXYX --regex --before_context=1 --stats`
996+
Then STDOUT should contain:
997+
"""
998+
Success: Found 1 match
999+
"""
1000+
And STDOUT should contain:
1001+
"""
1002+
:öXYXYX
1003+
"""
1004+
And STDOUT should not contain:
1005+
"""
1006+
:aöXYXYX
9711007
"""
9721008
And STDERR should be empty

src/DB_Command.php

+34-12
Original file line number | Diff line number | Diff line change
@@ -905,21 +905,43 @@ public function search( $args, $assoc_args ) {
905905

906906
$bits = array();
907907
$col_encoding = $encoding;
908-
if ( null === $col_encoding ) {
909-
$col_encoding = false;
910-
if ( ( $before_context || $after_context ) && function_exists( 'mb_detect_encoding' ) ) {
911-
$col_encoding = mb_detect_encoding( $col_val, null, true /*strict*/ );
912-
}
908+
if ( ! $col_encoding && ( $before_context || $after_context ) && function_exists( 'mb_detect_encoding' ) ) {
909+
$col_encoding = mb_detect_encoding( $col_val, null, true /*strict*/ );
913910
}
914-
foreach ( $matches[0] as $match_arr ) {
915-
$match = $match_arr[0];
916-
$offset = $match_arr[1];
911+
$append_next = false;
912+
$last_offset = 0;
913+
$match_cnt = count( $matches[0] );
914+
for ( $i = 0; $i < $match_cnt; $i++ ) {
915+
$match = $matches[0][ $i ][0];
916+
$offset = $matches[0][ $i ][1];
917+
$log = $colors['match'][0] . $match . $colors['match'][1];
918+
$before = $after = '';
919+
$after_shortened = false;
920+
917921
// Offsets are in bytes, so need to use `strlen()` and `substr()` before using `safe_substr()`.
918-
$before = $before_context && $offset ? \cli\safe_substr( substr( $col_val, 0, $offset ), -$before_context, null /*length*/, false /*is_width*/, $col_encoding ) : '';
919-
$after = $after_context ? \cli\safe_substr( substr( $col_val, $offset + strlen( $match ) ), 0, $after_context, false /*is_width*/, $col_encoding ) : '';
920-
$bits[] = $before . $colors['match'][0] . $match . $colors['match'][1] . $after;
922+
if ( $before_context && $offset && ! $append_next ) {
923+
$before = \cli\safe_substr( substr( $col_val, $last_offset, $offset - $last_offset ), -$before_context, null /*length*/, false /*is_width*/, $col_encoding );
924+
}
925+
if ( $after_context ) {
926+
$end_offset = $offset + strlen( $match );
927+
$after = \cli\safe_substr( substr( $col_val, $end_offset ), 0, $after_context, false /*is_width*/, $col_encoding );
928+
// To lessen context duplication in output, shorten the after context if it overlaps with the next match.
929+
if ( $i + 1 < $match_cnt && $end_offset + strlen( $after ) > $matches[0][ $i + 1 ][1] ) {
930+
$after = substr( $after, 0, $matches[0][ $i + 1 ][1] - $end_offset );
931+
$after_shortened = true;
932+
// On the next iteration, will append with no before context.
933+
}
934+
}
935+
if ( $append_next ) {
936+
$cnt = count( $bits );
937+
$bits[ $cnt - 1 ] .= $log . $after;
938+
} else {
939+
$bits[] = $before . $log . $after;
940+
}
941+
$append_next = $after_shortened;
942+
$last_offset = $offset;
921943
}
922-
$match_count += count( $bits );
944+
$match_count += $match_cnt;
923945
$col_val = implode( ' [...] ', $bits );
924946

925947
WP_CLI::log( $matches_only ? $col_val : ( $one_line ? "{$table_column_val}:{$pk_val}{$col_val}" : "{$pk_val}{$col_val}" ) );

0 commit comments

Comments
 (0)