Skip to content

Commit b80e784

Browse files
committed
More docs, remove a binary op in UTF-8 encoding
1 parent e0ef94a commit b80e784

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

src/wp-includes/html-api/class-wp-html-decoder.php

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,11 @@ public static function read_character_reference( $context, $text, $at, &$skip_by
329329
*/
330330

331331
/*
332+
* Code points in the C1 controls area need to be remapped as if they
333+
* were stored in Windows-1252. Note! This transformation only happens
334+
* for numeric character references. The raw code points in the byte
335+
* stream are not translated.
336+
*
332337
* > If the number is one of the numbers in the first column of
333338
* > the following table, then find the row with that number in
334339
* > the first column, and set the character reference code to
@@ -455,27 +460,27 @@ public static function code_point_to_utf8_bytes( $code_point ) {
455460
return '';
456461
}
457462

458-
if ( $code_point < 0x80 ) {
463+
if ( $code_point <= 0x7F ) {
459464
return chr( $code_point );
460465
}
461466

462-
if ( $code_point < 0x800 ) {
463-
$byte1 = ( $code_point >> 6 ) & 0x1F | 0xC0;
467+
if ( $code_point <= 0x7FF ) {
468+
$byte1 = ( $code_point >> 6 ) | 0xC0;
464469
$byte2 = $code_point & 0x3F | 0x80;
465470

466471
return pack( 'CC', $byte1, $byte2 );
467472
}
468473

469-
if ( $code_point < 0x10000 ) {
470-
$byte1 = ( $code_point >> 12 ) & 0x0F | 0xE0;
474+
if ( $code_point <= 0xFFFF ) {
475+
$byte1 = ( $code_point >> 12 ) | 0xE0;
471476
$byte2 = ( $code_point >> 6 ) & 0x3F | 0x80;
472477
$byte3 = $code_point & 0x3F | 0x80;
473478

474479
return pack( 'CCC', $byte1, $byte2, $byte3 );
475480
}
476481

477-
if ( $code_point < 0x110000 ) {
478-
$byte1 = ( $code_point >> 18 ) & 0x07 | 0xF0;
482+
if ( $code_point <= 0x10FFFF ) {
483+
$byte1 = ( $code_point >> 18 ) | 0xF0;
479484
$byte2 = ( $code_point >> 12 ) & 0x3F | 0x80;
480485
$byte3 = ( $code_point >> 6 ) & 0x3F | 0x80;
481486
$byte4 = $code_point & 0x3F | 0x80;

0 commit comments

Comments
 (0)