@@ -329,6 +329,11 @@ public static function read_character_reference( $context, $text, $at, &$skip_by
329
329
*/
330
330
331
331
/*
332
+ * Code points in the C1 controls area need to be remapped as if they
333
+ * were stored in Windows-1252. Note! This transformation only happens
334
+ * for numeric character references. The raw code points in the byte
335
+ * stream are not translated.
336
+ *
332
337
* > If the number is one of the numbers in the first column of
333
338
* > the following table, then find the row with that number in
334
339
* > the first column, and set the character reference code to
@@ -455,27 +460,27 @@ public static function code_point_to_utf8_bytes( $code_point ) {
455
460
return '� ' ;
456
461
}
457
462
458
- if ( $ code_point < 0x80 ) {
463
+ if ( $ code_point <= 0x7F ) {
459
464
return chr ( $ code_point );
460
465
}
461
466
462
- if ( $ code_point < 0x800 ) {
463
- $ byte1 = ( $ code_point >> 6 ) & 0x1F | 0xC0 ;
467
+ if ( $ code_point <= 0x7FF ) {
468
+ $ byte1 = ( $ code_point >> 6 ) | 0xC0 ;
464
469
$ byte2 = $ code_point & 0x3F | 0x80 ;
465
470
466
471
return pack ( 'CC ' , $ byte1 , $ byte2 );
467
472
}
468
473
469
- if ( $ code_point < 0x10000 ) {
470
- $ byte1 = ( $ code_point >> 12 ) & 0x0F | 0xE0 ;
474
+ if ( $ code_point <= 0xFFFF ) {
475
+ $ byte1 = ( $ code_point >> 12 ) | 0xE0 ;
471
476
$ byte2 = ( $ code_point >> 6 ) & 0x3F | 0x80 ;
472
477
$ byte3 = $ code_point & 0x3F | 0x80 ;
473
478
474
479
return pack ( 'CCC ' , $ byte1 , $ byte2 , $ byte3 );
475
480
}
476
481
477
- if ( $ code_point < 0x110000 ) {
478
- $ byte1 = ( $ code_point >> 18 ) & 0x07 | 0xF0 ;
482
+ if ( $ code_point <= 0x10FFFF ) {
483
+ $ byte1 = ( $ code_point >> 18 ) | 0xF0 ;
479
484
$ byte2 = ( $ code_point >> 12 ) & 0x3F | 0x80 ;
480
485
$ byte3 = ( $ code_point >> 6 ) & 0x3F | 0x80 ;
481
486
$ byte4 = $ code_point & 0x3F | 0x80 ;
0 commit comments