@@ -329,6 +329,11 @@ public static function read_character_reference( $context, $text, $at, &$skip_by
329329 */
330330
331331 /*
332+ * Code points in the C1 controls area need to be remapped as if they
333+ * were stored in Windows-1252. Note! This transformation only happens
334+ * for numeric character references. The raw code points in the byte
335+ * stream are not translated.
336+ *
332337 * > If the number is one of the numbers in the first column of
333338 * > the following table, then find the row with that number in
334339 * > the first column, and set the character reference code to
@@ -455,27 +460,27 @@ public static function code_point_to_utf8_bytes( $code_point ) {
455460 return '� ' ;
456461 }
457462
458- if ( $ code_point < 0x80 ) {
463+ if ( $ code_point <= 0x7F ) {
459464 return chr ( $ code_point );
460465 }
461466
462- if ( $ code_point < 0x800 ) {
463- $ byte1 = ( $ code_point >> 6 ) & 0x1F | 0xC0 ;
467+ if ( $ code_point <= 0x7FF ) {
468+ $ byte1 = ( $ code_point >> 6 ) | 0xC0 ;
464469 $ byte2 = $ code_point & 0x3F | 0x80 ;
465470
466471 return pack ( 'CC ' , $ byte1 , $ byte2 );
467472 }
468473
469- if ( $ code_point < 0x10000 ) {
470- $ byte1 = ( $ code_point >> 12 ) & 0x0F | 0xE0 ;
474+ if ( $ code_point <= 0xFFFF ) {
475+ $ byte1 = ( $ code_point >> 12 ) | 0xE0 ;
471476 $ byte2 = ( $ code_point >> 6 ) & 0x3F | 0x80 ;
472477 $ byte3 = $ code_point & 0x3F | 0x80 ;
473478
474479 return pack ( 'CCC ' , $ byte1 , $ byte2 , $ byte3 );
475480 }
476481
477- if ( $ code_point < 0x110000 ) {
478- $ byte1 = ( $ code_point >> 18 ) & 0x07 | 0xF0 ;
482+ if ( $ code_point <= 0x10FFFF ) {
483+ $ byte1 = ( $ code_point >> 18 ) | 0xF0 ;
479484 $ byte2 = ( $ code_point >> 12 ) & 0x3F | 0x80 ;
480485 $ byte3 = ( $ code_point >> 6 ) & 0x3F | 0x80 ;
481486 $ byte4 = $ code_point & 0x3F | 0x80 ;
0 commit comments