@@ -611,6 +611,42 @@ describe('Common implementation mistakes', () => {
611611 } )
612612 } )
613613
614+ // These are mislabeled in WPT html dataset files: their recorded codepoints do not match the actual ones
615+ // All browsers (and the script) agree on how these are decoded, but let's explicitly recheck
616+ // Refs: https://github.com/web-platform-tests/wpt/issues/56748
describe('WPT mislabels', () => {
  // Encoding label -> list of [input bytes, expected decoded string] cases.
  // Trailing notes name the code point that must NOT be produced, or mark
  // the entry as one that is labeled correctly.
  const casesByLabel = new Map([
    [
      'euc-jp',
      [
        [[0x5c], '\x5C'], // Not U+A5
        [[0x7e], '\x7E'], // Not U+203E
        [[0xa1, 0xdd], '\uFF0D'], // Not U+2212
      ],
    ],
    [
      'shift_jis',
      [
        [[0x5c], '\x5C'], // Not U+A5
        [[0x7e], '\x7E'], // Not U+203E
        [[0x81, 0x7c], '\uFF0D'], // Not U+2212
      ],
    ],
    [
      'iso-2022-jp',
      [
        [[0x1b, 0x28, 0x4a, 0x5c, 0x1b, 0x28, 0x42], '\xA5'], // Correctly labeled, U+A5
        [[0x1b, 0x28, 0x4a, 0x7e, 0x1b, 0x28, 0x42], '\u203E'], // Correctly labeled, U+203E
        [[0x1b, 0x24, 0x42, 0x21, 0x5d, 0x1b, 0x28, 0x42], '\uFF0D'], // Not U+2212
      ],
    ],
  ])

  // Every vector is checked with both decoder modes; non-fatal goes first.
  const modes = [
    { title: 'loose', fatal: false },
    { title: 'fatal', fatal: true },
  ]

  casesByLabel.forEach((cases, label) => {
    describe(label, () => {
      modes.forEach(({ title, fatal }) => {
        test(title, (t) => {
          cases.forEach(([input, expected]) => {
            // A fresh decoder per vector, so no state carries over between cases.
            const decoder = new TextDecoder(label, { fatal })
            const actual = decoder.decode(Uint8Array.from(input))
            t.assert.strictEqual(actual, expected)
          })
        })
      })
    })
  })
})
649+
614650 describe ( 'invalid labels' , ( ) => {
615651 test ( 'non-ascii' , ( t ) => {
616652 const bad = [ '\u212Aoi8-r' , '\u212Aoi8-u' , 'euc-\u212Ar' ]
0 commit comments