@@ -688,6 +688,7 @@ const preencoders = {
688688 const t = p % 188
689689 return ( ( l + ( l < 0x1f ? 0x81 : 0xc1 ) ) << 8 ) | ( ( t < 0x3f ? 0x40 : 0x41 ) + t )
690690 } ,
// Packs p into a 16-bit value: high byte = integer part of p / 94, plus 0x21; low byte = (p % 94) + 0x21.
// NOTE(review): p is presumably an index into the jis0208 table (getMap selects 'jis0208' for this id) — confirm.
691+ 'iso-2022-jp' : ( p ) => ( ( ( ( p / 94 ) | 0 ) + 0x21 ) << 8 ) | ( ( p % 94 ) + 0x21 ) ,
// Packs p into a 16-bit value: high byte = integer part of p / 94, plus 0xa1; low byte = (p % 94) + 0xa1.
691692 'euc-jp' : ( p ) => ( ( ( ( p / 94 ) | 0 ) + 0xa1 ) << 8 ) | ( ( p % 94 ) + 0xa1 ) ,
// Packs p into a 16-bit value: high byte = integer part of p / 190, plus 0x81; low byte = (p % 190) + 0x41.
692693 'euc-kr' : ( p ) => ( ( ( ( p / 190 ) | 0 ) + 0x81 ) << 8 ) | ( ( p % 190 ) + 0x41 ) ,
// Packs p into a 16-bit value: high byte = integer part of p / 190, plus 0x81.
// Low byte is (p % 190) offset by 0x40 when the remainder is below 0x3f, otherwise by 0x41, so 0x7f is never produced.
693694 gb18030 : ( p ) => ( ( ( ( p / 190 ) | 0 ) + 0x81 ) << 8 ) | ( ( p % 190 < 0x3f ? 0x40 : 0x41 ) + ( p % 190 ) ) ,
@@ -697,11 +698,13 @@ preencoders.gbk = preencoders.gb18030
697698
698699// We accept that encoders use a non-trivial amount of mem, for perf
699700// most are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
700- function getMap ( id , size ) {
701+ function getMap ( id , size , ascii ) {
701702 const cached = maps . get ( id )
702703 if ( cached ) return cached
703704 let tname = id
704705 const sjis = id === 'shift_jis'
706+ const iso2022jp = id === 'iso-2022-jp'
707+ if ( iso2022jp ) tname = 'jis0208'
705708 if ( id === 'gbk' ) tname = 'gb18030'
706709 if ( id === 'euc-jp' || sjis ) tname = 'jis0208'
707710 const table = getTable ( tname )
@@ -738,7 +741,7 @@ function getMap(id, size) {
738741 }
739742 }
740743
741- for ( let i = 0 ; i < 0x80 ; i ++ ) map [ i ] = i
744+ if ( ascii ) for ( let i = 0 ; i < 0x80 ; i ++ ) map [ i ] = i
742745 if ( sjis || id === 'euc-jp' ) {
743746 if ( sjis ) map [ 0x80 ] = 0x80
744747 const d = sjis ? 0xfe_c0 : 0x70_c0
@@ -757,32 +760,38 @@ function getMap(id, size) {
757760 return map
758761}
759762
760- const encoders = new Set ( [ 'big5' , 'euc-kr' , 'euc-jp' , 'shift_jis' , 'gbk' , 'gb18030' ] )
761763const NON_LATIN = / [ ^ \x00 - \xFF ] / // eslint-disable-line no-control-regex
762- let gb18030r
764+ let gb18030r , katakana
763765
764766export function multibyteEncoder ( enc , onError ) {
765- if ( ! encoders . has ( enc ) ) throw new RangeError ( 'Unsupported encoding' )
767+ if ( ! Object . hasOwn ( mappers , enc ) ) throw new RangeError ( 'Unsupported encoding' )
766768 const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
767- const width = enc === 'gb18030' ? 4 : 2
768- const map = getMap ( enc , size )
769- if ( enc === 'gb18030' && ! gb18030r ) gb18030r = getTable ( 'gb18030-ranges' )
770-
769+ const iso2022jp = enc === 'iso-2022-jp'
770+ const gb18030 = enc === 'gb18030'
771+ const ascii = isAsciiSuperset ( enc )
772+ const width = iso2022jp ? 5 : gb18030 ? 4 : 2
773+ const tailsize = iso2022jp ? 3 : 0
774+ const map = getMap ( enc , size , ascii )
775+ if ( gb18030 && ! gb18030r ) gb18030r = getTable ( 'gb18030-ranges' )
776+ if ( iso2022jp && ! katakana ) katakana = getTable ( 'iso-2022-jp-katakana' )
771777 return ( str ) => {
772778 if ( typeof str !== 'string' ) throw new TypeError ( E_STRING )
773- if ( ! NON_LATIN . test ( str ) ) {
779+ if ( ascii && ! NON_LATIN . test ( str ) ) {
774780 try {
775781 return encodeAscii ( str , E_STRICT )
776782 } catch { }
777783 }
778784
779785 const length = str . length
780- const u8 = new Uint8Array ( length * width )
786+ const u8 = new Uint8Array ( length * width + tailsize )
781787 let i = 0
782- while ( i < length ) {
783- const x = str . charCodeAt ( i )
784- if ( x >= 128 ) break
785- u8 [ i ++ ] = x
788+
789+ if ( ascii ) {
790+ while ( i < length ) {
791+ const x = str . charCodeAt ( i )
792+ if ( x >= 128 ) break
793+ u8 [ i ++ ] = x
794+ }
786795 }
787796
788797 // eslint-disable-next-line unicorn/consistent-function-scoping
@@ -793,7 +802,69 @@ export function multibyteEncoder(enc, onError) {
793802
794803 if ( ! map || map . length < size ) /* c8 ignore next */ throw new Error ( 'Unreachable' ) // Important for perf
795804
796- if ( enc === 'gb18030' ) {
805+ if ( iso2022jp ) {
806+ let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208
807+ const restore = ( ) => {
808+ state = 0
809+ u8 [ i ++ ] = 0x1b
810+ u8 [ i ++ ] = 0x28
811+ u8 [ i ++ ] = 0x42
812+ }
813+
814+ for ( let j = 0 ; j < length ; j ++ ) {
815+ let x = str . charCodeAt ( j )
816+ if ( x >= 0xd8_00 && x < 0xe0_00 ) {
817+ if ( state === 2 ) restore ( )
818+ if ( x >= 0xdc_00 || j + 1 === length ) {
819+ i += err ( x ) // lone
820+ } else {
821+ const x1 = str . charCodeAt ( j + 1 )
822+ if ( x1 < 0xdc_00 || x1 >= 0xe0_00 ) {
823+ i += err ( x ) // lone
824+ } else {
825+ j ++ // consume x1
826+ i += err ( 0x1_00_00 + ( ( x1 & 0x3_ff ) | ( ( x & 0x3_ff ) << 10 ) ) )
827+ }
828+ }
829+ } else if ( x < 0x80 ) {
830+ if ( state === 2 || ( state === 1 && ( x === 0x5c || x === 0x7e ) ) ) restore ( )
831+ if ( x === 0xe || x === 0xf || x === 0x1b ) {
832+ i += err ( 0xff_fd ) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks
833+ } else {
834+ u8 [ i ++ ] = x
835+ }
836+ } else if ( x === 0xa5 || x === 0x20_3e ) {
837+ if ( state !== 1 ) {
838+ state = 1
839+ u8 [ i ++ ] = 0x1b
840+ u8 [ i ++ ] = 0x28
841+ u8 [ i ++ ] = 0x4a
842+ }
843+
844+ u8 [ i ++ ] = x === 0xa5 ? 0x5c : 0x7e
845+ } else {
846+ if ( x === 0x22_12 ) x = 0xff_0d
847+ if ( x >= 0xff_61 && x <= 0xff_9f ) x = katakana [ x - 0xff_61 ]
848+ const e = map [ x ]
849+ if ( e ) {
850+ if ( state !== 2 ) {
851+ state = 2
852+ u8 [ i ++ ] = 0x1b
853+ u8 [ i ++ ] = 0x24
854+ u8 [ i ++ ] = 0x42
855+ }
856+
857+ u8 [ i ++ ] = e >> 8
858+ u8 [ i ++ ] = e & 0xff
859+ } else {
860+ if ( state === 2 ) restore ( )
861+ i += err ( x )
862+ }
863+ }
864+ }
865+
866+ if ( state ) restore ( )
867+ } else if ( gb18030 ) {
797868 // Deduping this branch hurts other encoders perf
798869 const encode = ( cp ) => {
799870 let a = 0 , b = 0 // prettier-ignore
0 commit comments