@@ -688,6 +688,7 @@ const preencoders = {
688688 const t = p % 188
689689 return ( ( l + ( l < 0x1f ? 0x81 : 0xc1 ) ) << 8 ) | ( ( t < 0x3f ? 0x40 : 0x41 ) + t )
690690 } ,
691+ 'iso-2022-jp' : ( p ) => ( ( ( ( p / 94 ) | 0 ) + 0x21 ) << 8 ) | ( ( p % 94 ) + 0x21 ) ,
691692 'euc-jp' : ( p ) => ( ( ( ( p / 94 ) | 0 ) + 0xa1 ) << 8 ) | ( ( p % 94 ) + 0xa1 ) ,
692693 'euc-kr' : ( p ) => ( ( ( ( p / 190 ) | 0 ) + 0x81 ) << 8 ) | ( ( p % 190 ) + 0x41 ) ,
693694 gb18030 : ( p ) => ( ( ( ( p / 190 ) | 0 ) + 0x81 ) << 8 ) | ( ( p % 190 < 0x3f ? 0x40 : 0x41 ) + ( p % 190 ) ) ,
@@ -702,6 +703,8 @@ function getMap(id, size) {
702703 if ( cached ) return cached
703704 let tname = id
704705 const sjis = id === 'shift_jis'
706+ const iso2022jp = id === 'iso-2022-jp'
707+ if ( iso2022jp ) tname = 'jis0208'
705708 if ( id === 'gbk' ) tname = 'gb18030'
706709 if ( id === 'euc-jp' || sjis ) tname = 'jis0208'
707710 const table = getTable ( tname )
@@ -738,7 +741,7 @@ function getMap(id, size) {
738741 }
739742 }
740743
741- for ( let i = 0 ; i < 0x80 ; i ++ ) map [ i ] = i
744+ if ( isAsciiSuperset ( id ) ) for ( let i = 0 ; i < 0x80 ; i ++ ) map [ i ] = i
742745 if ( sjis || id === 'euc-jp' ) {
743746 if ( sjis ) map [ 0x80 ] = 0x80
744747 const d = sjis ? 0xfe_c0 : 0x70_c0
@@ -757,17 +760,19 @@ function getMap(id, size) {
757760 return map
758761}
759762
760- const encoders = new Set ( [ 'big5' , 'euc-kr' , 'euc-jp' , 'shift_jis' , 'gbk' , 'gb18030' ] )
761763const NON_LATIN = / [ ^ \x00 - \xFF ] / // eslint-disable-line no-control-regex
762- let gb18030r
764+ let gb18030r , katakana
763765
764766export function multibyteEncoder ( enc , onError ) {
765- if ( ! encoders . has ( enc ) ) throw new RangeError ( 'Unsupported encoding' )
767+ if ( ! Object . hasOwn ( mappers , enc ) ) throw new RangeError ( 'Unsupported encoding' )
766768 const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
767- const width = enc === 'gb18030' ? 4 : 2
769+ const iso2022jp = enc === 'iso-2022-jp'
770+ const gb18030 = enc === 'gb18030'
771+ const width = iso2022jp ? 5 : gb18030 ? 4 : 2
772+ const tailsize = iso2022jp ? 3 : 0
768773 const map = getMap ( enc , size )
769- if ( enc === ' gb18030' && ! gb18030r ) gb18030r = getTable ( 'gb18030-ranges' )
770-
774+ if ( gb18030 && ! gb18030r ) gb18030r = getTable ( 'gb18030-ranges' )
775+ if ( iso2022jp && ! katakana ) katakana = getTable ( 'iso-2022-jp-katakana' )
771776 return ( str ) => {
772777 if ( typeof str !== 'string' ) throw new TypeError ( E_STRING )
773778 if ( ! NON_LATIN . test ( str ) ) {
@@ -777,12 +782,15 @@ export function multibyteEncoder(enc, onError) {
777782 }
778783
779784 const length = str . length
780- const u8 = new Uint8Array ( length * width )
785+ const u8 = new Uint8Array ( length * width + tailsize )
781786 let i = 0
782- while ( i < length ) {
783- const x = str . charCodeAt ( i )
784- if ( x >= 128 ) break
785- u8 [ i ++ ] = x
787+
788+ if ( isAsciiSuperset ( enc ) ) {
789+ while ( i < length ) {
790+ const x = str . charCodeAt ( i )
791+ if ( x >= 128 ) break
792+ u8 [ i ++ ] = x
793+ }
786794 }
787795
788796 // eslint-disable-next-line unicorn/consistent-function-scoping
@@ -793,7 +801,78 @@ export function multibyteEncoder(enc, onError) {
793801
794802 if ( ! map || map . length < size ) /* c8 ignore next */ throw new Error ( 'Unreachable' ) // Important for perf
795803
796- if ( enc === 'gb18030' ) {
804+ if ( iso2022jp ) {
805+ let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208
806+ for ( let j = 0 ; j < length ; j ++ ) {
807+ let x = str . charCodeAt ( j )
808+ if ( x >= 0xd8_00 && x < 0xe0_00 ) {
809+ if ( x >= 0xdc_00 || j + 1 === length ) {
810+ i += err ( x ) // lone
811+ } else {
812+ const x1 = str . charCodeAt ( j + 1 )
813+ if ( x1 < 0xdc_00 || x1 >= 0xe0_00 ) {
814+ i += err ( x ) // lone
815+ } else {
816+ j ++ // consume x1
817+ i += err ( 0x1_00_00 + ( ( x1 & 0x3_ff ) | ( ( x & 0x3_ff ) << 10 ) ) )
818+ }
819+ }
820+ } else if ( x < 0x80 ) {
821+ if ( state === 2 || ( state === 1 && ( x === 0x5c || x === 0x7e ) ) ) {
822+ state = 0
823+ u8 [ i ++ ] = 0x1b
824+ u8 [ i ++ ] = 0x28
825+ u8 [ i ++ ] = 0x42
826+ }
827+
828+ if ( x === 0xe || x === 0xf || x === 0x1b ) {
829+ i += err ( 0xff_fd ) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks
830+ } else {
831+ u8 [ i ++ ] = x
832+ }
833+ } else if ( x === 0xa5 || x === 0x20_3e ) {
834+ if ( state !== 1 ) {
835+ state = 1
836+ u8 [ i ++ ] = 0x1b
837+ u8 [ i ++ ] = 0x28
838+ u8 [ i ++ ] = 0x4a
839+ }
840+
841+ u8 [ i ++ ] = x === 0xa5 ? 0x5c : 0x7e
842+ } else {
843+ // Checks above are unaffected by these, all these are above 0x30_00
844+ if ( x === 0x22_12 ) x = 0xff_0d
845+ if ( x >= 0xff_61 && x <= 0xff_9f ) x = katakana [ x - 0xff_61 ]
846+ const e = map [ x ]
847+ if ( e ) {
848+ if ( state !== 2 ) {
849+ state = 2
850+ u8 [ i ++ ] = 0x1b
851+ u8 [ i ++ ] = 0x24
852+ u8 [ i ++ ] = 0x42
853+ }
854+
855+ u8 [ i ++ ] = e >> 8
856+ u8 [ i ++ ] = e & 0xff
857+ } else {
858+ if ( state === 2 ) {
859+ state = 0
860+ u8 [ i ++ ] = 0x1b
861+ u8 [ i ++ ] = 0x28
862+ u8 [ i ++ ] = 0x42
863+ }
864+
865+ i += err ( x )
866+ }
867+ }
868+ }
869+
870+ if ( state ) {
871+ u8 [ i ++ ] = 0x1b
872+ u8 [ i ++ ] = 0x28
873+ u8 [ i ++ ] = 0x42
874+ }
875+ } else if ( gb18030 ) {
797876 // Deduping this branch hurts other encoders perf
798877 const encode = ( cp ) => {
799878 let a = 0 , b = 0 // prettier-ignore
0 commit comments