@@ -331,12 +331,57 @@ const mappers = {
331331
332332 return { bytes, eof, pushback }
333333 } ,
334+ // https://encoding.spec.whatwg.org/#big5
335+ big5 : ( err ) => {
336+ const big5 = getTable ( 'big5' )
337+ let lead = 0
338+
339+ const decodeLead = ( b ) => {
340+ let cp
341+ if ( ( b >= 0x40 && b <= 0x7e ) || ( b >= 0xa1 && b !== 0xff ) ) {
342+ cp = big5 [ ( lead - 0x81 ) * 157 + b - ( b < 0x7f ? 0x40 : 0x62 ) ]
343+ }
344+
345+ lead = 0
346+ if ( cp ) return cp // strings
347+ return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
348+ }
349+
350+ // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
351+ // We store that as strings
352+ // eslint-disable-next-line sonarjs/no-identical-functions
353+ const fast = ( arr , start , end , stream ) => {
354+ let res = ''
355+ let i = start
356+
357+ if ( lead && i < end ) res += decodeLead ( arr [ i ++ ] )
358+ while ( i < end ) {
359+ const b = arr [ i ++ ]
360+ if ( b < 128 ) {
361+ res += String . fromCharCode ( b )
362+ } else if ( b < 0x81 || b === 0xff ) {
363+ res += String . fromCharCode ( err ( ) )
364+ } else {
365+ lead = b
366+ if ( i < end ) res += decodeLead ( arr [ i ++ ] )
367+ }
368+ }
369+
370+ if ( lead && ! stream ) {
371+ lead = 0
372+ res += String . fromCharCode ( err ( ) )
373+ }
374+
375+ return res
376+ }
377+
378+ return { fast, isAscii : ( ) => lead === 0 }
379+ } ,
334380}
335381
/**
 * Tells whether an encoding is an ASCII superset and can therefore use the ASCII fast path.
 * Only 'iso-2022-jp' is not; every other encoding name yields true.
 * @param {string} enc - encoding name
 * @returns {boolean}
 */
export const isAsciiSuperset = (enc) => {
  const isIso2022Jp = enc === 'iso-2022-jp'
  return !isIso2022Jp
}
337383
338384export function multibyteDecoder ( enc , loose = false ) {
339- if ( enc === 'big5' ) return big5decoder ( loose )
340385 if ( ! Object . hasOwn ( mappers , enc ) ) throw new RangeError ( 'Unsupported encoding' )
341386
342387 // Input is assumed to be typechecked already
@@ -394,60 +439,3 @@ export function multibyteDecoder(enc, loose = false) {
394439 return res
395440 }
396441}
397-
// Big5 is the only decoder that can produce several codepoints from one byte pair and
// codepoints outside the charcode range, so decoded table entries are kept as strings
function big5decoder(loose) {
  // Input is assumed to be typechecked already
  let pending = 0 // lead byte waiting for its trail byte, 0 when none
  let table // loaded on first non-ASCII input

  // Loose mode substitutes U+FFFD, strict mode throws
  const fail = loose
    ? () => '\uFFFD'
    : () => {
        // The pending lead byte is always cleared before this is called
        throw new TypeError(E_STRICT)
      }

  return (arr, stream = false) => {
    const total = arr.length
    let out = ''

    // With no lead byte carried over, the ASCII prefix can be decoded in one go
    if (!pending) {
      out = decodeLatin1(arr, 0, asciiPrefix(arr))
      if (out.length === total) return out // everything was ASCII
    }

    if (!table) table = getTable('big5')

    for (let i = out.length; i < total; i++) {
      const byte = arr[i]

      if (!pending) {
        if (byte < 128) {
          out += String.fromCharCode(byte)
        } else if (byte < 0x81 || byte === 0xff) {
          out += fail()
        } else {
          pending = byte
        }

        continue
      }

      // `byte` is the trail for the pending lead
      const first = pending
      pending = 0

      const isLowTrail = byte >= 0x40 && byte <= 0x7e
      if (isLowTrail || (byte >= 0xa1 && byte !== 0xff)) {
        const hit = table[(first - 0x81) * 157 + byte - (isLowTrail ? 0x40 : 0x62)]
        if (hit) {
          out += hit // strings
          continue
        }
      }

      out += fail()
      // Equivalent to pushing the byte back: the lead is already cleared and at most
      // one byte can ever be pushed back
      if (byte < 128) out += String.fromCharCode(byte)
    }

    if (pending && !stream) {
      // Final chunk ended in the middle of a pair: drop the state and report it
      pending = 0
      out += fail()
    }

    return out
  }
}
0 commit comments