@@ -24,7 +24,7 @@ const mappers = {
2424 return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
2525 }
2626
27- const fast = ( arr , start , end , stream ) => {
27+ const decode = ( arr , start , end , stream ) => {
2828 let res = ''
2929 let i = start
3030
@@ -49,7 +49,7 @@ const mappers = {
4949 return res
5050 }
5151
52- return { fast , isAscii : ( ) => lead === 0 }
52+ return { decode , isAscii : ( ) => lead === 0 }
5353 } ,
5454 // https://encoding.spec.whatwg.org/#euc-jp-decoder
5555 'euc-jp' : ( err ) => {
@@ -81,7 +81,7 @@ const mappers = {
8181 return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
8282 }
8383
84- const fast = ( arr , start , end , stream ) => {
84+ const decode = ( arr , start , end , stream ) => {
8585 let res = ''
8686 let i = start
8787
@@ -109,26 +109,20 @@ const mappers = {
109109 return res
110110 }
111111
112- return { fast , isAscii : ( ) => lead === 0 } // j12 can be true only when lead is non-zero
112+ return { decode , isAscii : ( ) => lead === 0 } // j12 can be true only when lead is non-zero
113113 } ,
114114 // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
115- // Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
116115 'iso-2022-jp' : ( err ) => {
117116 const jis0208 = getTable ( 'jis0208' )
118- const EOF = - 1
119117 let dState = 1
120118 let oState = 1
121- let lead = 0
119+ let lead = 0 // 0 or 0x21-0x7e
122120 let out = false
123121
124- const pushback = [ ]
125- const bytes = ( b ) => {
126- if ( dState < 5 ) {
127- if ( b === EOF ) return null
128- if ( b === 0x1b ) {
129- dState = 6 // escape start
130- return
131- }
122+ const bytes = ( pushback , b ) => {
123+ if ( dState < 5 && b === 0x1b ) {
124+ dState = 6 // escape start
125+ return
132126 }
133127
134128 switch ( dState ) {
@@ -180,7 +174,7 @@ const mappers = {
180174
181175 out = false
182176 dState = oState
183- if ( b !== EOF ) pushback . push ( b )
177+ pushback . push ( b )
184178 return err ( )
185179 case 7 : {
186180 // Escape
@@ -209,16 +203,68 @@ const mappers = {
209203
210204 out = false
211205 dState = oState
212- if ( b !== EOF ) pushback . push ( b )
213- pushback . push ( l )
206+ pushback . push ( b , l )
207+ return err ( )
208+ }
209+ }
210+ }
211+
212+ const eof = ( pushback ) => { // End-of-input handler: returns null when nothing is pending, otherwise resets the state and returns the result of err ( )
213+ if ( dState < 5 ) return null // nothing to flush in states below 5; the decode loop treats this null as a clean exit
214+ switch ( dState ) {
215+ case 5 : // NOTE(review): presumably the trail-byte state falling back to lead-byte state 4 — confirm against the switch in bytes
216+ out = false
217+ dState = 4
218+ return err ( )
219+ case 6 : // escape start state
220+ out = false
221+ dState = oState // fall back to the state stored in oState
222+ return err ( )
223+ case 7 : { // escape state: the byte held in lead is returned to the queue
224+ out = false
225+ dState = oState
226+ pushback . push ( lead ) // the caller's loop pops this and feeds it through bytes again
227+ lead = 0
214228 return err ( )
215229 }
216230 }
217231 }
218232
219- const eof = ( ) => bytes ( EOF )
233+ const decode = ( arr , start , end , stream ) => { // Decodes arr[start] .. arr[end - 1] into a string; when stream is falsy it also flushes end-of-input and resets the decoder state
234+ let res = '' // decoded output accumulated so far
235+ let i = start // index of the next unread byte in arr
236+ const pback = [ ] // local and auto-cleared; bytes queued for re-processing, consumed last-in first-out via pop
237+
238+ // First, dump everything until EOF
239+ // Same as the full loop, but without EOF handling
240+ while ( i < end || pback . length > 0 ) {
241+ const c = bytes ( pback , pback . length > 0 ? pback . pop ( ) : arr [ i ++ ] ) // queued pushback bytes take priority over fresh input
242+ if ( c !== undefined ) res += String . fromCodePoint ( c ) // undefined means bytes emitted nothing for this input byte
243+ }
220244
221- return { bytes, eof, pushback }
245+ // Then, dump EOF. This needs the same loop as the characters can be pushed back
246+ if ( ! stream ) {
247+ while ( i <= end || pback . length > 0 ) { // i === end after the first loop (assuming start <= end); runs until eof reports a clean exit
248+ const isEOF = i === end && pback . length === 0 // both the input and the pushback queue are drained
249+ const c = isEOF ? eof ( pback ) : bytes ( pback , pback . length > 0 ? pback . pop ( ) : arr [ i ++ ] ) // eof may refill pback, which sends the next iteration back through bytes
250+ if ( isEOF && c === null ) break // clean exit
251+ if ( c !== undefined ) res += String . fromCodePoint ( c )
252+ }
253+ }
254+
255+ // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
256+ // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
257+ // > Set this’s do not flush to options["stream"]
258+ if ( ! stream ) { // NOTE(review): not reached if err ( ) throws above — confirm the caller resets state in that case
259+ dState = oState = 1 // same values these variables are initialised with when the mapper is created
260+ lead = 0
261+ out = false
262+ }
263+
264+ return res
265+ }
266+
267+ return { decode, isAscii : ( ) => false }
222268 } ,
223269 // https://encoding.spec.whatwg.org/#shift_jis-decoder
224270 shift_jis : ( err ) => {
@@ -238,7 +284,7 @@ const mappers = {
238284 return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
239285 }
240286
241- const fast = ( arr , start , end , stream ) => {
287+ const decode = ( arr , start , end , stream ) => {
242288 let res = ''
243289 let i = start
244290
@@ -265,7 +311,7 @@ const mappers = {
265311 return res
266312 }
267313
268- return { fast , isAscii : ( ) => lead === 0 }
314+ return { decode , isAscii : ( ) => lead === 0 }
269315 } ,
270316 // https://encoding.spec.whatwg.org/#gbk-decoder
271317 gbk : ( err ) => mappers . gb18030 ( err ) , // 10.1.1. GBK’s decoder is gb18030’s decoder
@@ -291,7 +337,7 @@ const mappers = {
291337 // g2 is 0 or 0x30-0x39
292338 // g3 is 0 or 0x81-0xfe
293339
294- const fast = ( arr , start , end , stream ) => {
340+ const decode = ( arr , start , end , stream ) => {
295341 let res = ''
296342 let i = start
297343 const pushback = [ ] // local and auto-cleared
@@ -359,7 +405,7 @@ const mappers = {
359405 return res
360406 }
361407
362- return { fast , isAscii : ( ) => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
408+ return { decode , isAscii : ( ) => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
363409 } ,
364410 // https://encoding.spec.whatwg.org/#big5
365411 big5 : ( err ) => {
@@ -380,7 +426,7 @@ const mappers = {
380426 // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
381427 // We store that as strings
382428 // eslint-disable-next-line sonarjs/no-identical-functions
383- const fast = ( arr , start , end , stream ) => {
429+ const decode = ( arr , start , end , stream ) => {
384430 let res = ''
385431 let i = start
386432
@@ -405,7 +451,7 @@ const mappers = {
405451 return res
406452 }
407453
408- return { fast , isAscii : ( ) => lead === 0 }
454+ return { decode , isAscii : ( ) => lead === 0 }
409455 } ,
410456}
411457
@@ -421,7 +467,6 @@ export function multibyteDecoder(enc, loose = false) {
421467 const onErr = loose
422468 ? ( ) => REP
423469 : ( ) => {
424- if ( mapper . pushback ) mapper . pushback . length = 0 // the queue is cleared on returning an error
425470 // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
426471 // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
427472 // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
@@ -431,43 +476,13 @@ export function multibyteDecoder(enc, loose = false) {
431476
432477 return ( arr , stream = false ) => {
433478 let res = ''
434- const length = arr . length
435479 if ( asciiSuperset && ( ! mapper || mapper . isAscii ?. ( ) ) ) {
436480 res = decodeLatin1 ( arr , 0 , asciiPrefix ( arr ) )
437481 if ( res . length === arr . length ) return res // ascii
438482 }
439483
440484 streaming = stream // affects onErr
441485 if ( ! mapper ) mapper = mappers [ enc ] ( onErr )
442- if ( mapper . fast ) return res + mapper . fast ( arr , res . length , arr . length , stream ) // does not need mapper deletion
443- const { bytes, eof, pushback } = mapper
444- let i = res . length
445-
446- // First, dump everything until EOF
447- // Same as the full loop, but without EOF handling
448- while ( i < length || pushback . length > 0 ) {
449- const c = bytes ( pushback . length > 0 ? pushback . pop ( ) : arr [ i ++ ] )
450- if ( c === undefined ) continue // consuming
451- res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
452- }
453-
454- // Then, dump EOF. This needs the same loop as the characters can be pushed back
455- // TODO: only some encodings need this, most can be optimized
456- if ( ! stream ) {
457- while ( i <= length || pushback . length > 0 ) {
458- const isEOF = i === length && pushback . length === 0
459- const c = isEOF ? eof ( ) : bytes ( pushback . length > 0 ? pushback . pop ( ) : arr [ i ++ ] )
460- if ( isEOF && c === null ) break // clean exit
461- if ( c === undefined ) continue // consuming
462- res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
463- }
464- }
465-
466- // Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
467- // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
468- // > Set this’s do not flush to options["stream"]
469- if ( ! stream ) mapper = null
470-
471- return res
486+ return res + mapper . decode ( arr , res . length , arr . length , stream )
472487 }
473488}
0 commit comments