@@ -24,7 +24,7 @@ const mappers = {
2424 return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
2525 }
2626
27- const fast = ( arr , start , end , stream ) => {
27+ const decode = ( arr , start , end , stream ) => {
2828 let res = ''
2929 let i = start
3030
@@ -49,7 +49,7 @@ const mappers = {
4949 return res
5050 }
5151
52- return { fast , isAscii : ( ) => lead === 0 }
52+ return { decode , isAscii : ( ) => lead === 0 }
5353 } ,
5454 // https://encoding.spec.whatwg.org/#euc-jp-decoder
5555 'euc-jp' : ( err ) => {
@@ -81,7 +81,7 @@ const mappers = {
8181 return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
8282 }
8383
84- const fast = ( arr , start , end , stream ) => {
84+ const decode = ( arr , start , end , stream ) => {
8585 let res = ''
8686 let i = start
8787
@@ -109,26 +109,20 @@ const mappers = {
109109 return res
110110 }
111111
112- return { fast , isAscii : ( ) => lead === 0 } // j12 can be true only when lead is non-zero
112+ return { decode , isAscii : ( ) => lead === 0 } // j12 can be true only when lead is non-zero
113113 } ,
114114 // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
115- // Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
116115 'iso-2022-jp' : ( err ) => {
117116 const jis0208 = getTable ( 'jis0208' )
118- const EOF = - 1
119117 let dState = 1
120118 let oState = 1
121- let lead = 0
119+ let lead = 0 // 0 or 0x21-0x7e
122120 let out = false
123121
124- const pushback = [ ]
125- const bytes = ( b ) => {
126- if ( dState < 5 ) {
127- if ( b === EOF ) return null
128- if ( b === 0x1b ) {
129- dState = 6 // escape start
130- return
131- }
122+ const bytes = ( pushback , b ) => {
123+ if ( dState < 5 && b === 0x1b ) {
124+ dState = 6 // escape start
125+ return
132126 }
133127
134128 switch ( dState ) {
@@ -180,7 +174,7 @@ const mappers = {
180174
181175 out = false
182176 dState = oState
183- if ( b !== EOF ) pushback . push ( b )
177+ pushback . push ( b )
184178 return err ( )
185179 case 7 : {
186180 // Escape
@@ -209,16 +203,72 @@ const mappers = {
209203
210204 out = false
211205 dState = oState
212- if ( b !== EOF ) pushback . push ( b )
213- pushback . push ( l )
206+ pushback . push ( b , l )
214207 return err ( )
215208 }
216209 }
217210 }
218211
219- const eof = ( ) => bytes ( EOF )
/**
 * Signal end-of-input to the iso-2022-jp decoder.
 *
 * Reads and updates the decoder state kept in the enclosing closure (`dState`, `oState`, `lead`, `out`).
 *
 * @param {number[]} pushback - queue of bytes to re-process; a pending `lead` byte is appended to it
 * @returns {number|null|undefined} `null` when the decoder state is below 5 (nothing is pending, clean exit),
 *   otherwise whatever `err()` returns (it may throw instead); `undefined` for any state other than 5, 6, 7
 */
const eof = (pushback) => {
  const state = dState
  if (state < 5) return null // nothing pending, the caller can stop
  if (state !== 5 && state !== 6 && state !== 7) return undefined // not a state handled on EOF

  out = false
  if (state === 7) {
    // Escape: give the stored byte back for re-processing
    pushback.push(lead) // lead is always ASCII
    lead = 0
  }

  // State 5 falls back to state 4, states 6 (escape start) and 7 (escape) restore the output state
  dState = state === 5 ? 4 : oState
  return err()
}
232+
/**
 * Decode the bytes `arr[start..end)` as iso-2022-jp, continuing from the decoder state kept in the
 * enclosing closure (`dState`, `oState`, `lead`, `out`).
 *
 * @param {ArrayLike<number>} arr - input bytes
 * @param {number} start - index of the first byte to decode
 * @param {number} end - index one past the last byte to decode
 * @param {boolean} stream - when truthy, EOF is not signalled and the decoder state is kept for the next call
 * @returns {string} the decoded text
 * @throws whatever `err()` throws (via `bytes` / `eof`); in that case the state is still reset when not streaming
 */
const decode = (arr, start, end, stream) => {
  let res = ''
  let i = start
  const pushback = [] // local and auto-cleared

  try {
    // First, dump everything until EOF
    // Pushed back bytes take priority over fresh input
    while (i < end || pushback.length > 0) {
      const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
      if (c !== undefined) res += String.fromCodePoint(c)
    }

    // Then, dump EOF. This needs a loop as the EOF handler can push bytes back
    // All input is already consumed by the loop above, so only the pushback queue can feed bytes() here
    if (!stream) {
      while (i <= end || pushback.length > 0) {
        if (pushback.length > 0) {
          const c = bytes(pushback, pushback.pop())
          if (c !== undefined) res += String.fromCodePoint(c)
        } else {
          const c = eof(pushback)
          if (c === null) break // clean exit
          res += String.fromCodePoint(c)
        }
      }
    }
  } finally {
    // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
    // Per spec, a call that was not streaming makes the next decode() start from a new decoder instance
    // Done in finally so that the reset does not depend on the error handler when err() throws
    if (!stream) {
      dState = oState = 1
      lead = 0
      out = false
    }
  }

  return res
}
270+
271+ return { decode, isAscii : ( ) => false }
222272 } ,
223273 // https://encoding.spec.whatwg.org/#shift_jis-decoder
224274 shift_jis : ( err ) => {
@@ -238,7 +288,7 @@ const mappers = {
238288 return b < 128 ? String . fromCharCode ( err ( ) , b ) : String . fromCharCode ( err ( ) )
239289 }
240290
241- const fast = ( arr , start , end , stream ) => {
291+ const decode = ( arr , start , end , stream ) => {
242292 let res = ''
243293 let i = start
244294
@@ -265,7 +315,7 @@ const mappers = {
265315 return res
266316 }
267317
268- return { fast , isAscii : ( ) => lead === 0 }
318+ return { decode , isAscii : ( ) => lead === 0 }
269319 } ,
270320 // https://encoding.spec.whatwg.org/#gbk-decoder
271321 gbk : ( err ) => mappers . gb18030 ( err ) , // 10.1.1. GBK’s decoder is gb18030’s decoder
@@ -291,7 +341,7 @@ const mappers = {
291341 // g2 is 0 or 0x30-0x39
292342 // g3 is 0 or 0x81-0xfe
293343
294- const fast = ( arr , start , end , stream ) => {
344+ const decode = ( arr , start , end , stream ) => {
295345 let res = ''
296346 let i = start
297347 const pushback = [ ] // local and auto-cleared
@@ -359,7 +409,7 @@ const mappers = {
359409 return res
360410 }
361411
362- return { fast , isAscii : ( ) => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
412+ return { decode , isAscii : ( ) => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
363413 } ,
364414 // https://encoding.spec.whatwg.org/#big5
365415 big5 : ( err ) => {
@@ -380,7 +430,7 @@ const mappers = {
380430 // The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
381431 // We store that as strings
382432 // eslint-disable-next-line sonarjs/no-identical-functions
383- const fast = ( arr , start , end , stream ) => {
433+ const decode = ( arr , start , end , stream ) => {
384434 let res = ''
385435 let i = start
386436
@@ -405,7 +455,7 @@ const mappers = {
405455 return res
406456 }
407457
408- return { fast , isAscii : ( ) => lead === 0 }
458+ return { decode , isAscii : ( ) => lead === 0 }
409459 } ,
410460}
411461
@@ -421,7 +471,6 @@ export function multibyteDecoder(enc, loose = false) {
421471 const onErr = loose
422472 ? ( ) => REP
423473 : ( ) => {
424- if ( mapper . pushback ) mapper . pushback . length = 0 // the queue is cleared on returning an error
425474 // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
426475 // Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
427476 // iso-2022-jp is the only tricky one where this !stream check matters in non-stream mode
@@ -431,43 +480,13 @@ export function multibyteDecoder(enc, loose = false) {
431480
432481 return ( arr , stream = false ) => {
433482 let res = ''
434- const length = arr . length
435483 if ( asciiSuperset && ( ! mapper || mapper . isAscii ?. ( ) ) ) {
436484 res = decodeLatin1 ( arr , 0 , asciiPrefix ( arr ) )
437485 if ( res . length === arr . length ) return res // ascii
438486 }
439487
440488 streaming = stream // affects onErr
441489 if ( ! mapper ) mapper = mappers [ enc ] ( onErr )
442- if ( mapper . fast ) return res + mapper . fast ( arr , res . length , arr . length , stream ) // does not need mapper deletion
443- const { bytes, eof, pushback } = mapper
444- let i = res . length
445-
446- // First, dump everything until EOF
447- // Same as the full loop, but without EOF handling
448- while ( i < length || pushback . length > 0 ) {
449- const c = bytes ( pushback . length > 0 ? pushback . pop ( ) : arr [ i ++ ] )
450- if ( c === undefined ) continue // consuming
451- res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
452- }
453-
454- // Then, dump EOF. This needs the same loop as the characters can be pushed back
455- // TODO: only some encodings need this, most can be optimized
456- if ( ! stream ) {
457- while ( i <= length || pushback . length > 0 ) {
458- const isEOF = i === length && pushback . length === 0
459- const c = isEOF ? eof ( ) : bytes ( pushback . length > 0 ? pushback . pop ( ) : arr [ i ++ ] )
460- if ( isEOF && c === null ) break // clean exit
461- if ( c === undefined ) continue // consuming
462- res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
463- }
464- }
465-
466- // Chrome and WebKit fail on this, we don't: completely destroy the old decoder instance when finished streaming
467- // > If this’s do not flush is false, then set this’s decoder to a new instance of this’s encoding’s decoder,
468- // > Set this’s do not flush to options["stream"]
469- if ( ! stream ) mapper = null
470-
471- return res
490+ return res + mapper . decode ( arr , res . length , arr . length , stream )
472491 }
473492}
0 commit comments