@@ -13,7 +13,7 @@ export const E_STRICT = 'Input is not well-formed for this encoding'
1313const REP = 0xff_fd
1414const mappers = {
1515 // https://encoding.spec.whatwg.org/#euc-kr-decoder
16- 'euc-kr' : ( ) => {
16+ 'euc-kr' : ( err ) => {
1717 const euc = getTable ( 'euc-kr' )
1818 let lead = 0
1919
@@ -24,25 +24,24 @@ const mappers = {
2424 lead = 0
2525 if ( cp !== undefined && cp !== REP ) return cp
2626 if ( b < 128 ) pushback . push ( b )
27- return - 2
27+ return err ( )
2828 }
2929
3030 if ( b < 128 ) return b
31- if ( b < 0x81 || b === 0xff ) return - 2
31+ if ( b < 0x81 || b === 0xff ) return err ( )
3232 lead = b
33- return - 1
3433 }
3534
3635 const eof = ( ) => {
3736 if ( ! lead ) return null
3837 lead = 0
39- return - 2
38+ return err ( )
4039 }
4140
4241 return { bytes, eof, pushback }
4342 } ,
4443 // https://encoding.spec.whatwg.org/#euc-jp-decoder
45- 'euc-jp' : ( ) => {
44+ 'euc-jp' : ( err ) => {
4645 const jis0208 = getTable ( 'jis0208' )
4746 const jis0212 = getTable ( 'jis0212' )
4847 let j12 = false
@@ -58,7 +57,7 @@ const mappers = {
5857 if ( lead === 0x8f && b >= 0xa1 && b <= 0xfe ) {
5958 j12 = true
6059 lead = b
61- return - 1
60+ return
6261 }
6362
6463 if ( lead ) {
@@ -71,27 +70,26 @@ const mappers = {
7170 j12 = false
7271 if ( cp !== undefined && cp !== REP ) return cp
7372 if ( b < 128 ) pushback . push ( b )
74- return - 2
73+ return err ( )
7574 }
7675
7776 if ( b < 128 ) return b
78- if ( ( b < 0xa1 && b !== 0x8e && b !== 0x8f ) || b === 0xff ) return - 2
77+ if ( ( b < 0xa1 && b !== 0x8e && b !== 0x8f ) || b === 0xff ) return err ( )
7978 lead = b
80- return - 1
8179 }
8280
8381 // eslint-disable-next-line sonarjs/no-identical-functions
8482 const eof = ( ) => {
8583 if ( ! lead ) return null
8684 lead = 0
87- return - 2
85+ return err ( )
8886 }
8987
9088 return { bytes, eof, pushback }
9189 } ,
9290 // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
9391 // Per-letter of the spec, don't shortcut on state changes on EOF. Some code is regrouped but preserving the logic
94- 'iso-2022-jp' : ( ) => {
92+ 'iso-2022-jp' : ( err ) => {
9593 const jis0208 = getTable ( 'jis0208' )
9694 const EOF = - 1
9795 let dState = 1
@@ -105,7 +103,7 @@ const mappers = {
105103 if ( b === EOF ) return null
106104 if ( b === 0x1b ) {
107105 dState = 6 // escape start
108- return - 1
106+ return
109107 }
110108 }
111109
@@ -120,49 +118,46 @@ const mappers = {
120118 }
121119
122120 if ( b <= 0x7f && b !== 0x0e && b !== 0x0f ) return b
123- return - 2
121+ return err ( )
124122 case 3 :
125123 // Katakana
126124 out = false
127125 if ( b >= 0x21 && b <= 0x5f ) return 0xff_40 + b
128- return - 2
126+ return err ( )
129127 case 4 :
130128 // Leading byte
131129 out = false
132- if ( ( b >= 0x21 ) & ( b <= 0x7e ) ) {
133- lead = b
134- dState = 5
135- return - 1
136- }
137-
138- return - 2
130+ if ( b < 0x21 || b > 0x7e ) return err ( )
131+ lead = b
132+ dState = 5
133+ return
139134 case 5 :
140135 // Trailing byte
141136 out = false
142137 if ( b === 0x1b ) {
143138 dState = 6 // escape start
144- return - 2
139+ return err ( )
145140 }
146141
147142 dState = 4
148143 if ( b >= 0x21 && b <= 0x7e ) {
149144 const cp = jis0208 [ ( lead - 0x21 ) * 94 + b - 0x21 ]
150- return cp !== undefined && cp !== REP ? cp : - 2
145+ if ( cp !== undefined && cp !== REP ) return cp
151146 }
152147
153- return - 2
148+ return err ( )
154149 case 6 :
155150 // Escape start
156151 if ( b === 0x24 || b === 0x28 ) {
157152 lead = b
158153 dState = 7
159- return - 1
154+ return
160155 }
161156
162157 out = false
163158 dState = oState
164159 if ( b !== EOF ) pushback . push ( b )
165- return - 2
160+ return err ( )
166161 case 7 : {
167162 // Escape
168163 const l = lead
@@ -185,14 +180,14 @@ const mappers = {
185180 dState = oState = s
186181 const output = out
187182 out = true
188- return output ? - 2 : - 1
183+ return output ? err ( ) : undefined
189184 }
190185
191186 out = false
192187 dState = oState
193188 if ( b !== EOF ) pushback . push ( b )
194189 pushback . push ( l )
195- return - 2
190+ return err ( )
196191 }
197192 }
198193 }
@@ -202,7 +197,7 @@ const mappers = {
202197 return { bytes, eof, pushback }
203198 } ,
204199 // https://encoding.spec.whatwg.org/#shift_jis-decoder
205- shift_jis : ( ) => {
200+ shift_jis : ( err ) => {
206201 const jis0208 = getTable ( 'jis0208' )
207202 let lead = 0
208203
@@ -219,29 +214,28 @@ const mappers = {
219214 }
220215
221216 if ( b < 128 ) pushback . push ( b )
222- return - 2
217+ return err ( )
223218 }
224219
225220 if ( b <= 0x80 ) return b // 0x80 is allowed
226221 if ( b >= 0xa1 && b <= 0xdf ) return 0xff_61 - 0xa1 + b
227- if ( b < 0x81 || ( b > 0x9f && b < 0xe0 ) || b > 0xfc ) return - 2
222+ if ( b < 0x81 || ( b > 0x9f && b < 0xe0 ) || b > 0xfc ) return err ( )
228223 lead = b
229- return - 1
230224 }
231225
232226 // eslint-disable-next-line sonarjs/no-identical-functions
233227 const eof = ( ) => {
234228 if ( ! lead ) return null
235229 lead = 0 // this clears state completely on EOF
236- return - 2
230+ return err ( )
237231 }
238232
239233 return { bytes, eof, pushback }
240234 } ,
241235 // https://encoding.spec.whatwg.org/#gbk-decoder
242- gbk : ( ) => mappers . gb18030 ( ) , // 10.1.1. GBK’s decoder is gb18030’s decoder
236+ gbk : ( err ) => mappers . gb18030 ( err ) , // 10.1.1. GBK’s decoder is gb18030’s decoder
243237 // https://encoding.spec.whatwg.org/#gb18030-decoder
244- gb18030 : ( ) => {
238+ gb18030 : ( err ) => {
245239 const gb18030 = getTable ( 'gb18030' )
246240 const gb18030r = getTable ( 'gb18030-ranges' )
247241 let g1 = 0 , g2 = 0 , g3 = 0 // prettier-ignore
@@ -264,30 +258,30 @@ const mappers = {
264258 if ( b < 0x30 || b > 0x39 ) {
265259 pushback . push ( b , g3 , g2 )
266260 g1 = g2 = g3 = 0
267- return - 2
261+ return err ( )
268262 }
269263
270264 const cp = index ( ( g1 - 0x81 ) * 12_600 + ( g2 - 0x30 ) * 1260 + ( g3 - 0x81 ) * 10 + b - 0x30 )
271265 g1 = g2 = g3 = 0
272266 if ( cp !== undefined ) return cp // Can validly return replacement
273- return - 2
267+ return err ( )
274268 }
275269
276270 if ( g2 ) {
277271 if ( b >= 0x81 && b <= 0xfe ) {
278272 g3 = b
279- return - 1
273+ return
280274 }
281275
282276 pushback . push ( b , g2 )
283277 g1 = g2 = 0
284- return - 2
278+ return err ( )
285279 }
286280
287281 if ( g1 ) {
288282 if ( b >= 0x30 && b <= 0x39 ) {
289283 g2 = b
290- return - 1
284+ return
291285 }
292286
293287 let cp
@@ -298,20 +292,19 @@ const mappers = {
298292 g1 = 0
299293 if ( cp !== undefined && cp !== REP ) return cp
300294 if ( b < 128 ) pushback . push ( b )
301- return - 2
295+ return err ( )
302296 }
303297
304298 if ( b < 128 ) return b
305299 if ( b === 0x80 ) return 0x20_ac
306- if ( b === 0xff ) return - 2
300+ if ( b === 0xff ) return err ( )
307301 g1 = b
308- return - 1
309302 }
310303
311304 const eof = ( ) => {
312305 if ( ! g1 && ! g2 && ! g3 ) return null
313306 g1 = g2 = g3 = 0
314- return - 2
307+ return err ( )
315308 }
316309
317310 return { bytes, eof, pushback }
@@ -329,7 +322,7 @@ export function multibyteDecoder(enc, loose = false) {
329322 const asciiSuperset = isAsciiSuperset ( enc )
330323 return ( arr , stream = false ) => {
331324 const onErr = loose
332- ? ( ) => '\uFFFD'
325+ ? ( ) => REP
333326 : ( ) => {
334327 mapper . pushback . length = 0 // the queue is cleared on returning an error
335328 // The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
@@ -346,19 +339,16 @@ export function multibyteDecoder(enc, loose = false) {
346339 if ( res . length === arr . length ) return res // ascii
347340 }
348341
349- if ( ! mapper ) mapper = mappers [ enc ] ( )
342+ if ( ! mapper ) mapper = mappers [ enc ] ( onErr )
350343 const { bytes, eof, pushback } = mapper
351344 let i = res . length
352345
353346 // First, dump everything until EOF
354347 // Same as the full loop, but without EOF handling
355348 while ( i < length || pushback . length > 0 ) {
356349 const c = bytes ( pushback . length > 0 ? pushback . pop ( ) : arr [ i ++ ] )
357- if ( c >= 0 ) {
358- res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
359- } else if ( c === - 2 ) {
360- res += onErr ( )
361- }
350+ if ( c === undefined ) continue // consuming
351+ res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
362352 }
363353
364354 // Then, dump EOF. This needs the same loop as the characters can be pushed back
@@ -368,12 +358,8 @@ export function multibyteDecoder(enc, loose = false) {
368358 const isEOF = i === length && pushback . length === 0
369359 const c = isEOF ? eof ( ) : bytes ( pushback . length > 0 ? pushback . pop ( ) : arr [ i ++ ] )
370360 if ( isEOF && c === null ) break // clean exit
371- if ( c === - 1 ) continue // consuming
372- if ( c === - 2 ) {
373- res += onErr ( )
374- } else {
375- res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
376- }
361+ if ( c === undefined ) continue // consuming
362+ res += String . fromCodePoint ( c ) // gb18030 returns codepoints above 0xFFFF from ranges
377363 }
378364 }
379365
0 commit comments