Skip to content

Commit 21fca95

Browse files
committed
fix: gbk fast decoder pushback
1 parent 2c646bd commit 21fca95

File tree

2 files changed

+32
-13
lines changed

2 files changed

+32
-13
lines changed

fallback/multi-byte.js

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -291,11 +291,10 @@ const mappers = {
291291
// g2 is 0 or 0x30-0x39
292292
// g3 is 0 or 0x81-0xfe
293293

294-
const pushback = []
295-
296294
const fast = (arr, start, end, stream) => {
297295
let res = ''
298296
let i = start
297+
const pushback = [] // local and auto-cleared
299298

300299
// First, dump everything until EOF
301300
// Same as the full loop, but without EOF handling
@@ -418,25 +417,27 @@ export function multibyteDecoder(enc, loose = false) {
418417
// Input is assumed to be typechecked already
419418
let mapper
420419
const asciiSuperset = isAsciiSuperset(enc)
421-
return (arr, stream = false) => {
422-
const onErr = loose
423-
? () => REP
424-
: () => {
425-
if (mapper.pushback) mapper.pushback.length = 0 // the queue is cleared on returning an error
426-
// The correct way per spec seems to be not destoying the decoder state in stream mode, even when fatal
427-
// Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
428-
// iso-2022-jp is the only tricky one one where this !stream check matters in non-stream mode
429-
if (!stream) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
430-
throw new TypeError(E_STRICT)
431-
}
420+
let streaming // because onErr is cached in mapper
421+
const onErr = loose
422+
? () => REP
423+
: () => {
424+
if (mapper.pushback) mapper.pushback.length = 0 // the queue is cleared on returning an error
425+
// The correct way per spec seems to be not destroying the decoder state in stream mode, even when fatal
426+
// Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
427+
// iso-2022-jp is the only tricky one where this !streaming check matters in non-stream mode
428+
if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
429+
throw new TypeError(E_STRICT)
430+
}
432431

432+
return (arr, stream = false) => {
433433
let res = ''
434434
const length = arr.length
435435
if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
436436
res = decodeLatin1(arr, 0, asciiPrefix(arr))
437437
if (res.length === arr.length) return res // ascii
438438
}
439439

440+
streaming = stream // affects onErr
440441
if (!mapper) mapper = mappers[enc](onErr)
441442
if (mapper.fast) return res + mapper.fast(arr, res.length, arr.length, stream) // does not need mapper deletion
442443
const { bytes, eof, pushback } = mapper

tests/encoding/mistakes.test.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,24 @@ describe('Common implementation mistakes', () => {
730730
t.assert.strictEqual(d.decode(Uint8Array.of(0x42)), '\uFF82')
731731
}
732732
})
733+
734+
for (const encoding of ['gb18030', 'gbk']) {
735+
test(encoding, (t) => {
736+
{
737+
const d = new TextDecoder(encoding, { fatal: true })
738+
t.assert.strictEqual(d.decode(Uint8Array.of(0x80), { stream: true }), '\u20AC')
739+
t.assert.throws(() => d.decode(u(0x81, 0x30, 0x21, 0x21, 0x21), { stream: true }))
740+
t.assert.strictEqual(d.decode(Uint8Array.of(0x80)), '\u20AC') // pushback is cleared
741+
}
742+
743+
{
744+
const d = new TextDecoder(encoding, { fatal: true })
745+
t.assert.strictEqual(d.decode(Uint8Array.of(0x80), { stream: true }), '\u20AC')
746+
t.assert.throws(() => d.decode(u(0x81, 0x30, 0x81, 0x42, 0x42), { stream: true }))
747+
t.assert.strictEqual(d.decode(Uint8Array.of(0x80)), '\u20AC') // pushback is cleared
748+
}
749+
})
750+
}
733751
})
734752

735753
// These are mislabeled in WPT html dataset files, their recorded codepoints do not match actual ones

0 commit comments

Comments
 (0)