Skip to content

Commit 8f308ba

Browse files
committed
fix: do not set BOM seen if decode() errored
1 parent 7b2ca41 commit 8f308ba

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

fallback/encoding.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,11 @@ export class TextDecoder {
100100
}
101101
}
102102

103+
let seenBOM = false
103104
if (this.#canBOM) {
104105
const bom = this.#findBom(prefix ?? u)
105106
if (bom) {
106-
if (stream) this.#canBOM = false
107+
seenBOM = true
107108
if (prefix) {
108109
prefix = prefix.subarray(bom)
109110
} else {
@@ -117,7 +118,8 @@ export class TextDecoder {
117118
if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
118119
try {
119120
const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
120-
if (res.length > 0 && stream) this.#canBOM = false
121+
// "BOM seen" is set on the current decode call only if it did not error, in "serialize I/O queue" after decoding
122+
if (stream && (seenBOM || res.length > 0)) this.#canBOM = false
121123
return res
122124
} catch (err) {
123125
this.#chunk = null // reset unfinished chunk on errors

tests/encoding/mistakes.test.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,16 @@ describe('Common implementation mistakes', () => {
713713
t.assert.throws(() => fatal.decode(u(0xfd, 0xef), { stream: true }))
714714
t.assert.strictEqual(fatal.decode(), '')
715715
}
716+
717+
// Chrome fails
718+
{
719+
const d = new TextDecoder('utf-8', { fatal: true })
720+
t.assert.strictEqual(d.decode(u(0xea, 0xaf, 0x8d)), '\uABCD')
721+
t.assert.strictEqual(d.decode(u(0xea), { stream: true }).length, 0)
722+
t.assert.throws(() => d.decode(u(0xea, 0xaf, 0x8d), { stream: true }))
723+
t.assert.strictEqual(d.decode(u(0xea, 0xaf, 0x8d), { stream: true }), '\uABCD')
724+
t.assert.strictEqual(d.decode(u(), { stream: true }).length, 0)
725+
}
716726
})
717727

718728
test('utf-8 BOM handling', (t) => {
@@ -724,12 +734,34 @@ describe('Common implementation mistakes', () => {
724734
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf)).length, 0)
725735
}
726736

737+
// Bun fails
727738
{
728739
const d = new TextDecoder('utf-8', { fatal: true })
729740
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf), { stream: true }).length, 0) // BOM
730741
t.assert.throws(() => d.decode(u(0xff), { stream: true }))
731742
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf)).length, 1)
732743
}
744+
745+
// WebKit, FireFox, Servo, Deno got this wrong. Chrome and Node.js pass
746+
{
747+
const d = new TextDecoder('utf-8', { fatal: true })
748+
t.assert.throws(() => d.decode(u(0x20, 0xff), { stream: true }))
749+
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf)).length, 0)
750+
t.assert.throws(() => d.decode(u(0xff), { stream: true }))
751+
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf)).length, 0)
752+
t.assert.throws(() => d.decode(u(0xef, 0xbb, 0xbf, 0xff), { stream: true }))
753+
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf)).length, 0)
754+
}
755+
756+
// Chrome has this wrong
757+
{
758+
const d = new TextDecoder('utf-8', { fatal: true })
759+
t.assert.strictEqual(d.decode(u(0xef), { stream: true }).length, 0)
760+
t.assert.throws(() => d.decode(u(0xef, 0xbb, 0xbf), { stream: true }))
761+
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf), { stream: true }).length, 0)
762+
t.assert.strictEqual(d.decode(u(0xef, 0xbb, 0xbf), { stream: true }).length, 1)
763+
t.assert.strictEqual(d.decode(u(), { stream: true }).length, 0)
764+
}
733765
})
734766

735767
// https://github.com/facebook/hermes/pull/1855#issuecomment-3632349129

0 commit comments

Comments
 (0)