Skip to content

Commit c4ec8ab

Browse files
committed
test: test WPT mislabels
1 parent 14dc01d commit c4ec8ab

2 files changed

Lines changed: 39 additions & 2 deletions

File tree

tests/encoding/mistakes.test.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,42 @@ describe('Common implementation mistakes', () => {
611611
})
612612
})
613613

614+
// These are mislabeled in the WPT html dataset files: their recorded codepoints do not match the actual ones
615+
// All browsers (and the script) agree on how these are decoded, but let's explicitly recheck
616+
// Refs: https://github.com/web-platform-tests/wpt/issues/56748
617+
describe('WPT mislabels', () => {
618+
const vectors = {
619+
'euc-jp': [
620+
[[0x5c], '\x5C'], // Not U+A5
621+
[[0x7e], '\x7E'], // Not U+203E
622+
[[0xa1, 0xdd], '\uFF0D'], // Not U+2212
623+
],
624+
shift_jis: [
625+
[[0x5c], '\x5C'], // Not U+A5
626+
[[0x7e], '\x7E'], // Not U+203E
627+
[[0x81, 0x7c], '\uFF0D'], // Not U+2212
628+
],
629+
'iso-2022-jp': [
630+
[[0x1b, 0x28, 0x4a, 0x5c, 0x1b, 0x28, 0x42], '\xA5'], // Correctly labeled, U+A5
631+
[[0x1b, 0x28, 0x4a, 0x7e, 0x1b, 0x28, 0x42], '\u203E'], // Correctly labeled, U+203E
632+
[[0x1b, 0x24, 0x42, 0x21, 0x5d, 0x1b, 0x28, 0x42], '\uFF0D'], // Not U+2212
633+
],
634+
}
635+
636+
for (const [encoding, list] of Object.entries(vectors)) {
637+
describe(encoding, () => {
638+
for (const fatal of [false, true]) {
639+
test(fatal ? 'fatal' : 'loose', (t) => {
640+
for (const [bytes, string] of list) {
641+
const d = new TextDecoder(encoding, { fatal })
642+
t.assert.strictEqual(d.decode(Uint8Array.from(bytes)), string)
643+
}
644+
})
645+
}
646+
})
647+
}
648+
})
649+
614650
describe('invalid labels', () => {
615651
test('non-ascii', (t) => {
616652
const bad = ['\u212Aoi8-r', '\u212Aoi8-u', 'euc-\u212Ar']

tests/wpt/loader.cjs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,9 @@ function loadTextDecoderHtml(fullName) {
150150

151151
// Test decoder!
152152
if (
153-
['euc-jp', 'iso-2022-jp', 'shift_jis'].includes(decoder.encoding) &&
154-
[0xa5, 0x20_3e, 0x22_12].includes(cp)
153+
(['euc-jp', 'shift_jis'].includes(decoder.encoding) &&
154+
[0xa5, 0x20_3e, 0x22_12].includes(cp)) ||
155+
(decoder.encoding === 'iso-2022-jp' && cp === 0x22_12)
155156
) {
156157
// Those three encodings are asymmetrical on these codepoints
157158
// See https://encoding.spec.whatwg.org/ for mentions of those exact code points

0 commit comments

Comments
 (0)