Skip to content

Commit 357121c

Browse files
committed
test: add TextDecoder single-byte tables test
1 parent 181e6e2 commit 357121c

2 files changed

Lines changed: 82 additions & 0 deletions

File tree

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// Comment out this line to test on native impl, e.g. to cross-test in browsers
2+
import { TextDecoder } from '@exodus/bytes/encoding.js'
3+
4+
import { readFileSync } from 'node:fs'
5+
import { join } from 'node:path'
6+
import { test, describe } from 'node:test'
7+
import { legacySingleByte as encodings } from '../fixtures/encoding/encodings.cjs'
8+
9+
// For each encoding in `encodings`, every byte 0x00-0x7F must decode to the code point
// with the same value, both in the default (replacement) mode and in fatal mode.
describe('single-byte encodings are supersets of ascii', () => {
  for (const encoding of encodings) {
    test(encoding, (t) => {
      const loose = new TextDecoder(encoding)
      const fatal = new TextDecoder(encoding, { fatal: true })
      for (let i = 0; i < 128; i++) {
        const str = String.fromCodePoint(i)
        // assert's `message` is documented as string | Error, so name the failing byte
        // with a readable string instead of passing the raw number
        const message = `byte 0x${i.toString(16).padStart(2, '0')}`
        t.assert.strictEqual(loose.decode(Uint8Array.of(i)), str, message)
        t.assert.strictEqual(fatal.decode(Uint8Array.of(i)), str, message)
      }
    })
  }
})
22+
23+
// For each encoding in `encodings`, checks decoding of the upper half (bytes 0x80-0xFF)
// against the tab-separated `index-<encoding>.txt` fixture: rows are `index\t0xCODE\tdescription`,
// where `index` is the byte offset from 0x80. Bytes without a row must be rejected in fatal
// mode and decode to U+FFFD otherwise.
describe('single-byte encodings index', () => {
  for (const encoding of encodings) {
    test(encoding, (t) => {
      const loose = new TextDecoder(encoding)
      const fatal = new TextDecoder(encoding, { fatal: true })
      // iso-8859-8-i is checked against the iso-8859-8 index file
      const file = encoding === 'iso-8859-8-i' ? `index-iso-8859-8.txt` : `index-${encoding}.txt`
      const text = readFileSync(
        join(import.meta.dirname, '../fixtures/encoding/single-byte', file),
        'utf8'
      )
      const rows = text
        .split('\n')
        .map((x) => x.trim())
        .filter((x) => x && x[0] !== '#') // skip blank lines and `#` comment lines
        .map((x) => x.split('\t'))
        .map(([istr, codeHex, description]) => {
          const i = Number(istr)
          // Must be an integer in [0, 128): a negative or fractional index would otherwise
          // pass validation and then be silently skipped by the 0..127 lookup below
          t.assert.ok(Number.isInteger(i) && i >= 0 && i < 128, `invalid index: ${istr}`)
          const code = parseInt(codeHex.slice(2), 16)
          // Round-trip both fields to make sure they are in canonical form
          t.assert.strictEqual(`${i}`, istr)
          t.assert.strictEqual('0x' + code.toString(16).padStart(4, '0').toUpperCase(), codeHex)
          // Can't be NUL/NaN or a replacement char, has to be <= 16-bit
          t.assert.ok(
            code > 0 && code !== 0xff_fd && code <= 0xff_ff,
            `invalid code point: ${codeHex}`
          )
          t.assert.ok(code < 0xd8_00 || code >= 0xe0_00, `surrogate code point: ${codeHex}`)
          return [i, { i, code, description }]
        })

      t.assert.ok(rows.length <= 128, `too many rows: ${rows.length}`)
      const known = new Map(rows)
      t.assert.strictEqual(rows.length, known.size) // all unique

      for (let i = 0; i < 128; i++) {
        const row = known.get(i)
        const byte = i + 128
        if (row) {
          t.assert.strictEqual(i, row.i)
          const str = String.fromCodePoint(row.code)
          t.assert.strictEqual(fatal.decode(Uint8Array.of(byte)), str, row.description)
          t.assert.strictEqual(loose.decode(Uint8Array.of(byte)), str, row.description)
        } else {
          // Unmapped byte: fatal mode throws, default mode emits the replacement character
          t.assert.throws(() => fatal.decode(Uint8Array.of(byte)), TypeError)
          t.assert.strictEqual(loose.decode(Uint8Array.of(byte)), '\uFFFD')
        }
      }
    })
  }
})
69+
70+
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
// Every one of the 256 byte values must decode successfully in both default and fatal modes.
test('x-user-defined', (t) => {
  const encoding = 'x-user-defined'
  const loose = new TextDecoder(encoding)
  const fatal = new TextDecoder(encoding, { fatal: true })
  for (let byte = 0; byte < 256; byte++) {
    // Bytes below 0x80 map to themselves, bytes 0x80-0xFF map to U+F780-U+F7FF
    const str = String.fromCodePoint(byte >= 0x80 ? 0xf7_80 + byte - 0x80 : byte)
    // assert's `message` is documented as string | Error, so name the failing byte
    // with a readable string instead of passing the raw number
    const message = `byte 0x${byte.toString(16).padStart(2, '0')}`
    t.assert.strictEqual(fatal.decode(Uint8Array.of(byte)), str, message)
    t.assert.strictEqual(loose.decode(Uint8Array.of(byte)), str, message)
  }
})

tests/single-byte.test.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import encodingsObject from '../fallback/single-byte.encodings.js'
77

88
const encodings = Object.keys(encodingsObject)
99

10+
// See also tests/encoding/single-byte.tables.test.js for similar TextDecoder tests
11+
1012
describe('single-byte encodings are supersets of ascii', () => {
1113
for (const encoding of encodings) {
1214
test(encoding, (t) => {

0 commit comments

Comments
 (0)