|
| 1 | +// Comment out this line to test on native impl, e.g. to cross-test in browsers |
| 2 | +import { TextDecoder } from '@exodus/bytes/encoding.js' |
| 3 | + |
| 4 | +import { readFileSync } from 'node:fs' |
| 5 | +import { join } from 'node:path' |
| 6 | +import { test, describe } from 'node:test' |
| 7 | +import { legacySingleByte as encodings } from '../fixtures/encoding/encodings.cjs' |
| 8 | + |
// Every encoding in `encodings` must decode each byte 0x00..0x7F to the code point of the same
// value, both in the default (replacement) mode and in fatal mode.
describe('single-byte encodings are supersets of ascii', () => {
  // Builds the test body for one encoding label; decoders are created when the test runs
  const asciiCheck = (label) => (t) => {
    const decoders = [new TextDecoder(label), new TextDecoder(label, { fatal: true })]
    for (let byte = 0; byte < 128; byte++) {
      const expected = String.fromCodePoint(byte)
      // Non-fatal decoder is checked first, then the fatal one
      for (const decoder of decoders) {
        t.assert.strictEqual(decoder.decode(Uint8Array.of(byte)), expected, byte)
      }
    }
  }

  for (const encoding of encodings) test(encoding, asciiCheck(encoding))
})
| 22 | + |
// Checks bytes 0x80..0xFF of every encoding in `encodings` against its index fixture file.
// Fixture rows are tab-separated: decimal pointer (0..127), code point as 0xHHHH, description.
// Blank lines and lines starting with '#' are skipped. Pointers absent from the fixture are
// unmapped: the fatal decoder must throw TypeError, the non-fatal one must yield U+FFFD.
describe('single-byte encodings index', () => {
  // iso-8859-8-i is checked against the iso-8859-8 index file
  const indexFileFor = (label) =>
    label === 'iso-8859-8-i' ? `index-iso-8859-8.txt` : `index-${label}.txt`

  // Validates one fixture row and returns a [pointer, entry] pair suitable for `new Map(...)`
  const parseRow = (t, line) => {
    const [pointerText, hexText, description] = line.split('\t')
    const pointer = Number(pointerText)
    t.assert.ok(pointer < 128)
    const codePoint = parseInt(hexText.slice(2), 16)
    // Both fields must round-trip exactly, i.e. be in canonical decimal / 0xHHHH uppercase form
    t.assert.strictEqual(`${pointer}`, pointerText)
    t.assert.strictEqual(`0x${codePoint.toString(16).padStart(4, '0').toUpperCase()}`, hexText)
    // Non-zero, not the replacement character, and within 16 bits
    t.assert.ok(codePoint && codePoint !== 0xff_fd && codePoint <= 0xff_ff)
    // Not a surrogate
    t.assert.ok(codePoint < 0xd8_00 || codePoint >= 0xe0_00)
    return [pointer, { i: pointer, code: codePoint, description }]
  }

  for (const encoding of encodings) {
    test(encoding, (t) => {
      const loose = new TextDecoder(encoding)
      const fatal = new TextDecoder(encoding, { fatal: true })
      const fixturePath = join(
        import.meta.dirname,
        '../fixtures/encoding/single-byte',
        indexFileFor(encoding)
      )

      const entries = []
      for (const rawLine of readFileSync(fixturePath, 'utf8').split('\n')) {
        const line = rawLine.trim()
        if (!line || line[0] === '#') continue
        entries.push(parseRow(t, line))
      }

      t.assert.ok(entries.length <= 128)
      const byPointer = new Map(entries)
      t.assert.strictEqual(entries.length, byPointer.size) // no duplicate pointers

      for (let byte = 128; byte < 256; byte++) {
        const pointer = byte - 128
        const entry = byPointer.get(pointer)
        if (!entry) {
          // Unmapped byte
          t.assert.throws(() => fatal.decode(Uint8Array.of(byte)), TypeError)
          t.assert.strictEqual(loose.decode(Uint8Array.of(byte)), '\uFFFD')
          continue
        }

        t.assert.strictEqual(pointer, entry.i)
        const expected = String.fromCodePoint(entry.code)
        t.assert.strictEqual(fatal.decode(Uint8Array.of(byte)), expected, entry.description)
        t.assert.strictEqual(loose.decode(Uint8Array.of(byte)), expected, entry.description)
      }
    })
  }
})
| 69 | + |
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
// Bytes below 0x80 decode to the code point of the same value; bytes 0x80..0xFF decode to
// U+F780..U+F7FF. Fatal and non-fatal decoders are expected to return the same string.
test('x-user-defined', (t) => {
  const label = 'x-user-defined'
  const loose = new TextDecoder(label)
  const fatal = new TextDecoder(label, { fatal: true })
  const expectedCodePoint = (byte) => (byte < 0x80 ? byte : 0xf7_80 + byte - 0x80)

  for (let byte = 0; byte <= 0xff; byte++) {
    const expected = String.fromCodePoint(expectedCodePoint(byte))
    // Fatal decoder is checked first, then the non-fatal one
    for (const decoder of [fatal, loose]) {
      t.assert.strictEqual(decoder.decode(Uint8Array.of(byte)), expected, byte)
    }
  }
})
0 commit comments