|
| 1 | +import { |
| 2 | + TextDecoder, |
| 3 | + TextEncoder, |
| 4 | + getBOMEncoding, |
| 5 | + legacyHookDecode, |
| 6 | +} from '@exodus/bytes/encoding-browser.js' |
| 7 | +import { fromHex } from '@exodus/bytes/hex.js' |
| 8 | +import { test, describe } from 'node:test' |
| 9 | +import { labels } from './fixtures/encodings.cjs' |
| 10 | +import unfinishedBytesFixtures from './fixtures/unfinishedBytes.js' |
| 11 | + |
// A streaming decoder must withhold an unfinished trailing byte sequence until
// the final flush; each fixture gives [encoding, trailing byte count, bytes].
test('Unfinished bytes', (t) => {
  for (const [encoding, trail, bytes] of unfinishedBytesFixtures) {
    const oneShot = (u8) => new TextDecoder(encoding).decode(u8)
    const split = bytes.length - trail

    const streaming = new TextDecoder(encoding)
    const head = streaming.decode(bytes, { stream: true }) // incomplete tail withheld
    const tail = streaming.decode() // flush emits the remainder
    const whole = oneShot(bytes)

    t.assert.strictEqual(head, oneShot(bytes.subarray(0, split)))
    t.assert.strictEqual(tail, oneShot(bytes.subarray(split)))
    t.assert.strictEqual(head + tail, whole)
    t.assert.strictEqual(streaming.decode(bytes), whole) // decoder is reusable after flush

    // Degenerate splits: nothing withheld / everything withheld.
    if (trail === 0) {
      t.assert.strictEqual(head, whole)
      t.assert.strictEqual(tail, '')
    }

    if (trail === bytes.length) {
      t.assert.strictEqual(head, '')
      t.assert.strictEqual(tail, whole)
    }
  }
})
| 36 | + |
| 37 | +test('String coercion', (t) => { |
| 38 | + const encoder = new TextEncoder() |
| 39 | + const map = [ |
| 40 | + [{}, '[object Object]'], |
| 41 | + [null, 'null'], |
| 42 | + [undefined, 'undefined'], |
| 43 | + ] |
| 44 | + |
| 45 | + for (const [arg, string] of map) { |
| 46 | + const length = string.length |
| 47 | + const a = encoder.encode(string) |
| 48 | + t.assert.strictEqual(a.length, length) |
| 49 | + |
| 50 | + const b = encoder.encode(arg) |
| 51 | + if (arg === undefined) { |
| 52 | + // undefined is special |
| 53 | + t.assert.strictEqual(b.length, 0) |
| 54 | + t.assert.deepStrictEqual(b, Uint8Array.of()) |
| 55 | + } else { |
| 56 | + const b = encoder.encode(arg) |
| 57 | + t.assert.strictEqual(b.length, length) |
| 58 | + t.assert.deepStrictEqual(b, a) |
| 59 | + } |
| 60 | + |
| 61 | + const c = new Uint8Array(20) |
| 62 | + t.assert.deepStrictEqual(encoder.encodeInto(arg, c), { read: length, written: length }) |
| 63 | + t.assert.deepStrictEqual(c.subarray(0, length), a) |
| 64 | + } |
| 65 | +}) |
| 66 | + |
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
// ASCII bytes map to themselves; 0x80-0xFF map into the private use area at U+F780.
test('x-user-defined encoding', (t) => {
  const decoder = new TextDecoder('x-user-defined')
  for (let byte = 0; byte <= 0xff; byte++) {
    const expected = byte < 0x80 ? byte : 0xf7_80 + (byte - 0x80)
    t.assert.strictEqual(decoder.decode(Uint8Array.of(byte)), String.fromCodePoint(expected))
  }
})
| 75 | + |
| 76 | +// iso-8859-1, iso-8859-9, iso-8859-11 differ in WHATWG Encoding spec from https://unicode.org/Public/MAPPINGS/ISO8859 |
| 77 | +// and map to windows-1252, windows-1254, windows-874 instead |
| 78 | +test('not all ISO-8859 encodings are present in TextDecoder', (t) => { |
| 79 | + t.assert.strictEqual(new TextDecoder('iso-8859-1').encoding, 'windows-1252') |
| 80 | + t.assert.strictEqual(new TextDecoder('iso-8859-2').encoding, 'iso-8859-2') // present |
| 81 | + t.assert.strictEqual(new TextDecoder('iso-8859-9').encoding, 'windows-1254') |
| 82 | + t.assert.strictEqual(new TextDecoder('iso-8859-11').encoding, 'windows-874') |
| 83 | + t.assert.throws(() => new TextDecoder('iso-8859-12')) |
| 84 | + t.assert.strictEqual(new TextDecoder('iso-8859-13').encoding, 'iso-8859-13') // present |
| 85 | +}) |
| 86 | + |
describe('encodings are ASCII supersets, except utf-16 and iso-2022-jp', () => {
  const skip = new Set(['replacement', 'utf-16le', 'utf-16be'])
  // iso-2022-jp consumes SO/SI/ESC as mode-switching bytes, not as characters
  const iso2022Control = new Set([0x0e, 0x0f, 0x1b])

  for (const label of labels) {
    if (skip.has(label)) continue
    test(label, (t) => {
      const loose = new TextDecoder(label)
      const fatal = new TextDecoder(label, { fatal: true })
      for (let code = 0; code < 128; code++) {
        if (label === 'iso-2022-jp' && iso2022Control.has(code)) continue
        const char = String.fromCodePoint(code)
        t.assert.strictEqual(loose.decode(Uint8Array.of(code)), char)
        t.assert.strictEqual(fatal.decode(Uint8Array.of(code)), char)
      }
    })
  }
})
| 101 | + |
describe('legacyHookDecode', () => {
  // Per encoding label (deliberately non-normalized): [hex input, expected string] pairs
  const fixtures = {
    replacement: [
      ['', ''],
      ['00', '\uFFFD'],
      ['ff', '\uFFFD'],
      ['20', '\uFFFD'],
      ['2020', '\uFFFD'],
      // BOM takes preference
      ['efbbbf', ''],
      ['efbbbf2a', '*'],
      ['efbbbf202a', ' *'],
      ['fffe', ''],
      ['fffe2a20', '\u202A'],
      ['fffe2a', '\uFFFD'],
      ['fffe00d72a', '\uD700\uFFFD'],
      ['fffe00d82a', '\uFFFD'],
      ['fffe00dc2a', '\uFFFD\uFFFD'],
      ['feff', ''],
      ['feff202a', '\u202A'],
      ['feff20', '\uFFFD'],
      ['feffd70020', '\uD700\uFFFD'],
      ['feffd80020', '\uFFFD'],
      ['feffdc0020', '\uFFFD\uFFFD'],
    ],
    // non-normalized names
    Utf8: [['c280', '\x80']],
    unicodefeff: [['c280', '\u80C2']],
    UnicodeFFFE: [['c280', '\uC280']],
  }

  test('null encoding', (t) => {
    t.assert.throws(() => legacyHookDecode(Uint8Array.of(), null), RangeError)
  })

  for (const [encoding, cases] of Object.entries(fixtures)) {
    test(encoding, (t) => {
      for (const [hex, expected] of cases) {
        t.assert.strictEqual(legacyHookDecode(fromHex(hex), encoding), expected, `${hex}`)
      }
    })
  }
})
| 145 | + |
test('getBOMEncoding', (t) => {
  // expected result (null = no BOM recognized) -> hex-encoded byte prefixes to probe
  const cases = new Map([
    [null, ['', 'ff', 'fe', 'ef', 'efbb', 'efbb00', 'efbfbb', 'ffbbbf']],
    ['utf-8', ['efbbbf', 'efbbbf00']],
    ['utf-16le', ['fffe', 'fffefffe', 'fffefffefffe', 'fffebb', 'fffebf']],
    ['utf-16be', ['feff', 'fefffeff', 'fefffefffeff']],
  ])

  for (const [enc, hexes] of cases) {
    for (const hex of hexes) {
      t.assert.strictEqual(getBOMEncoding(fromHex(hex)), enc, `${hex} -> ${enc}`)
    }
  }
})
0 commit comments