Skip to content

Commit 6c919d8

Browse files
committed
test: add validation to dump.js
1 parent 5ae66ec commit 6c919d8

File tree

1 file changed

+95
-2
lines changed
  • tests/encoding/fixtures/multi-byte

1 file changed

+95
-2
lines changed

tests/encoding/fixtures/multi-byte/dump.js

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { readFileSync, readdirSync } from 'node:fs'
2-
import { toBase64url } from '@exodus/bytes/base64.js'
3-
import { utf16fromString } from '@exodus/bytes/utf16.js'
2+
import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js'
3+
import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js'
4+
import { to16input } from './../../../../fallback/utf16.js'
5+
import { raw } from '../indexes.cjs' // for revalidation only
46
import { join } from 'node:path'
57
import assert from 'node:assert/strict'
68
import { gzipSync } from 'node:zlib'
@@ -237,3 +239,94 @@ console.log(final)
237239
const json = JSON.stringify(JSON.parse(final)) // report minified size
238240
console.error(`Raw size: ${json.length}`)
239241
console.error(`Gzip size: ${gzipSync(json).length}`)
242+
243+
// The data in this file is then copied manually into fallback/multi-byte.encodings.json
244+
245+
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only, no-inner-declarations */
246+
247+
// Extractor algorithm, similar to the one in fallback/multi-byte.table.js but doesn't do extra post-processing
248+
// The algo there is based on this one, any changes to format should be first introduced and tested here
249+
{
  // Re-parse the serialized output produced above, so we validate exactly what was emitted
  const indices = JSON.parse(final)

  /**
   * Decode a base64url chunk back into an interleaved little-endian uint16 byte stream.
   *
   * Storage layout (as this decoder reads it): the decoded buffer's first half
   * holds low bytes and the second half holds high bytes of consecutive uint16
   * values; both halves are delta-encoded (low bytes with an implicit +1 per
   * step). The output re-interleaves them as [lo, hi, lo, hi, ...].
   *
   * @param {string} str - base64url chunk from the serialized index
   * @returns {Uint8Array} interleaved little-endian uint16 bytes (length === decoded length)
   */
  function loadBase64(str) {
    const x = fromBase64url(str)
    const len = x.length
    const len2 = len >> 1 // start of the high-byte half
    const y = new Uint8Array(len)
    let a = -1, b = 0 // prettier-ignore
    for (let i = 0, j = 0; i < len; i += 2, j++) {
      a = (a + x[j] + 1) & 0xff // low byte: delta-decode with implicit +1 per entry
      b = (b + x[len2 + j]) & 0xff // high byte: plain delta-decode
      y[i] = a
      y[i + 1] = b
    }

    return y
  }

  /**
   * Expand one packed table `t` into `res` starting at index `pos`, returning the
   * position reached. Mirrors the decoder in fallback/multi-byte.table.js (which
   * additionally post-processes high codepoints — see inline notes).
   *
   * Entry forms handled (as read by this code):
   *  - number 0 followed by a number: advance `pos` by that many slots (a gap)
   *  - negative number -n: advance `code` by n, then emit a single run of 1
   *  - positive number n followed by a number d: advance `code` by d, emit a run of n
   *    sequential codes (each emit also increments `code`)
   *  - string starting with '$' that names a key of `indices`: recurse into that
   *    shared chunk
   *  - any other string: a base64url data chunk, decoded via loadBase64()
   *
   * @param {Uint16Array|Uint32Array} res - output table being filled in place
   * @param {Array} t - packed table entries (numbers and strings, see above)
   * @param {number} pos - starting write index into `res`
   * @param {boolean} [highMode=false] - decode per-codepoint (for tables that can
   *   exceed 16 bits); plain uint16 path otherwise
   * @returns {number} the write position after expanding `t`
   */
  function unwrap(res, t, pos, highMode = false) {
    let code = 0
    for (let i = 0; i < t.length; i++) {
      let x = t[i]
      if (typeof x === 'number') {
        if (x === 0) {
          // 0 marks a gap: the next entry is the number of slots to skip
          pos += t[++i]
        } else {
          if (x < 0) {
            // negative entry: jump `code` forward by |x| and emit exactly one value
            code -= x
            x = 1
          } else {
            // positive entry is a run length; the next entry is the code delta
            code += t[++i]
          }

          if (highMode) {
            for (let k = 0; k < x; k++, pos++, code++) {
              res[pos] = code // this is different in fallback/multi-byte.table.js as it pre-processes high codepoints
            }
          } else {
            // emit `x` sequential codes
            for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
          }
        }
      } else if (x[0] === '$' && Object.hasOwn(indices, x)) {
        pos = unwrap(res, indices[x], pos, highMode) // self-reference using shared chunks
      } else if (highMode) {
        // high-codepoint chunk: decode bytes to a string, then split by codepoints
        const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints
        let c
        for (let i = 0; i < s.length; ) {
          c = s[i++]
          res[pos++] = c.codePointAt(0) // this is different in fallback/multi-byte.table.js as it pre-processes high codepoints
        }

        // continue the running code from the last emitted codepoint
        // NOTE(review): assumes the chunk is non-empty — `c` would be undefined otherwise
        code = c.codePointAt(0) + 1
      } else {
        // plain uint16 chunk: bulk-copy decoded values into the output
        const u16 = to16input(loadBase64(x), true) // data is little-endian
        res.set(u16, pos)
        pos += u16.length
        code = u16[u16.length - 1] + 1
      }
    }

    return pos
  }

  // Revalidation that we can unpack correctly
  // Expected entry counts per encoding table in the raw index
  const sizes = {
    jis0208: 11_104,
    jis0212: 7211,
    'euc-kr': 23_750,
    gb18030: 23_940,
    big5: 19_782,
  }
  for (const [id, size] of Object.entries(sizes)) {
    // big5 values can exceed 16 bits, so it gets a 32-bit table and highMode decoding
    const C = id === 'big5' ? Uint32Array : Uint16Array
    const u = new C(size)
    unwrap(u, indices[id], 0, u instanceof Uint32Array)
    // 0 slots are unwritten gaps — represent them as null to match the raw table shape
    const arr = Array.from(u, (x) => (x === 0 ? null : x))
    assert.ok(Object.hasOwn(raw, id))
    const expected = raw[id]
    // we don't save useless nulls
    while (arr.length < expected.length) arr.push(null)
    assert.deepStrictEqual(arr, expected)
  }
}

0 commit comments

Comments
 (0)