|
1 | 1 | import { readFileSync, readdirSync } from 'node:fs' |
2 | | -import { toBase64url } from '@exodus/bytes/base64.js' |
3 | | -import { utf16fromString } from '@exodus/bytes/utf16.js' |
| 2 | +import { fromBase64url, toBase64url } from '@exodus/bytes/base64.js' |
| 3 | +import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js' |
| 4 | +import { to16input } from './../../../../fallback/utf16.js' |
| 5 | +import { raw } from '../indexes.cjs' // for revalidation only |
4 | 6 | import { join } from 'node:path' |
5 | 7 | import assert from 'node:assert/strict' |
6 | 8 | import { gzipSync } from 'node:zlib' |
@@ -237,3 +239,94 @@ console.log(final) |
237 | 239 | const json = JSON.stringify(JSON.parse(final)) // report minified size |
238 | 240 | console.error(`Raw size: ${json.length}`) |
239 | 241 | console.error(`Gzip size: ${gzipSync(json).length}`) |
| 242 | + |
| 243 | +// The data in this file is then copied manually into fallback/multi-byte.encodings.json |
| 244 | + |
| 245 | +/* eslint-disable @exodus/mutable/no-param-reassign-prop-only, no-inner-declarations */ |
| 246 | + |
| 247 | +// Extractor algorithm: similar to the one in fallback/multi-byte.table.js, but without the extra post-processing. |
| 248 | +// The algorithm there is based on this one; any format changes should be introduced and tested here first. |
| 249 | +{ |
| 250 | + const indices = JSON.parse(final) |
| 251 | + |
| 252 | + function loadBase64(str) { |
| 253 | + const x = fromBase64url(str) |
| 254 | + const len = x.length |
| 255 | + const len2 = len >> 1 |
| 256 | + const y = new Uint8Array(len) |
| 257 | + let a = -1, b = 0 // prettier-ignore |
| 258 | + for (let i = 0, j = 0; i < len; i += 2, j++) { |
| 259 | + a = (a + x[j] + 1) & 0xff |
| 260 | + b = (b + x[len2 + j]) & 0xff |
| 261 | + y[i] = a |
| 262 | + y[i + 1] = b |
| 263 | + } |
| 264 | + |
| 265 | + return y |
| 266 | + } |
| 267 | + |
| 268 | + function unwrap(res, t, pos, highMode = false) { |
| 269 | + let code = 0 |
| 270 | + for (let i = 0; i < t.length; i++) { |
| 271 | + let x = t[i] |
| 272 | + if (typeof x === 'number') { |
| 273 | + if (x === 0) { |
| 274 | + pos += t[++i] |
| 275 | + } else { |
| 276 | + if (x < 0) { |
| 277 | + code -= x |
| 278 | + x = 1 |
| 279 | + } else { |
| 280 | + code += t[++i] |
| 281 | + } |
| 282 | + |
| 283 | + if (highMode) { |
| 284 | + for (let k = 0; k < x; k++, pos++, code++) { |
| 285 | + res[pos] = code // this is different in fallback/multi-byte.table.js as it pre-processes high codepoints |
| 286 | + } |
| 287 | + } else { |
| 288 | + for (let k = 0; k < x; k++, pos++, code++) res[pos] = code |
| 289 | + } |
| 290 | + } |
| 291 | + } else if (x[0] === '$' && Object.hasOwn(indices, x)) { |
| 292 | + pos = unwrap(res, indices[x], pos, highMode) // self-reference using shared chunks |
| 293 | + } else if (highMode) { |
| 294 | + const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints |
| 295 | + let c |
| 296 | + for (let i = 0; i < s.length; ) { |
| 297 | + c = s[i++] |
| 298 | + res[pos++] = c.codePointAt(0) // this is different in fallback/multi-byte.table.js as it pre-processes high codepoints |
| 299 | + } |
| 300 | + |
| 301 | + code = c.codePointAt(0) + 1 |
| 302 | + } else { |
| 303 | + const u16 = to16input(loadBase64(x), true) // data is little-endian |
| 304 | + res.set(u16, pos) |
| 305 | + pos += u16.length |
| 306 | + code = u16[u16.length - 1] + 1 |
| 307 | + } |
| 308 | + } |
| 309 | + |
| 310 | + return pos |
| 311 | + } |
| 312 | + |
| 313 | + // Revalidation that we can unpack correctly |
| 314 | + const sizes = { |
| 315 | + jis0208: 11_104, |
| 316 | + jis0212: 7211, |
| 317 | + 'euc-kr': 23_750, |
| 318 | + gb18030: 23_940, |
| 319 | + big5: 19_782, |
| 320 | + } |
| 321 | + for (const [id, size] of Object.entries(sizes)) { |
| 322 | + const C = id === 'big5' ? Uint32Array : Uint16Array |
| 323 | + const u = new C(size) |
| 324 | + unwrap(u, indices[id], 0, u instanceof Uint32Array) |
| 325 | + const arr = Array.from(u, (x) => (x === 0 ? null : x)) |
| 326 | + assert.ok(Object.hasOwn(raw, id)) |
| 327 | + const expected = raw[id] |
| 328 | + // we don't save useless nulls |
| 329 | + while (arr.length < expected.length) arr.push(null) |
| 330 | + assert.deepStrictEqual(arr, expected) |
| 331 | + } |
| 332 | +} |
0 commit comments