Skip to content

Commit 2bc302c

Browse files
committed
perf: store low charcodes as numbers in big5
1 parent b81e4dd commit 2bc302c

File tree

3 files changed

+18
-9
lines changed

3 files changed

+18
-9
lines changed

fallback/multi-byte.js

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,13 @@ function bigDecoder(err, pair) {
1515
let o16
1616

1717
const decodeLead = (b) => {
18-
const str = pair(lead, b)
18+
const p = pair(lead, b)
1919
lead = 0
20-
if (typeof str === 'number') {
21-
o16[oi++] = str
22-
} else if (str) {
20+
if (typeof p === 'number') {
21+
o16[oi++] = p
22+
} else if (p) {
2323
// This is still faster than string concatenation. Can we optimize strings though?
24-
for (let i = 0; i < str.length; i++) o16[oi++] = str.charCodeAt(i)
24+
for (let i = 0; i < p.length; i++) o16[oi++] = p.charCodeAt(i)
2525
} else {
2626
o16[oi++] = err()
2727
if (b < 128) o16[oi++] = b

fallback/multi-byte.table.js

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,9 @@ function unwrap(res, t, pos, stringMode = false) {
5656
}
5757

5858
if (stringMode) {
59-
for (let k = 0; k < x; k++, pos++, code++) res[pos] = String.fromCodePoint(code)
59+
for (let k = 0; k < x; k++, pos++, code++) {
60+
res[pos] = code <= 0xff_ff ? code : String.fromCodePoint(code)
61+
}
6062
} else {
6163
for (let k = 0; k < x; k++, pos++, code++) res[pos] = code
6264
}
@@ -65,8 +67,13 @@ function unwrap(res, t, pos, stringMode = false) {
6567
pos = unwrap(res, indices[x], pos, stringMode) // self-reference using shared chunks
6668
} else if (stringMode) {
6769
const s = [...utf16toString(loadBase64(x), 'uint8-le')] // splits by codepoints
68-
for (let i = 0; i < s.length; ) res[pos++] = s[i++] // TODO: splice?
69-
code = s[s.length - 1].codePointAt(0) + 1
70+
let char
71+
for (let i = 0; i < s.length; ) {
72+
char = s[i++]
73+
res[pos++] = char.length === 1 ? char.charCodeAt(0) : char // strings only for high codepoints
74+
}
75+
76+
code = char.codePointAt(0) + 1
7077
} else {
7178
const u16 = to16input(loadBase64(x), true) // data is little-endian
7279
res.set(u16, pos)

tests/multi-byte.test.js

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,11 @@ describe('multi-byte encodings tables', () => {
6262
t.assert.strictEqual(typeof table[i], 'string')
6363
t.assert.strictEqual(table[i].length, 2)
6464
} else if (row) {
65-
const expected = non16bit ? String.fromCodePoint(row.code) : row.code
65+
const expected =
66+
non16bit && typeof table[i] === 'string' ? String.fromCodePoint(row.code) : row.code
6667
t.assert.strictEqual(i, row.i)
6768
t.assert.strictEqual(table[i], expected, `Offset ${i}: ${row.description}`)
69+
if (typeof expected === 'number') t.assert.ok(expected > 0 && expected < 0xff_fd)
6870
} else {
6971
t.assert.strictEqual(table[i], non16bit ? undefined : 0xff_fd, `Offset ${i}`)
7072
}

0 commit comments

Comments
 (0)