Skip to content

Commit 8ae26ea

Browse files
committed
perf: use utf16 slice for euc-kr and big5
1 parent e5bcf39 commit 8ae26ea

File tree

1 file changed

+34
-20
lines changed

1 file changed

+34
-20
lines changed

fallback/multi-byte.js

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,48 @@ export const E_STRICT = 'Input is not well-formed for this encoding'
1111
// Common between euc-kr and big5
1212
function bigDecoder(err, pair) {
1313
let lead = 0
14+
let oi = 0
15+
let o16
1416

1517
const decodeLead = (b) => {
1618
const str = pair(lead, b)
1719
lead = 0
18-
if (str) return str
19-
return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
20+
if (typeof str === 'number') {
21+
o16[oi++] = str
22+
} else if (str) {
23+
// This is still faster than string concatenation. Can we optimize strings though?
24+
for (let i = 0; i < str.length; i++) o16[oi++] = str.charCodeAt(i)
25+
} else {
26+
o16[oi++] = err()
27+
if (b < 128) o16[oi++] = b
28+
}
2029
}
2130

2231
const decode = (arr, start, end, stream) => {
23-
let res = ''
2432
let i = start
33+
o16 = new Uint16Array(end - start)
34+
oi = 0
2535

26-
if (lead && i < end) res += decodeLead(arr[i++])
36+
if (lead && i < end) decodeLead(arr[i++])
2737
while (i < end) {
2838
const b = arr[i++]
2939
if (b < 128) {
30-
res += String.fromCharCode(b)
40+
o16[oi++] = b
3141
} else if (b === 0x80 || b === 0xff) {
32-
res += String.fromCharCode(err())
42+
o16[oi++] = err()
3343
} else {
3444
lead = b
35-
if (i < end) res += decodeLead(arr[i++])
45+
if (i < end) decodeLead(arr[i++])
3646
}
3747
}
3848

3949
if (lead && !stream) {
4050
lead = 0
41-
res += String.fromCharCode(err())
51+
o16[oi++] = err()
4252
}
4353

54+
const res = decodeUCS2(o16, oi)
55+
o16 = null
4456
return res
4557
}
4658

@@ -57,7 +69,7 @@ const mappers = {
5769
return bigDecoder(err, (l, b) => {
5870
if (b < 0x41 || b > 0xfe) return
5971
const cp = euc[(l - 0x81) * 190 + b - 0x41]
60-
return cp !== undefined && cp !== REP ? String.fromCharCode(cp) : undefined
72+
return cp !== undefined && cp !== REP ? cp : undefined
6173
})
6274
},
6375
// https://encoding.spec.whatwg.org/#euc-jp-decoder
@@ -282,43 +294,43 @@ const mappers = {
282294
const jis0208 = getTable('jis0208')
283295
let lead = 0
284296
let oi = 0
285-
let out
297+
let o16
286298

287299
const decodeLead = (b) => {
288300
const l = lead
289301
lead = 0
290302
if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
291303
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
292304
if (p >= 8836 && p <= 10_715) {
293-
out[oi++] = 0xe0_00 - 8836 + p
305+
o16[oi++] = 0xe0_00 - 8836 + p
294306
return
295307
}
296308

297309
const cp = jis0208[p]
298310
if (cp !== undefined && cp !== REP) {
299-
out[oi++] = cp
311+
o16[oi++] = cp
300312
return
301313
}
302314
}
303315

304-
out[oi++] = err()
305-
if (b < 128) out[oi++] = b
316+
o16[oi++] = err()
317+
if (b < 128) o16[oi++] = b
306318
}
307319

308320
const decode = (arr, start, end, stream) => {
309-
out = new Uint16Array(end - start)
321+
o16 = new Uint16Array(end - start)
310322
oi = 0
311323
let i = start
312324

313325
if (lead && i < end) decodeLead(arr[i++])
314326
while (i < end) {
315327
const b = arr[i++]
316328
if (b <= 0x80) {
317-
out[oi++] = b // 0x80 is allowed
329+
o16[oi++] = b // 0x80 is allowed
318330
} else if (b >= 0xa1 && b <= 0xdf) {
319-
out[oi++] = 0xfe_c0 + b
331+
o16[oi++] = 0xfe_c0 + b
320332
} else if (b === 0xa0 || b > 0xfc) {
321-
out[oi++] = err()
333+
o16[oi++] = err()
322334
} else {
323335
lead = b
324336
if (i < end) decodeLead(arr[i++])
@@ -327,10 +339,12 @@ const mappers = {
327339

328340
if (lead && !stream) {
329341
lead = 0
330-
out[oi++] = err()
342+
o16[oi++] = err()
331343
}
332344

333-
return decodeUCS2(out, oi)
345+
const res = decodeUCS2(o16, oi)
346+
o16 = null
347+
return res
334348
}
335349

336350
return { decode, isAscii: () => lead === 0 }

0 commit comments

Comments
 (0)