Skip to content

Commit e5bcf39

Browse files
committed
perf: use utf16 slice in shift_jis decoder
1 parent 38e0eba commit e5bcf39

File tree

3 files changed

+37
-15
lines changed

3 files changed

+37
-15
lines changed

fallback/latin1.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
nativeBuffer,
66
isHermes,
77
isDeno,
8+
isLE,
89
} from './_utils.js'
910

1011
// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
@@ -60,6 +61,16 @@ export function decodeLatin1(arr, start = 0, stop = arr.length) {
6061
return String.fromCharCode.apply(String, sliced)
6162
}
6263

64+
// Turns the first `stop` code units of a Uint16Array into a string, raw:
// the input is not checked for well-formedness and nothing is replaced.
// `stop` defaults to the full length of the input.
export const decodeUCS2 =
  !nativeBuffer || !isLE || isDeno
    ? (u16, stop = u16.length) => decodeLatin1(u16, 0, stop)
    : (u16, stop = u16.length) => {
        // TODO: fast path for BE, perhaps faster path for Deno. Note that decoder replaces, this function doesn't
        // The native slice is only used above 32 code units (from 64 bytes, below are in heap)
        return stop > 32
          ? nativeBuffer.from(u16.buffer, u16.byteOffset, stop * 2).ucs2Slice()
          : decodeLatin1(u16, 0, stop)
      }
73+
6374
// Does not check input, uses best available method
6475
// Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
6576
export const decodeAscii = nativeBuffer

fallback/multi-byte.js

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { asciiPrefix, decodeLatin1 } from './latin1.js'
1+
import { asciiPrefix, decodeLatin1, decodeUCS2 } from './latin1.js'
22
import { getTable } from './multi-byte.table.js'
33

44
export const E_STRICT = 'Input is not well-formed for this encoding'
@@ -281,45 +281,56 @@ const mappers = {
281281
shift_jis: (err) => {
282282
const jis0208 = getTable('jis0208')
283283
let lead = 0
284+
let oi = 0
285+
let out
284286

285287
// Resolves the pending lead byte together with the following byte `b`,
// appending the resulting code unit(s) to `out` at position `oi`.
// The pending lead is always cleared, whether or not the pair is valid.
const decodeLead = (b) => {
  const first = lead
  lead = 0

  const trailInRange = b >= 0x40 && b <= 0xfc && b !== 0x7f
  if (trailInRange) {
    const leadOffset = first < 0xa0 ? 0x81 : 0xc1
    const trailOffset = b < 0x7f ? 0x40 : 0x41
    const index = (first - leadOffset) * 188 + b - trailOffset

    // Indices 8836..10715 are not looked up in the table, they map linearly starting at 0xe000
    if (index >= 8836 && index <= 10_715) {
      out[oi++] = 0xe0_00 - 8836 + index
      return
    }

    const mapped = jis0208[index]
    if (mapped !== undefined && mapped !== REP) {
      out[oi++] = mapped
      return
    }
  }

  // Invalid pair: emit whatever err() yields, and keep `b` itself when it is below 128
  out[oi++] = err()
  if (b < 128) out[oi++] = b
}
297307

298308
// Decodes bytes arr[start..end) into a string, collecting UTF-16 code units in `out`.
//   arr    - byte source, indexed from `start` (inclusive) to `end` (exclusive); assumes start <= end
//   stream - when truthy, a trailing lead byte is kept in `lead` for the next call
//            instead of being reported through err()
// Returns decodeUCS2(out, oi), i.e. the string built from the code units written so far.
const decode = (arr, start, end, stream) => {
  // One slot more than the number of input bytes: a lead byte carried over from the
  // previous call contributes output without consuming input here. It can yield err() plus
  // an echoed byte below 128 for a single input byte, or err() alone on a final empty flush.
  // Writes past the end of a typed array are silently dropped, so an exact-size buffer
  // would lose that last code unit while `oi` still advanced beyond the buffer.
  out = new Uint16Array(end - start + 1)
  oi = 0
  let i = start

  // Finish the pair started by a lead byte left over from the previous chunk
  if (lead && i < end) decodeLead(arr[i++])
  while (i < end) {
    const b = arr[i++]
    if (b <= 0x80) {
      out[oi++] = b // 0x80 is allowed
    } else if (b >= 0xa1 && b <= 0xdf) {
      out[oi++] = 0xfe_c0 + b
    } else if (b === 0xa0 || b > 0xfc) {
      out[oi++] = err()
    } else {
      // Remaining values are lead bytes; the pair is resolved right away if a byte is available
      lead = b
      if (i < end) decodeLead(arr[i++])
    }
  }

  // Input ended on a lead byte and no more data is coming
  if (lead && !stream) {
    lead = 0
    out[oi++] = err()
  }

  return decodeUCS2(out, oi)
}
324335

325336
return { decode, isAscii: () => lead === 0 }

fallback/utf16.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { decodeLatin1, encodeCharcodes } from './latin1.js'
1+
import { decodeUCS2, encodeCharcodes } from './latin1.js'
22
import { isLE } from './_utils.js'
33

44
export const E_STRICT = 'Input is not well-formed utf16'
@@ -38,9 +38,9 @@ export function to16input(u8, le) {
3838
}
3939

4040
// Converts a Uint16Array of UTF-16 code units into a string.
//   loose   - when input is not well-formed: truthy passes a copy through toWellFormed, falsy throws
//   checked - truthy skips the isWellFormed test and decodes the input as given
// Throws TypeError(E_STRICT) for input that is not well-formed unless `loose` is set.
export const decode = (u16, loose = false, checked = false) => {
  const usableAsIs = checked || isWellFormed(u16)
  if (usableAsIs) return decodeUCS2(u16)

  if (loose) {
    const copy = Uint16Array.from(u16) // cloned for replacement
    return decodeUCS2(toWellFormed(copy))
  }

  throw new TypeError(E_STRICT)
}
4545

4646
export function encode(str, loose = false, checked = false, swapped = false) {

0 commit comments

Comments
 (0)