Skip to content

Commit 9f461d8

Browse files
committed
perf: add fast decoder for shift_jis
1 parent 79c5448 commit 9f461d8

File tree

1 file changed

+35
-24
lines changed

1 file changed

+35
-24
lines changed

fallback/multi-byte.js

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -216,36 +216,47 @@ const mappers = {
216216
const jis0208 = getTable('jis0208')
217217
let lead = 0
218218

219-
const pushback = []
220-
const bytes = (b) => {
221-
if (lead) {
222-
const l = lead
223-
lead = 0
224-
if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
225-
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
226-
if (p >= 8836 && p <= 10_715) return 0xe0_00 - 8836 + p // 16-bit
227-
const cp = jis0208[p]
228-
if (cp !== undefined && cp !== REP) return cp
229-
}
230-
231-
if (b < 128) pushback.push(b)
232-
return err()
219+
const decodeLead = (b) => {
220+
const l = lead
221+
lead = 0
222+
if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
223+
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
224+
if (p >= 8836 && p <= 10_715) return String.fromCharCode(0xe0_00 - 8836 + p)
225+
const cp = jis0208[p]
226+
if (cp !== undefined && cp !== REP) return String.fromCharCode(cp)
233227
}
234228

235-
if (b <= 0x80) return b // 0x80 is allowed
236-
if (b >= 0xa1 && b <= 0xdf) return 0xff_61 - 0xa1 + b
237-
if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) return err()
238-
lead = b
229+
return b < 128 ? String.fromCharCode(err(), b) : String.fromCharCode(err())
239230
}
240231

241-
// eslint-disable-next-line sonarjs/no-identical-functions
242-
const eof = () => {
243-
if (!lead) return null
244-
lead = 0 // this clears state completely on EOF
245-
return err()
232+
const fast = (arr, start, end, stream) => {
233+
let res = ''
234+
let i = start
235+
236+
if (lead && i < end) res += decodeLead(arr[i++])
237+
while (i < end) {
238+
const b = arr[i++]
239+
if (b <= 0x80) {
240+
res += String.fromCharCode(b) // 0x80 is allowed
241+
} else if (b >= 0xa1 && b <= 0xdf) {
242+
res += String.fromCharCode(0xff_61 - 0xa1 + b)
243+
} else if (b < 0x81 || (b > 0x9f && b < 0xe0) || b > 0xfc) {
244+
res += String.fromCharCode(err())
245+
} else {
246+
lead = b
247+
if (i < end) res += decodeLead(arr[i++])
248+
}
249+
}
250+
251+
if (lead && !stream) {
252+
lead = 0
253+
res += String.fromCharCode(err())
254+
}
255+
256+
return res
246257
}
247258

248-
return { bytes, eof, pushback }
259+
return { fast, isAscii: () => lead === 0 }
249260
},
250261
// https://encoding.spec.whatwg.org/#gbk-decoder
251262
gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK’s decoder is gb18030’s decoder

0 commit comments

Comments
 (0)