Skip to content

Commit 8dec88e

Browse files
committed
refactor: move fast path from utf8 entry point to fallback
1 parent 08479d9 commit 8dec88e

File tree

4 files changed

+32
-28
lines changed

4 files changed

+32
-28
lines changed

fallback/_utils.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ export const nativeBuffer = haveNativeBuffer ? Buffer : null
44
export const isHermes = !!globalThis.HermesInternal
55
export const isDeno = !!globalThis.Deno
66
export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
7+
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
78

89
// We consider Node.js TextDecoder/TextEncoder native
910
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))

fallback/utf8.js

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,30 @@
1-
import { encodeAsciiPrefix } from './latin1.js'
1+
import { isHermes, E_STRICT_UNICODE } from './_utils.js'
2+
import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
23

34
export const E_STRICT = 'Input is not well-formed utf8'
4-
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
55

66
const replacementPoint = 0xff_fd
7+
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
8+
const { decodeURIComponent, escape } = globalThis
9+
10+
/**
 * Decode utf8 bytes to a string without a native decoder.
 *
 * Tries progressively slower strategies:
 *   1. the leading ASCII run is decoded via the latin1 helper,
 *   2. where enabled (Hermes), the rest goes through escape + decodeURIComponent,
 *   3. otherwise (or when step 2 fails in loose mode) the manual `decode` takes over.
 *
 * @param {Uint8Array} arr - bytes to decode (the visible caller validates this with assertUint8)
 * @param {boolean} loose - when falsy, a failure of the escape path throws instead of falling back
 * @returns {string} the decoded string
 * @throws {TypeError} with E_STRICT when the escape path rejects the input and `loose` is falsy
 */
export function decodeFast(arr, loose) {
  // The ASCII prefix is the cheapest part to handle, so take it first.
  // There is no native decoder at this point, hence plain latin1 decoding rather than decodeAscii.
  const asciiEnd = asciiPrefix(arr)
  const head = decodeLatin1(arr, 0, asciiEnd)
  const consumed = head.length
  if (consumed === arr.length) return head // everything was ASCII

  // Reported as a ~3x perf boost on Hermes; skipped elsewhere as the js path is faster there
  const escapePathAvailable = shouldUseEscapePath && escape && decodeURIComponent
  if (escapePathAvailable) {
    // Bytes -> latin1 string -> percent-escaped string, which decodeURIComponent reads as utf8
    const escaped = escape(decodeLatin1(arr, consumed, arr.length))
    try {
      const tail = decodeURIComponent(escaped)
      return head + tail
    } catch {
      // Strict callers get an error; loose callers fall through to the manual implementation
      if (!loose) throw new TypeError(E_STRICT)
    }
  }

  return head + decode(arr, loose, consumed)
}
728

829
// https://encoding.spec.whatwg.org/#utf-8-decoder
930
// We are most likely in loose mode here: where the escape + decodeURIComponent path is enabled, non-loose input has already been decoded or rejected by it

utf8.js

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import { assertUint8 } from './assert.js'
22
import { typedView } from './array.js'
3-
import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
4-
import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
3+
import { nativeDecoder, nativeEncoder, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
54
import * as js from './fallback/utf8.js'
65

7-
const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
6+
const { TextDecoder } = globalThis
87
// ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
98
// We don't want to strip anything unexpectedly
109
const decoderLoose = nativeDecoder
@@ -13,10 +12,6 @@ const decoderFatal = nativeDecoder
1312
: null
1413
const { isWellFormed } = String.prototype
1514

16-
const { E_STRICT, E_STRICT_UNICODE } = js
17-
18-
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
19-
2015
function deLoose(str, loose, res) {
2116
if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
2217
if (isWellFormed) {
@@ -51,32 +46,19 @@ function deLoose(str, loose, res) {
5146
function encode(str, loose = false) {
5247
if (typeof str !== 'string') throw new TypeError(E_STRING)
5348
if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
54-
if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
49+
if (nativeEncoder || !js.encode) return deLoose(str, loose, nativeEncoder.encode(str))
5550
// No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
5651
return js.encode(str, loose)
5752
}
5853

5954
function decode(arr, loose = false) {
6055
assertUint8(arr)
6156
if (arr.byteLength === 0) return ''
62-
if (nativeDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
63-
64-
// Fast path for ASCII prefix, this is faster than all alternatives below
65-
const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
66-
if (prefix.length === arr.length) return prefix
67-
68-
// This codepath gives a ~3x perf boost on Hermes
69-
if (shouldUseEscapePath && escape && decodeURIComponent) {
70-
const o = escape(decodeLatin1(arr, prefix.length, arr.length))
71-
try {
72-
return prefix + decodeURIComponent(o) // Latin1 to utf8
73-
} catch {
74-
if (!loose) throw new TypeError(E_STRICT)
75-
// Ok, we have to use manual implementation for loose decoder
76-
}
57+
if (nativeDecoder || !js.decodeFast) {
58+
return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
7759
}
7860

79-
return prefix + js.decode(arr, loose, prefix.length)
61+
return js.decodeFast(arr, loose)
8062
}
8163

8264
export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)

utf8.node.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { assertUint8 } from './assert.js'
22
import { typedView } from './array.js'
3-
import { E_STRING } from './fallback/_utils.js'
4-
import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
3+
import { E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
4+
import { E_STRICT } from './fallback/utf8.js'
55
import { isAscii } from 'node:buffer'
66

77
if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')

0 commit comments

Comments
 (0)