refactor: move fast path from utf8 entry point to fallback

ChALkeR · ChALkeR · commit 8dec88ecb52b · 2026-02-09T17:25:53.000+04:00
diff --git a/fallback/_utils.js b/fallback/_utils.js
@@ -4,6 +4,7 @@ export const nativeBuffer = haveNativeBuffer ? Buffer : null
 export const isHermes = !!globalThis.HermesInternal
 export const isDeno = !!globalThis.Deno
 export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
+export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
 
 // We consider Node.js TextDecoder/TextEncoder native
 let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
diff --git a/fallback/utf8.js b/fallback/utf8.js
@@ -1,9 +1,30 @@
-import { encodeAsciiPrefix } from './latin1.js'
+import { isHermes, E_STRICT_UNICODE } from './_utils.js'
+import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
 
 export const E_STRICT = 'Input is not well-formed utf8'
-export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
 
 const replacementPoint = 0xff_fd
+const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
+const { decodeURIComponent, escape } = globalThis // either may be undefined, so both are truthiness-checked before use
+
+export function decodeFast(arr, loose) { // Pure-JS utf8 bytes -> string, called when there is no native decoder
+  // Fast path for the ASCII prefix: this is faster than all alternatives below
+  const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
+  if (prefix.length === arr.length) return prefix // Whole input was ASCII, nothing left to decode
+
+  // This codepath gives a ~3x perf boost on Hermes
+  if (shouldUseEscapePath && escape && decodeURIComponent) {
+    const o = escape(decodeLatin1(arr, prefix.length, arr.length)) // Remaining bytes read as Latin1 chars, non-ASCII ones become %XX
+    try {
+      return prefix + decodeURIComponent(o) // Latin1 to utf8
+    } catch {
+      if (!loose) throw new TypeError(E_STRICT) // Strict mode: a decodeURIComponent failure is reported as malformed utf8
+      // Loose mode: fall through to the manual implementation below
+    }
+  }
+
+  return prefix + decode(arr, loose, prefix.length) // Manual decoder, resumed right after the ASCII prefix
+}
 
 // https://encoding.spec.whatwg.org/#utf-8-decoder
 // We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything
diff --git a/utf8.js b/utf8.js
@@ -1,10 +1,9 @@
 import { assertUint8 } from './assert.js'
 import { typedView } from './array.js'
-import { isHermes, nativeDecoder, nativeEncoder, E_STRING } from './fallback/_utils.js'
-import { asciiPrefix, decodeLatin1 } from './fallback/latin1.js'
+import { nativeDecoder, nativeEncoder, E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
 import * as js from './fallback/utf8.js'
 
-const { TextDecoder, decodeURIComponent, escape } = globalThis // Buffer is optional
+const { TextDecoder } = globalThis
 // ignoreBOM: true means that BOM will be left as-is, i.e. will be present in the output
 // We don't want to strip anything unexpectedly
 const decoderLoose = nativeDecoder
@@ -13,10 +12,6 @@ const decoderFatal = nativeDecoder
   : null
 const { isWellFormed } = String.prototype
 
-const { E_STRICT, E_STRICT_UNICODE } = js
-
-const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
-
 function deLoose(str, loose, res) {
   if (loose || str.length === res.length) return res // length is equal only for ascii, which is automatically fine
   if (isWellFormed) {
@@ -51,32 +46,19 @@ function deLoose(str, loose, res) {
 function encode(str, loose = false) {
   if (typeof str !== 'string') throw new TypeError(E_STRING)
   if (str.length === 0) return new Uint8Array() // faster than Uint8Array.of
-  if (nativeEncoder) return deLoose(str, loose, nativeEncoder.encode(str))
+  if (nativeEncoder || !js.encode) return deLoose(str, loose, nativeEncoder.encode(str)) // NOTE(review): taken with a falsy nativeEncoder when js.encode is missing — presumably only in builds that drop the fallback and guarantee a native encoder; confirm
   // No reason to use unescape + encodeURIComponent: it's slower than JS on normal engines, and modern Hermes already has TextEncoder
   return js.encode(str, loose)
 }
 
 function decode(arr, loose = false) {
   assertUint8(arr)
   if (arr.byteLength === 0) return ''
-  if (nativeDecoder) return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
-
-  // Fast path for ASCII prefix, this is faster than all alternatives below
-  const prefix = decodeLatin1(arr, 0, asciiPrefix(arr)) // No native decoder to use, so decodeAscii is useless here
-  if (prefix.length === arr.length) return prefix
-
-  // This codepath gives a ~3x perf boost on Hermes
-  if (shouldUseEscapePath && escape && decodeURIComponent) {
-    const o = escape(decodeLatin1(arr, prefix.length, arr.length))
-    try {
-      return prefix + decodeURIComponent(o) // Latin1 to utf8
-    } catch {
-      if (!loose) throw new TypeError(E_STRICT)
-      // Ok, we have to use manual implementation for loose decoder
-    }
+  if (nativeDecoder || !js.decodeFast) { // NOTE(review): decoderFatal is null without nativeDecoder, so the !js.decodeFast case presumably only occurs in builds that guarantee a native decoder; confirm
+    return loose ? decoderLoose.decode(arr) : decoderFatal.decode(arr) // Node.js and browsers
   }
 
-  return prefix + js.decode(arr, loose, prefix.length)
+  return js.decodeFast(arr, loose) // Pure-JS fallback: ASCII prefix fast path, Hermes escape path, then the manual decoder
 }
 
 export const utf8fromString = (str, format = 'uint8') => typedView(encode(str, false), format)
diff --git a/utf8.node.js b/utf8.node.js
@@ -1,7 +1,7 @@
 import { assertUint8 } from './assert.js'
 import { typedView } from './array.js'
-import { E_STRING } from './fallback/_utils.js'
-import { E_STRICT, E_STRICT_UNICODE } from './fallback/utf8.js'
+import { E_STRING, E_STRICT_UNICODE } from './fallback/_utils.js'
+import { E_STRICT } from './fallback/utf8.js'
 import { isAscii } from 'node:buffer'
 
 if (Buffer.TYPED_ARRAY_SUPPORT) throw new Error('Unexpected Buffer polyfill')