feat: accept non-normalized encoding labels in legacyHookDecode

ChALkeR · ChALkeR · commit 2d544fd4be1e · 2025-12-22T16:11:10.000+04:00
diff --git a/README.md b/README.md
@@ -227,15 +227,14 @@ Given a `TypedArray` or an `ArrayBuffer` instance `input`, returns either of:
 Implements [decode](https://encoding.spec.whatwg.org/#decode) legacy hook.
 
 Given a `TypedArray` or an `ArrayBuffer` instance `input` and an optional `fallbackEncoding`
-normalized encoding name, sniffs encoding from BOM with `fallbackEncoding` fallback and then
+encoding [label](https://encoding.spec.whatwg.org/#names-and-labels),
+sniffs encoding from BOM with `fallbackEncoding` fallback and then
 decodes the `input` using that encoding, skipping BOM if it was present.
 
 Notes:
 
  * BOM-sniffed encoding takes precedence over `fallbackEncoding` option per spec.
    Use with care.
- * `fallbackEncoding` must be ASCII-lowercased encoding name,
-   e.g. a result of `normalizeEncoding(label)` call.
  * Always operates in non-fatal [mode](https://encoding.spec.whatwg.org/#textdecoder-error-mode),
    aka replacement. It can convert different byte sequences to equal strings.
 
diff --git a/fallback/encoding.js b/fallback/encoding.js
@@ -256,13 +256,13 @@ export function getBOMEncoding(input) {
 // https://encoding.spec.whatwg.org/#decode
 // Warning: encoding sniffed from BOM takes preference over the supplied one
 // Warning: lossy, performs replacement, no option of throwing
-// Expects normalized (lower-case) encoding as input. Completely ignores it and even skips validation when BOM is found
+// Completely ignores fallbackEncoding and even skips its validation when a BOM is found
 // Unlike TextDecoder public API, additionally supports 'replacement' encoding
 export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
   let u8 = fromSource(input)
   const bomEncoding = getBOMEncoding(u8)
   if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2)
-  const enc = bomEncoding ?? fallbackEncoding // "the byte order mark is more authoritative than anything else"
+  const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"
 
   if (enc === 'utf-8') return utf8toStringLoose(u8)
   if (enc === 'utf-16le' || enc === 'utf-16be') {
diff --git a/tests/encoding/generic.test.js b/tests/encoding/generic.test.js
@@ -111,6 +111,10 @@ describe('legacyHookDecode', () => {
       ['feffd70020', '\uD700\uFFFD'],
       ['feffd80820', '\uFFFD\uFFFD'],
     ],
+    // non-normalized labels (not lowercase encoding names)
+    Utf8: [['c280', '\x80']],
+    unicodefeff: [['c280', '\u80C2']],
+    UnicodeFFFE: [['c280', '\uC280']],
   }
 
   test('null encoding', (t) => {