ExodusOSS
diff --git a/‎README.md‎
Lines changed: 35 additions & 0 deletions b/‎README.md‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎fallback/percent.js‎
Lines changed: 31 additions & 0 deletions b/‎fallback/percent.js‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 8 additions & 1 deletion b/‎package.json‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎tests/whatwg.browser.test.js‎
Lines changed: 146 additions & 0 deletions b/‎tests/whatwg.browser.test.js‎
Lines changed: 146 additions & 0 deletions
@@ -841,6 +841,41 @@ do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
 > but they are fixing them and the expected update window is short.\
 > If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import.
 
+### `@exodus/bytes/whatwg.js`
+
+WHATWG helpers
+
+```js
+import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
+import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
+```
+
+#### `percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false)`
+
+Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
+per WHATWG URL specification.
+
+> [!IMPORTANT]
+> You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.
+
+Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.
+
+[C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is
+always percent-encoded.
+
+`percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
+in range 0x20 - 0x7e, e.g. `' "#<>'`.
+
+This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
+to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
+This is different from e.g. `encodeURI` and `encodeURIComponent`, which throw on lone (unpaired) surrogates:
+```js
+> percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
+'%EF%BF%BD'
+> encodeURIComponent('\ud800')
+Uncaught URIError: URI malformed
+```
+
 ## Changelog
 
 See [GitHub Releases](https://github.com/ExodusOSS/bytes/releases) tab
 
@@ -0,0 +1,31 @@
+import { decodeAscii, encodeLatin1 } from './latin1.js'
+import { decode2string } from './_utils.js'
+
+// Message used by both percentEncodeSet validation failures in percentEncoder
+const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
+// Cache of already built encoders, keyed by the set string with the spaceAsPlus flag (0 / 1) appended
+const percentMap = new Map()
+// Byte -> string lookup tables, filled in by percentEncoder the first time it has to build an encoder:
+// hex[i] is the '%XX' escape of byte i, base[i] is the default output for byte i
+let hex, base
+
+export function percentEncoder(set, spaceAsPlus = false) {
+  if (typeof set !== 'string' || /[^\x20-\x7E]/.test(set)) throw new TypeError(ERR)
+  if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')
+  const id = set + +spaceAsPlus
+  const cached = percentMap.get(id)
+  if (cached) return cached
+
+  const n = encodeLatin1(set).sort() // string checked above to be ascii
+  if (decodeAscii(n) !== set || new Set(n).size !== n.length) throw new TypeError(ERR)
+
+  if (!base) {
+    hex = Array.from({ length: 256 }, (_, i) => `%${i.toString(16).padStart(2, '0').toUpperCase()}`)
+    base = hex.map((h, i) => (i < 0x20 || i > 0x7e ? h : String.fromCharCode(i)))
+  }
+
+  const map = base.slice() // copy
+  for (const c of n) map[c] = hex[c]
+  if (spaceAsPlus) map[0x20] = '+' // overrides whatever percentEncodeSet thinks about it
+
+  // Input is not typechecked, for internal use only
+  const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, map)
+  percentMap.set(id, percentEncode)
+  return percentEncode
+}
@@ -27,7 +27,7 @@
     "test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
     "test:hermes": "exodus-test --engine=hermes:bundle",
     "test:quickjs": "exodus-test --engine=quickjs:bundle",
-    "test:xs": "exodus-test --engine=xs:bundle",
+    "test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
     "test:engine262": "exodus-test --engine=engine262:bundle",
     "test:deno": "exodus-test --engine=deno:pure",
     "test:bun": "exodus-test --engine=bun:pure",
@@ -71,6 +71,7 @@
     "/fallback/encoding.util.js",
     "/fallback/hex.js",
     "/fallback/latin1.js",
+    "/fallback/percent.js",
     "/fallback/multi-byte.encodings.cjs",
     "/fallback/multi-byte.encodings.json",
     "/fallback/multi-byte.js",
@@ -120,6 +121,8 @@
     "/utf8.js",
     "/utf8.d.ts",
     "/utf8.node.js",
+    "/whatwg.js",
+    "/whatwg.d.ts",
     "/wif.js",
     "/wif.d.ts"
   ],
@@ -200,6 +203,10 @@
       "node": "./utf8.node.js",
       "default": "./utf8.js"
     },
+    "./whatwg.js": {
+      "types": "./whatwg.d.ts",
+      "default": "./whatwg.js"
+    },
     "./wif.js": {
       "types": "./wif.d.ts",
       "default": "./wif.js"
 
@@ -0,0 +1,146 @@
+import '@exodus/bytes/encoding.js'
+import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
+import { keccakprg } from '@noble/hashes/sha3-addons.js'
+import { describe, test, before, after } from 'node:test'
+import { labels } from './encoding/fixtures/encodings.cjs'
+
+// The test uses https:// URL query, which is special
+// This string is the percentEncodeSet argument of every percentEncodeAfterEncoding call in this file
+const specialquery = ' "#\'<>' // https://url.spec.whatwg.org/#special-query-percent-encode-set
+
+// Encoding labels that both suites below skip
+const invalid = new Set(['replacement', 'utf-16le', 'utf-16be']) // https://encoding.spec.whatwg.org/#get-an-encoder
+
+// Needed by the browser comparison suite; that suite is skipped when either one is falsy
+const { window, document } = globalThis
+
+// Builds an array of `length` one-code-point strings for consecutive code points beginning at `start`
+const range = (length, start) => Array.from({ length }, (_, i) => String.fromCodePoint(start + i))
+// Inputs passed to percentEncodeAfterEncoding by both suites below; the PRG loop appends more entries
+const strings = [
+  // Single code points 0x20..0x11f, except space and '#'
+  ...range(256, 0x20).filter((x) => x !== ' ' && x !== '#'), // we directly set to href
+  // Code points 0x00..0xff followed by '*'; '#', tab, LF and CR are left out
+  // NOTE(review): presumably because '#' would start the fragment and URL parsing drops tab / newlines - confirm
+  ...range(256, 0)
+    .filter((x) => x !== '#' && x !== '\t' && x !== '\n' && x !== '\r')
+    .map((x) => `${x}*`),
+  // The same code points wrapped in '*' on both sides
+  ...range(256, 0)
+    .filter((x) => x !== '#' && x !== '\t' && x !== '\n' && x !== '\r')
+    .map((x) => `*${x}*`),
+
+  // Selected individual code points, both BMP and supplementary
+  String.fromCodePoint(0xfe_ff),
+  String.fromCodePoint(0xff_fd),
+  String.fromCodePoint(0xff_fe),
+  String.fromCodePoint(0xff_ff),
+  String.fromCodePoint(0x1_00_00),
+  String.fromCodePoint(0x2_f8_a6), // max big5
+  String.fromCodePoint(0x2_f8_a7),
+  String.fromCodePoint(0x1_10_00),
+
+  // Supplementary code points mixed with ASCII ('*' is 42, ',' is 44)
+  // NOTE(review): the last two entries repeat the first two - confirm whether that is intentional
+  String.fromCodePoint(42, 0x1_00_00, 0x1_10_00, 42),
+  String.fromCodePoint(42, 0x1_00_00, 44, 0x1_10_00, 42),
+  String.fromCodePoint(42, 0x1_00_00, 0x1_10_00, 42),
+  String.fromCodePoint(42, 0x1_00_00, 44, 0x1_10_00, 42),
+
+  // Groups of ASCII space and punctuation characters
+  String.fromCharCode(0x20, 0x22, 0x3c, 0x3e, 0x60),
+  String.fromCharCode(0x20, 0x22, 0x24, 0x3c, 0x3e),
+  String.fromCharCode(0x3f, 0x5e, 0x60, 0x7b, 0x7d),
+  String.fromCharCode(0x2f, 0x3a, 0x3b, 0x3d, 0x40, 0x5b, 0x5c, 0x5d, 0x7c),
+  String.fromCharCode(0x24, 0x25, 0x26, 0x2b, 0x2c),
+  String.fromCharCode(0x21, 0x27, 0x28, 0x29, 0x7e),
+
+  // Unpaired surrogate code units, on their own and between ASCII letters
+  String.fromCharCode(0x61, 0x62, 0xd8_00, 0x77, 0x78),
+  String.fromCharCode(0xd8_00, 0xd8_00),
+  String.fromCharCode(0x61, 0x62, 0xdf_ff, 0x77, 0x78),
+  String.fromCharCode(0xdf_ff, 0xd8_00),
+
+  // Long runs of consecutive code points joined into a single string
+  range(0x2_00, 0x24).join(''), // from # + 1
+  range(0x2_00, 0xf6_00).join(''), // user-defined
+  range(0x2_00, 0xff_00).join(''),
+  range(0x20_00, 0x24).join(''),
+  range(0x20_00, 0xf0_00).join(''),
+  range(0x20_00, 0xf_f0_00).join(''),
+  'hello' + range(0x20_00, 0xf0_00).join('') + 'abc',
+]
+
+// Appends 32 * 3 pseudo-random strings to `strings`: each 1024-byte chunk is read as
+// 8-bit code units, as 16-bit code units, and as 32-bit values reduced to code points
+const fixedPRG = keccakprg() // We don't add any entropy, so it spills out predictable results
+for (let i = 1; i <= 32; i++) {
+  const u8 = fixedPRG.randomBytes(1024)
+  // Views over the same bytes, no copy is made
+  const u16 = new Uint16Array(u8.buffer, u8.byteOffset, u8.byteLength / 2)
+  const u32 = new Uint32Array(u8.buffer, u8.byteOffset, u8.byteLength / 4)
+  const chunk = [
+    String.fromCharCode.apply(String, u8),
+    String.fromCharCode.apply(String, u16),
+    String.fromCodePoint(...u32.map((x) => x % 0x11_00_00)), // modulo keeps values below 0x110000
+  ].map(
+    // Removes surrounding whitespace, then every tab / LF / CR / '#', then trailing 0x00-0x20 characters
+    (x) =>
+      x
+        .trim()
+        .replaceAll(/[\t\n\r#]/g, '')
+        .replaceAll(/[\x00-\x20]+$/g, '') // eslint-disable-line no-control-regex
+  )
+  strings.push(...chunk)
+}
+
+// Passes on Chromium, Servo. Webkit is incorrect. Firefox fails only on CI, for an unknown reason
+// True when there is no document / window, or when EXODUS_TEST_PLATFORM is webkit or firefox
+const skip =
+  !document ||
+  !window ||
+  process.env.EXODUS_TEST_PLATFORM === 'webkit' ||
+  process.env.EXODUS_TEST_PLATFORM === 'firefox'
+
+// Compares percentEncodeAfterEncoding output with what an iframe document reports as the
+// query of an <a> element, one test per encoding label; skipped entirely when `skip` is true
+describe('percent-encode after encoding matches browser', { skip }, () => {
+  // Resolver of the promise currently waiting for a message from the iframe
+  let handle
+  const onmessage = (event) => handle(event.data)
+  const iframe = document.createElement('iframe')
+
+  before(() => {
+    window.addEventListener('message', onmessage)
+    document.body.append(iframe)
+  })
+
+  after(() => {
+    window.removeEventListener('message', onmessage)
+    iframe.remove()
+  })
+
+  for (const encoding of labels) {
+    if (invalid.has(encoding)) continue
+    test(encoding, async (t) => {
+      let ok = 0
+      // Resolved by the empty message that the iframe script posts when it starts
+      const loaded = new Promise((resolve) => (handle = resolve))
+      // The iframe script assigns each received string to a.href as the query and posts back
+      // a.search without the leading '?'
+      const html = `
+        <!DOCTYPE html>
+        <script>
+        var a = document.createElement('a');
+        window.parent.postMessage('', '*');
+        window.addEventListener('message', (e) => {
+          a.href = 'https://example.com/?' + e.data
+          window.parent.postMessage(a.search.slice(1), '*')
+        })
+        </script>`
+      // NOTE(review): charset is presumably what makes the browser encode the query with `encoding` - confirm
+      iframe.src = `data:text/html;charset=${encoding},${encodeURI(html)}`
+      await loaded
+
+      for (const str of strings) {
+        // One string is in flight at a time: `handle` is replaced before each message is sent
+        const promise = new Promise((resolve) => (handle = resolve))
+        iframe.contentWindow.postMessage(str, '*')
+        const actual = percentEncodeAfterEncoding(encoding, str, specialquery)
+        t.assert.strictEqual(actual, await promise, `${encoding} #${ok + 1}`)
+        ok++
+      }
+
+      // Confirms that every string was compared
+      t.assert.strictEqual(ok, strings.length)
+    })
+  }
+})
+
+// Ensures that behavior mathches everywhere with snapshots
+// Combined with the above check, we know that snapshots match reference browser platforms
+describe('percent-encode after encoding matches snapshot', () => {
+  for (const encoding of labels) {
+    if (invalid.has(encoding)) continue
+    test(encoding, async (t) => {
+      const res = []
+      for (const str of strings) res.push(percentEncodeAfterEncoding(encoding, str, specialquery))
+      if (t.assert.snapshot) {
+        t.assert.snapshot(res)
+      } else {
+        t.skip('Snapshots are not supported')
+      }
+    })
+  }
+})