Skip to content

Commit 2bcc065

Browse files
authored
feat: add percentEncodeAfterEncoding (#37)
* feat: add percentEncodeAfterEncoding * test: add whatwg cross-tests with browsers
1 parent 92ef515 commit 2bcc065

File tree

10 files changed

+33419
-22
lines changed

10 files changed

+33419
-22
lines changed

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,41 @@ do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
841841
> but they are fixing them and the expected update window is short.\
842842
> If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import.
843843
844+
### `@exodus/bytes/whatwg.js`
845+
846+
WHATWG helpers
847+
848+
```js
849+
import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
850+
import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
851+
```
852+
853+
#### `percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false)`
854+
855+
Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
856+
per WHATWG URL specification.
857+
858+
> [!IMPORTANT]
859+
> You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.
860+
861+
Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.
862+
863+
Code points in the [C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) are
864+
always percent-encoded.
865+
866+
`percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
867+
in range 0x20 - 0x7e, e.g. `' "#<>'`.
868+
869+
This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
870+
to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
871+
This is different from e.g. `encodeURI` and `encodeURIComponent`, which throw on lone surrogates:
872+
```js
873+
> percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
874+
'%EF%BF%BD'
875+
> encodeURIComponent('\ud800')
876+
Uncaught URIError: URI malformed
877+
```
878+
844879
## Changelog
845880
846881
See [GitHub Releases](https://github.com/ExodusOSS/bytes/releases) tab

fallback/percent.js

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import { decodeAscii, encodeLatin1 } from './latin1.js'
2+
import { decode2string } from './_utils.js'
3+
4+
// Message of the TypeError thrown by percentEncoder whenever its `set` argument fails validation
const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
5+
// Cache of encoders already built by percentEncoder; the key is the set string followed by
// '0' or '1' for the spaceAsPlus flag
const percentMap = new Map()
6+
// Lookup tables indexed by byte value, filled in by the first percentEncoder call that misses the cache:
// hex[i] is the '%XX' form of byte i, base[i] is the default output for byte i
let hex, base
7+
8+
export function percentEncoder(set, spaceAsPlus = false) {
9+
if (typeof set !== 'string' || /[^\x20-\x7E]/.test(set)) throw new TypeError(ERR)
10+
if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')
11+
const id = set + +spaceAsPlus
12+
const cached = percentMap.get(id)
13+
if (cached) return cached
14+
15+
const n = encodeLatin1(set).sort() // string checked above to be ascii
16+
if (decodeAscii(n) !== set || new Set(n).size !== n.length) throw new TypeError(ERR)
17+
18+
if (!base) {
19+
hex = Array.from({ length: 256 }, (_, i) => `%${i.toString(16).padStart(2, '0').toUpperCase()}`)
20+
base = hex.map((h, i) => (i < 0x20 || i > 0x7e ? h : String.fromCharCode(i)))
21+
}
22+
23+
const map = base.slice() // copy
24+
for (const c of n) map[c] = hex[c]
25+
if (spaceAsPlus) map[0x20] = '+' // overrides whatever percentEncodeSet thinks about it
26+
27+
// Input is not typechecked, for internal use only
28+
const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, map)
29+
percentMap.set(id, percentEncode)
30+
return percentEncode
31+
}

package.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
"test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
2828
"test:hermes": "exodus-test --engine=hermes:bundle",
2929
"test:quickjs": "exodus-test --engine=quickjs:bundle",
30-
"test:xs": "exodus-test --engine=xs:bundle",
30+
"test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
3131
"test:engine262": "exodus-test --engine=engine262:bundle",
3232
"test:deno": "exodus-test --engine=deno:pure",
3333
"test:bun": "exodus-test --engine=bun:pure",
@@ -71,6 +71,7 @@
7171
"/fallback/encoding.util.js",
7272
"/fallback/hex.js",
7373
"/fallback/latin1.js",
74+
"/fallback/percent.js",
7475
"/fallback/multi-byte.encodings.cjs",
7576
"/fallback/multi-byte.encodings.json",
7677
"/fallback/multi-byte.js",
@@ -120,6 +121,8 @@
120121
"/utf8.js",
121122
"/utf8.d.ts",
122123
"/utf8.node.js",
124+
"/whatwg.js",
125+
"/whatwg.d.ts",
123126
"/wif.js",
124127
"/wif.d.ts"
125128
],
@@ -200,6 +203,10 @@
200203
"node": "./utf8.node.js",
201204
"default": "./utf8.js"
202205
},
206+
"./whatwg.js": {
207+
"types": "./whatwg.d.ts",
208+
"default": "./whatwg.js"
209+
},
203210
"./wif.js": {
204211
"types": "./wif.d.ts",
205212
"default": "./wif.js"

tests/whatwg.browser.test.js

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import '@exodus/bytes/encoding.js'
2+
import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
3+
import { keccakprg } from '@noble/hashes/sha3-addons.js'
4+
import { describe, test, before, after } from 'node:test'
5+
import { labels } from './encoding/fixtures/encodings.cjs'
6+
7+
// The test uses https:// URL query, which is special
8+
// Passed as the percentEncodeSet argument of percentEncodeAfterEncoding by both suites below
const specialquery = ' "#\'<>' // https://url.spec.whatwg.org/#special-query-percent-encode-set
9+
10+
// Labels that both suites below skip over
const invalid = new Set(['replacement', 'utf-16le', 'utf-16be']) // https://encoding.spec.whatwg.org/#get-an-encoder
11+
12+
// Browser globals; their presence is checked when `skip` is computed further down
const { window, document } = globalThis
13+
14+
// Returns `length` strings, one per consecutive code point, beginning at code point `start`
const range = (length, start) => {
  const toChar = (_, offset) => String.fromCodePoint(start + offset)
  return Array.from({ length }).map(toChar)
}
15+
// Fixed part of the inputs that both suites below feed to percentEncodeAfterEncoding.
// '#' is filtered out of every generated single-character entry, and tab / LF / CR are filtered
// out of the ranges that start at 0 (presumably because the browser suite assigns the string to
// a.href, where those characters would not reach a.search unchanged — TODO confirm).
// The order of entries is significant: the snapshot suite records results positionally.
const strings = [
16+
...range(256, 0x20).filter((x) => x !== ' ' && x !== '#'), // we directly set to href
17+
// each remaining code point from the 0x00 - 0xff range, followed by '*'
...range(256, 0)
18+
.filter((x) => x !== '#' && x !== '\t' && x !== '\n' && x !== '\r')
19+
.map((x) => `${x}*`),
20+
// the same code points again, this time with '*' on both sides
...range(256, 0)
21+
.filter((x) => x !== '#' && x !== '\t' && x !== '\n' && x !== '\r')
22+
.map((x) => `*${x}*`),
23+
24+
// individual code points from U+FEFF upwards, including ones above U+FFFF
String.fromCodePoint(0xfe_ff),
25+
String.fromCodePoint(0xff_fd),
26+
String.fromCodePoint(0xff_fe),
27+
String.fromCodePoint(0xff_ff),
28+
String.fromCodePoint(0x1_00_00),
29+
String.fromCodePoint(0x2_f8_a6), // max big5
30+
String.fromCodePoint(0x2_f8_a7),
31+
String.fromCodePoint(0x1_10_00),
32+
33+
// code points above U+FFFF between '*' (42) and ',' (44); the last two entries repeat the first two
String.fromCodePoint(42, 0x1_00_00, 0x1_10_00, 42),
34+
String.fromCodePoint(42, 0x1_00_00, 44, 0x1_10_00, 42),
35+
String.fromCodePoint(42, 0x1_00_00, 0x1_10_00, 42),
36+
String.fromCodePoint(42, 0x1_00_00, 44, 0x1_10_00, 42),
37+
38+
// groups of ASCII characters from the 0x20 - 0x7e range
String.fromCharCode(0x20, 0x22, 0x3c, 0x3e, 0x60),
39+
String.fromCharCode(0x20, 0x22, 0x24, 0x3c, 0x3e),
40+
String.fromCharCode(0x3f, 0x5e, 0x60, 0x7b, 0x7d),
41+
String.fromCharCode(0x2f, 0x3a, 0x3b, 0x3d, 0x40, 0x5b, 0x5c, 0x5d, 0x7c),
42+
String.fromCharCode(0x24, 0x25, 0x26, 0x2b, 0x2c),
43+
String.fromCharCode(0x21, 0x27, 0x28, 0x29, 0x7e),
44+
45+
// unpaired UTF-16 surrogates (0xd800, 0xdfff), on their own and inside ASCII text
String.fromCharCode(0x61, 0x62, 0xd8_00, 0x77, 0x78),
46+
String.fromCharCode(0xd8_00, 0xd8_00),
47+
String.fromCharCode(0x61, 0x62, 0xdf_ff, 0x77, 0x78),
48+
String.fromCharCode(0xdf_ff, 0xd8_00),
49+
50+
// long strings made of consecutive code points (0x200 or 0x2000 of them each)
range(0x2_00, 0x24).join(''), // from # + 1
51+
range(0x2_00, 0xf6_00).join(''), // user-defined
52+
range(0x2_00, 0xff_00).join(''),
53+
range(0x20_00, 0x24).join(''),
54+
range(0x20_00, 0xf0_00).join(''),
55+
range(0x20_00, 0xf_f0_00).join(''),
56+
'hello' + range(0x20_00, 0xf0_00).join('') + 'abc',
57+
]
58+
59+
// No entropy is ever added to this generator, so it yields the same predictable bytes on every run
const fixedPRG = keccakprg()

// Strips what the fixed entries of `strings` avoid as well: surrounding whitespace, tab / LF / CR / '#'
// anywhere, and any trailing run of characters in 0x00 - 0x20
const sanitize = (text) => {
  const trimmed = text.trim()
  const filtered = trimmed.replaceAll(/[\t\n\r#]/g, '')
  return filtered.replaceAll(/[\x00-\x20]+$/g, '') // eslint-disable-line no-control-regex
}

// 32 rounds; each round reads the same 1024 bytes as 8-bit, 16-bit and 32-bit units and appends
// the three resulting strings to `strings`
for (let round = 0; round < 32; round++) {
  const bytes = fixedPRG.randomBytes(1024)
  const { buffer, byteOffset, byteLength } = bytes
  const units = new Uint16Array(buffer, byteOffset, byteLength / 2)
  const words = new Uint32Array(buffer, byteOffset, byteLength / 4)

  const fromBytes = String.fromCharCode(...bytes)
  const fromUnits = String.fromCharCode(...units)
  // the modulo folds each 32-bit value to below 0x110000, so it is a valid fromCodePoint argument
  const fromWords = String.fromCodePoint(...words.map((word) => word % 0x11_00_00))

  strings.push(sanitize(fromBytes), sanitize(fromUnits), sanitize(fromWords))
}
77+
78+
// Passes on Chromium and Servo. WebKit produces incorrect results, and Firefox fails
// for a reason not yet understood, on CI only. Also skipped when there is no DOM at all.
const skip =
  !(document && window) || ['webkit', 'firefox'].includes(process.env.EXODUS_TEST_PLATFORM)
84+
85+
// Cross-checks percentEncodeAfterEncoding against the host browser: for every accepted label an
// iframe document is loaded with that charset, each test string is sent to it, and the query the
// iframe reads back from a.search is compared with our own result.
describe('percent-encode after encoding matches browser', { skip }, () => {
86+
// Resolver of whichever promise is currently waiting for the next message from the iframe
let handle
87+
const onmessage = (event) => handle(event.data)
88+
const iframe = document.createElement('iframe')
89+
90+
before(() => {
91+
window.addEventListener('message', onmessage)
92+
document.body.append(iframe)
93+
})
94+
95+
after(() => {
96+
window.removeEventListener('message', onmessage)
97+
iframe.remove()
98+
})
99+
100+
for (const encoding of labels) {
101+
if (invalid.has(encoding)) continue
102+
test(encoding, async (t) => {
103+
// Counts strings that passed; also used to number assertion messages
let ok = 0
104+
// Resolved by the empty message that the iframe script posts as soon as it runs
const loaded = new Promise((resolve) => (handle = resolve))
105+
// Page for the iframe: replies to each incoming message with the query part of an
// https:// URL built from the message data
const html = `
106+
<!DOCTYPE html>
107+
<script>
108+
var a = document.createElement('a');
109+
window.parent.postMessage('', '*');
110+
window.addEventListener('message', (e) => {
111+
a.href = 'https://example.com/?' + e.data
112+
window.parent.postMessage(a.search.slice(1), '*')
113+
})
114+
</script>`
115+
// NOTE(review): the charset parameter presumably makes `encoding` the document encoding
// that the browser applies when serializing the query — confirm
iframe.src = `data:text/html;charset=${encoding},${encodeURI(html)}`
116+
await loaded
117+
118+
for (const str of strings) {
119+
// `handle` is re-pointed before posting, so the reply settles this iteration's promise
const promise = new Promise((resolve) => (handle = resolve))
120+
iframe.contentWindow.postMessage(str, '*')
121+
const actual = percentEncodeAfterEncoding(encoding, str, specialquery)
122+
t.assert.strictEqual(actual, await promise, `${encoding} #${ok + 1}`)
123+
ok++
124+
}
125+
126+
// Every string must have been compared
t.assert.strictEqual(ok, strings.length)
127+
})
128+
}
129+
})
130+
131+
// Ensures that behavior matches everywhere with snapshots
132+
// Combined with the above check, we know that snapshots match reference browser platforms
133+
// One test per accepted label: encodes every entry of `strings` and records the whole result
// list as a snapshot, or marks the test skipped when the runner has no snapshot assertion.
describe('percent-encode after encoding matches snapshot', () => {
  const encodeAll = (encoding) =>
    strings.map((str) => percentEncodeAfterEncoding(encoding, str, specialquery))

  for (const encoding of labels) {
    if (!invalid.has(encoding)) {
      test(encoding, async (t) => {
        const res = encodeAll(encoding)
        if (!t.assert.snapshot) {
          t.skip('Snapshots are not supported')
          return
        }

        t.assert.snapshot(res)
      })
    }
  }
})

0 commit comments

Comments
 (0)