|
| 1 | +import '@exodus/bytes/encoding.js' |
| 2 | +import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js' |
| 3 | +import { describe, test } from 'node:test' |
| 4 | +import { labels } from './encoding/fixtures/encodings.cjs' |
| 5 | + |
// Percent-encode sets used by the tests below. Each one is written out as the
// printable ASCII characters it contains, in ascending code point order.

// Printable ASCII escaped by encodeURI
// https://tc39.es/ecma262/#sec-encodeuri-uri
const jsuri = ' "%<>[\\]^`{|}'

// Printable ASCII escaped by encodeURIComponent
// https://tc39.es/ecma262/#sec-encodeuricomponent-uricomponent
const jsuricomponent = ' "#$%&+,/:;<=>?@[\\]^`{|}'

// https://url.spec.whatwg.org/#fragment-percent-encode-set
const fragment = ' "<>`'

// https://url.spec.whatwg.org/#query-percent-encode-set
const query = ' "#<>'

// https://url.spec.whatwg.org/#special-query-percent-encode-set
const specialquery = ' "#\'<>'

// https://url.spec.whatwg.org/#path-percent-encode-set
const path = ' "#<>?^`{}'

// https://url.spec.whatwg.org/#userinfo-percent-encode-set
const userinfo = ' "#/:;<=>?@[\\]^`{|}'

// https://url.spec.whatwg.org/#component-percent-encode-set
const component = ' "#$%&+,/:;<=>?@[\\]^`{|}'

// https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
const form = ' !"#$%&\'()+,/:;<=>?@[\\]^`{|}~'

// Sets that the per-encoding tests iterate over; the empty string is a set that contains nothing
const sets = ['', userinfo, jsuri, jsuricomponent]

// Encodings that must be rejected, see https://encoding.spec.whatwg.org/#get-an-encoder
const invalid = ['replacement', 'utf-16le', 'utf-16be']

// True on engines where the exhaustive Unicode test is run over a reduced range
const slowEngine = ['quickjs', 'xs', 'engine262'].includes(process.env.EXODUS_TEST_PLATFORM)
| 23 | + |
// Sanity check of the percent-encode set constants declared at the top of this file:
// every set is rebuilt from the set it extends plus the code points added on top of it,
// and has to match the hand-written constant exactly.
test('percent-encode sets coherence', (t) => {
  // `a` is the constant under test and is compared as-is, so it must already be sorted;
  // `b` is the expected content in any order and is sorted before the comparison
  const eq = (a, b) => t.assert.deepStrictEqual([...a], [...b].sort())

  // https://tc39.es/ecma262/#sec-encodeuri-uri step 2
  eq(jsuricomponent, jsuri + ';/?:@&=+$,#')

  // https://url.spec.whatwg.org/#fragment-percent-encode-set
  eq(fragment, String.fromCharCode(0x20, 0x22, 0x3c, 0x3e, 0x60))

  // https://url.spec.whatwg.org/#query-percent-encode-set
  eq(query, String.fromCharCode(0x20, 0x22, 0x23, 0x3c, 0x3e))

  // https://url.spec.whatwg.org/#special-query-percent-encode-set
  eq(specialquery, query + String.fromCharCode(0x27))

  // https://url.spec.whatwg.org/#path-percent-encode-set
  eq(path, query + String.fromCharCode(0x3f, 0x5e, 0x60, 0x7b, 0x7d))

  // https://url.spec.whatwg.org/#userinfo-percent-encode-set
  eq(userinfo, path + String.fromCharCode(0x2f, 0x3a, 0x3b, 0x3d, 0x40, 0x5b, 0x5c, 0x5d, 0x7c))

  // https://url.spec.whatwg.org/#component-percent-encode-set
  eq(component, userinfo + String.fromCharCode(0x24, 0x25, 0x26, 0x2b, 0x2c))
  // The two constants are declared separately, so this pins that they really are the same string
  t.assert.strictEqual(jsuricomponent, component)

  // https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
  eq(form, component + String.fromCharCode(0x21, 0x27, 0x28, 0x29, 0x7e))
})
| 44 | + |
// Tests for percentEncodeAfterEncoding, called throughout as f(encoding, input, set[, flag]).
// With the flag set to true a space comes out as '+' (see the assertions below).
describe('percent-encode after encoding', () => {
  const f = percentEncodeAfterEncoding

  // Fixture labels minus the ones that must be rejected (see `invalid`), computed once
  // and shared by the per-encoding suites below. `labels` is only known to be iterable here.
  const encodable = [...labels].filter((label) => !invalid.includes(label))

  // https://url.spec.whatwg.org/#example-percent-encode-operations
  test('examples from spec', (t) => {
    // At https://github.com/whatwg/url/commit/5c50135f8304dc8cb9bb49367b364699cc5bb031
    t.assert.strictEqual(f('Shift_JIS', ' ', userinfo), '%20')
    t.assert.strictEqual(f('Shift_JIS', '≡', userinfo), '%81%DF')
    t.assert.strictEqual(f('Shift_JIS', '‽', userinfo), '%26%238253%3B')
    t.assert.strictEqual(f('ISO-2022-JP', '¥', userinfo), '%1B(J%5C%1B(B')
    t.assert.strictEqual(
      f('Shift_JIS', '1+1 ≡ 2%20‽', userinfo, true),
      '1+1+%81%DF+2%20%26%238253%3B'
    )
    t.assert.strictEqual(f('UTF-8', '≡', userinfo), '%E2%89%A1')
    t.assert.strictEqual(f('UTF-8', '‽', userinfo), '%E2%80%BD')
    t.assert.strictEqual(f('UTF-8', 'Say what‽', userinfo), 'Say%20what%E2%80%BD')

    // At https://github.com/whatwg/url/pull/896
    // The UTF-8 assertions at the end of this group repeat the ones above unchanged
    t.assert.strictEqual(f('Shift_JIS', ' ', specialquery), '%20')
    t.assert.strictEqual(f('Shift_JIS', '≡', specialquery), '%81%DF')
    t.assert.strictEqual(f('Shift_JIS', '‽', specialquery), '%26%238253%3B')
    // The backslash stays literal here as it is not a member of `specialquery`
    t.assert.strictEqual(f('ISO-2022-JP', '¥', specialquery), '%1B(J\\%1B(B')
    t.assert.strictEqual(
      f('Shift_JIS', '1+1 ≡ 2%20‽', form, true),
      '1%2B1+%81%DF+2%2520%26%238253%3B'
    )
    t.assert.strictEqual(f('UTF-8', '≡', userinfo), '%E2%89%A1')
    t.assert.strictEqual(f('UTF-8', '‽', userinfo), '%E2%80%BD')
    t.assert.strictEqual(f('UTF-8', 'Say what‽', userinfo), 'Say%20what%E2%80%BD')
  })

  // https://encoding.spec.whatwg.org/#get-an-encoder
  describe('throws on unknown, utf-16 and replacement', () => {
    for (const encoding of [...invalid, 'what', 'UTF-16', 'unicode']) {
      test(encoding, (t) => {
        for (const set of sets) {
          // Must throw regardless of input, set or flag, including for empty input
          t.assert.throws(() => f(encoding, '', set), /encoding/)
          t.assert.throws(() => f(encoding, ' ', set), /encoding/)
          t.assert.throws(() => f(encoding, ' ', set, true), /encoding/)
          t.assert.throws(() => f(encoding, '\uFFFD', set, true), /encoding/)
        }
      })
    }
  })

  describe('all valid encodings are recognized', () => {
    for (const encoding of encodable) {
      test(encoding, (t) => {
        for (const set of sets) {
          t.assert.strictEqual(f(encoding, '', set), '')
          // Even non-ASCII encodings passthrough on a lone space
          t.assert.strictEqual(f(encoding, ' ', set), set.includes(' ') ? '%20' : ' ')
          t.assert.strictEqual(f(encoding, ' ', set, true), '+')
        }
      })
    }
  })

  describe('replaces non-scalarvalue', () => {
    for (const encoding of encodable) {
      test(encoding, (t) => {
        // Every lone surrogate has to produce the same output as U+FFFD does
        const a = f(encoding, '\uFFFD', userinfo)
        const b = f(encoding, '\uFFFD', jsuri)
        for (let cp = 0xd8_00; cp < 0xe0_00; cp++) {
          const s = String.fromCodePoint(cp)
          t.assert.strictEqual(f(encoding, s, userinfo), a)
          t.assert.strictEqual(f(encoding, s, jsuri), b)
        }
      })
    }
  })

  // Cross-checks against the built-in encodeURI / encodeURIComponent
  describe('encodeURI / encodeURIComponent', () => {
    describe('ASCII supersets', () => {
      const ascii = Array.from({ length: 128 }, (_, i) => String.fromCharCode(i)).join('')
      for (const encoding of encodable) {
        if (encoding === 'iso-2022-jp') continue // not an ASCII superset
        test(encoding, (t) => {
          // Whole ASCII range as one string first, then each character on its own
          t.assert.strictEqual(f(encoding, ascii, jsuricomponent), encodeURIComponent(ascii))
          t.assert.strictEqual(f(encoding, ascii, jsuri), encodeURI(ascii))
          for (let i = 0; i < 128; i++) {
            const s = String.fromCharCode(i)
            t.assert.strictEqual(f(encoding, s, jsuricomponent), encodeURIComponent(s))
            t.assert.strictEqual(f(encoding, s, jsuri), encodeURI(s))
          }
        })
      }
    })

    test('UTF-8: full Unicode', (t) => {
      // Slow engines only cover code points up to 0x1ffff instead of the full range
      const MAX = slowEngine ? 0x1_ff_ff : 0x10_ff_ff // Max Unicode codepoint
      for (let cp = 0; cp <= MAX; cp++) {
        // Lone surrogates make encodeURI / encodeURIComponent throw, so they are skipped
        if (cp >= 0xd8_00 && cp < 0xe0_00) continue
        const s = String.fromCodePoint(cp)
        t.assert.strictEqual(f('utf8', s, jsuricomponent), encodeURIComponent(s))
        t.assert.strictEqual(f('utf8', s, jsuri), encodeURI(s))
      }
    })
  })
})
0 commit comments