33
44import { utf16toString , utf16toStringLoose } from '@exodus/bytes/utf16.js'
55import { utf8fromStringLoose , utf8toString , utf8toStringLoose } from '@exodus/bytes/utf8.js'
6- import { createSinglebyteDecoder } from '@exodus/bytes/single-byte.js'
6+ import { createSinglebyteDecoder , createSinglebyteEncoder } from '@exodus/bytes/single-byte.js'
77import labels from './encoding.labels.js'
88import { fromSource , getBOMEncoding , normalizeEncoding , E_ENCODING } from './encoding.api.js'
99import { unfinishedBytes , mergePrefix } from './encoding.util.js'
10+ import { percentEncoder } from './percent.js'
11+ import { encodeMap } from './single-byte.js'
12+ import { E_STRICT_UNICODE } from './utf8.js'
13+ import { E_STRING } from './_utils.js'
1014
1115export { labelToName , getBOMEncoding , normalizeEncoding } from './encoding.api.js'
1216
// Error message text for legacy multi-byte encodings when the lite entry point is in use
const E_MULTI =
  'Legacy multi-byte encodings are disabled in /encoding-lite.js, use /encoding.js for full encodings range support'
// U+FFFD REPLACEMENT CHARACTER
const replacementChar = '\uFFFD'
// Encoding names that are routed to the multi-byte codec implementation instead of the single-byte one
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
// Multi-byte codec factories. Both stay undefined until setMultibyte() is called.
let createMultibyteDecoder
let multibyteEncoder

// Registers the multi-byte codec implementation used by this module.
// `createDecoder` is stored as the multi-byte decoder factory, `createEncoder` as the multi-byte encoder factory.
// Calling it again replaces both previously registered values.
export function setMultibyte(createDecoder, createEncoder) {
  createMultibyteDecoder = createDecoder
  multibyteEncoder = createEncoder
}
2328
// Defines `key` on `obj` as a non-writable data property holding `value`, and returns `obj`.
// `enumerable` and `configurable` are not specified, so a newly created property gets them as false.
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
@@ -311,3 +316,65 @@ export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
311316
312317 return createSinglebyteDecoder ( enc , true ) ( u8 )
313318}
319+
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
// Codepoints below 0x20, 0x7F specifically, and above 0x7F (non-ASCII) are always encoded
// > A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION SEPARATOR ONE, inclusive.
// > The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~).
// Throws on non-well-formed (non scalar-value) strings as they are not supposed to be used here (and would be inconsistent)
//
// Encodes `input` to bytes in `encoding`, then percent-encodes those bytes and returns the resulting string.
// `percentEncodeSet` and `spaceAsPlus` are forwarded untouched to percentEncoder().
// Characters that the target encoding can not represent are emitted as a percent-encoded
// numeric character reference: `%26%23<decimal code point>%3B` (i.e. `&#cp;`).
//
// Throws:
// - RangeError(E_ENCODING) for 'replacement' / 'utf16-le' / 'utf16-be', which have no encoder
// - Error(E_MULTI) for a multi-byte encoding when no multi-byte encoder was registered via setMultibyte()
// - SyntaxError(E_STRICT_UNICODE) on lone surrogates met while escaping unmappable characters
// - TypeError(E_STRING) on non-string input in the single-byte escaping path
export function percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false) {
  const e = normalizeEncoding(encoding)
  // Ref: https://encoding.spec.whatwg.org/#get-an-encoder
  if (e === 'replacement' || e === 'utf16-le' || e === 'utf16-be') throw new RangeError(E_ENCODING)

  const percent = percentEncoder(percentEncodeSet, spaceAsPlus)
  if (e === 'utf-8') return percent(utf8fromStringLoose(input))

  const multi = multibyteSet.has(e)
  // Fail with a descriptive message instead of "multibyteEncoder is not a function"
  if (multi && !multibyteEncoder) throw new Error(E_MULTI)

  // Fast path: a throwing encoder succeeds whenever every character is representable.
  // Any failure is deliberately ignored here, the escaping path below re-validates the input.
  const fatal = multi ? multibyteEncoder(e) : createSinglebyteEncoder(e)
  try {
    return percent(fatal(input))
  } catch {}

  let res = ''
  let last = 0 // start of the not-yet-flushed byte range in `u`
  if (multi) {
    // NOTE(review): the callback is presumably invoked per unencodable code point with the output
    // buffer `u` and the current write offset `i` - confirm against the multi-byte encoder implementation
    const escaping = multibyteEncoder(e, (cp, u, i) => {
      if (cp >= 0xd8_00 && cp < 0xe0_00) throw new SyntaxError(E_STRICT_UNICODE)
      res += `${percent(u, last, i)}%26%23${cp}%3B` // &#cp;
      last = i
      return 0 // no bytes emitted
    })

    const u = escaping(input) // has side effects on res
    res += percent(u, last)
  } else {
    if (typeof input !== 'string') throw new TypeError(E_STRING) // all other paths have their own validation
    const m = encodeMap(e)
    const len = input.length
    const u = new Uint8Array(len) // one slot per UTF-16 code unit, slots of escaped characters are never flushed
    for (let i = 0; i < len; i++) {
      const x = input.charCodeAt(i)
      const b = m[x]
      if (b || !x) {
        u[i] = b
        continue
      }

      // Unmappable character: flush pending bytes, then emit it as an escaped numeric reference
      const start = i // the flush must stop before the first code unit of this character
      let cp = x
      if (x >= 0xd8_00 && x < 0xe0_00) {
        if (x >= 0xdc_00 || i + 1 === len) throw new SyntaxError(E_STRICT_UNICODE)
        const x1 = input.charCodeAt(i + 1)
        if (x1 < 0xdc_00 || x1 >= 0xe0_00) throw new SyntaxError(E_STRICT_UNICODE)
        cp = 0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))
        i++ // consume the low surrogate too
      }

      // Flushing up to `i` here would include the unused zero slot of a high surrogate and emit a stray %00
      res += `${percent(u, last, start)}%26%23${cp}%3B` // &#cp;
      last = i + 1 // skip current
    }

    res += percent(u, last)
  }

  return res
}
0 commit comments