11import { fromBase64url } from '@exodus/bytes/base64.js'
22import { utf16toString } from '@exodus/bytes/utf16.js'
33import loadEncodings from './multi-byte.encodings.cjs'
4- import { to16input } from './utf16.js'
54
65export const sizes = {
76 jis0208 : 11_104 ,
@@ -40,7 +39,7 @@ function loadBase64(str) {
4039 return y
4140}
4241
43- function unwrap ( res , t , pos , highMode = false ) {
42+ function unwrap ( res , t , pos ) {
4443 let code = 0
4544 for ( let i = 0 ; i < t . length ; i ++ ) {
4645 let x = t [ i ]
@@ -55,35 +54,26 @@ function unwrap(res, t, pos, highMode = false) {
5554 code += t [ ++ i ]
5655 }
5756
58- if ( highMode ) {
59- for ( let k = 0 ; k < x ; k ++ , pos ++ , code ++ ) {
60- if ( code <= 0xff_ff ) {
61- res [ pos ] = code
62- } else {
63- const c = String . fromCodePoint ( code )
64- res [ pos ] = ( c . charCodeAt ( 0 ) << 16 ) | c . charCodeAt ( 1 )
65- }
57+ for ( let k = 0 ; k < x ; k ++ , pos ++ , code ++ ) {
58+ if ( code <= 0xff_ff ) {
59+ res [ pos ] = code
60+ } else {
61+ const c = String . fromCodePoint ( code )
62+ res [ pos ] = ( c . charCodeAt ( 0 ) << 16 ) | c . charCodeAt ( 1 )
6663 }
67- } else {
68- for ( let k = 0 ; k < x ; k ++ , pos ++ , code ++ ) res [ pos ] = code
6964 }
7065 }
7166 } else if ( x [ 0 ] === '$' && Object . hasOwn ( indices , x ) ) {
72- pos = unwrap ( res , indices [ x ] , pos , highMode ) // self-reference using shared chunks
73- } else if ( highMode ) {
74- const s = [ ... utf16toString ( loadBase64 ( x ) , 'uint8-le' ) ] // splits by codepoints
75- let c
76- for ( let i = 0 ; i < s . length ; ) {
77- c = s [ i ++ ]
67+ pos = unwrap ( res , indices [ x ] , pos ) // self-reference using shared chunks
68+ } else {
69+ let last
70+ // splits by codepoints
71+ for ( const c of utf16toString ( loadBase64 ( x ) , 'uint8-le' ) ) {
72+ last = c
7873 res [ pos ++ ] = c . length === 1 ? c . charCodeAt ( 0 ) : ( c . charCodeAt ( 0 ) << 16 ) | c . charCodeAt ( 1 )
7974 }
8075
81- code = c . codePointAt ( 0 ) + 1
82- } else {
83- const u16 = to16input ( loadBase64 ( x ) , true ) // data is little-endian
84- res . set ( u16 , pos )
85- pos += u16 . length
86- code = u16 [ u16 . length - 1 ] + 1
76+ code = last . codePointAt ( 0 ) + 1
8777 }
8878 }
8979
@@ -108,9 +98,8 @@ export function getTable(id) {
10898 let a = - 1
10999 res = new Uint16Array ( indices [ id ] . map ( ( x ) => ( a += x + 1 ) ) )
110100 } else if ( id === 'big5' ) {
111- if ( ! Object . hasOwn ( sizes , id ) ) throw new Error ( 'Unknown encoding' )
112- res = new Uint32Array ( sizes [ id ] ) // array of strings or undefined
113- unwrap ( res , indices [ id ] , 0 , true )
101+ res = new Uint32Array ( sizes [ id ] ) // single or double charcodes
102+ unwrap ( res , indices [ id ] , 0 )
114103 // Pointer code updates are embedded into the table
115104 // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
116105 res [ 1133 ] = 0xca_03_04
@@ -120,7 +109,7 @@ export function getTable(id) {
120109 } else {
121110 if ( ! Object . hasOwn ( sizes , id ) ) throw new Error ( 'Unknown encoding' )
122111 res = new Uint16Array ( sizes [ id ] )
123- unwrap ( res , indices [ id ] , 0 , false )
112+ unwrap ( res , indices [ id ] , 0 )
124113 }
125114
126115 indices [ id ] = null // gc
0 commit comments