@@ -3,6 +3,7 @@ import { toBase64url } from '@exodus/bytes/base64.js'
33import { utf16fromString } from '@exodus/bytes/utf16.js'
44import { join } from 'node:path'
55import assert from 'node:assert/strict'
6+ import { gzipSync } from 'node:zlib'
67
78// const splitChunks = new Set(['jis0208', 'jis0212', 'big5']) // pretty-print into chunks, non-contiguous anyway
89
@@ -42,7 +43,7 @@ for (const file of readdirSync(import.meta.dirname)) {
4243 const match = file . match ( / ^ i n d e x - ( [ a - z 0 - 9 - ] + ) \. t x t $ / u)
4344 if ( ! match ) continue
4445 const encoding = match [ 1 ]
45- if ( encoding . endsWith ( '-ranges' ) ) continue
46+ if ( encoding . endsWith ( '-ranges' ) || encoding === 'iso-2022-jp-katakana ' ) continue
4647 const non16bit = encoding === 'big5'
4748 const text = readFileSync ( join ( import . meta. dirname , file ) , 'utf8' )
4849 let max = 0
@@ -127,6 +128,7 @@ function encodeString(s, lastconseq) {
127128 return str . length * 1.5 < partsstr . length && str . length < partsstr . length - 3 ? [ str ] : parts
128129}
129130
131+ let final = '{\n'
130132for ( const [ encoding , chars ] of Object . entries ( encodings ) ) {
131133 const list = [ ]
132134 let str = chars
@@ -222,7 +224,13 @@ for (const [encoding, chars] of Object.entries(encodings)) {
222224 if ( tmp . length > 0 ) list2 . push ( tmp )
223225
224226 const dump = list2 . join ( ',\n ' )
225- console . log ( `const ${ encoding } = [\n ${ dump } \n] \n`)
227+ final += ` ${ JSON . stringify ( encoding ) } : [\n ${ dump } \n ], \n`
226228}
227229
228- console . error ( [ ...stats ] . sort ( ( a , b ) => b [ 1 ] - a [ 1 ] ) )
230+ final += '}'
231+
232+ // console.error([...stats].sort((a, b) => b[1] - a[1]))
233+
234+ console . log ( final )
235+ console . error ( `Raw size: ${ final . length } ` )
236+ console . error ( `Gzip size: ${ gzipSync ( final ) . length } ` )
0 commit comments