Skip to content

Commit 64e0c59

Browse files
committed
Make GraphemeCategory enum tree-shakable
1 parent e50d821 commit 64e0c59

4 files changed

Lines changed: 145 additions & 126 deletions

File tree

scripts/unicode.js

Lines changed: 61 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -430,28 +430,63 @@ let printTableRaw = (f, name, table, format) => {
430430

431431
/**
432432
* @param {WriteStream} f
433-
* @param {CategorizedUnicodeRange[]} breakTable
434-
* @param {string[]} breakCats
433+
* @param {CategorizedUnicodeRange[]} ranges
434+
* @param {string[]} cats
435+
* @param {string} catsModule
435436
* @param {string} name
436437
* @returns
437438
*/
438-
let printBreakModule = (f, breakTable, breakCats, name) => {
439-
let cats = ['Any', ...breakCats.toSorted()];
440-
439+
let printDataModule = (f, ranges, cats, catsModule, name) => {
441440
let capitalName = capitalize(name);
442441
let typeName = `${capitalName}Category`;
443-
let keyTypeName = `${typeName}Key`;
444442
let numTypeName = `${typeName}Num`;
445443
let rangeTypeName = `${typeName}Range`;
446444

445+
/** @type {Record<string, number>} */
446+
let inversed = {};
447+
cats.forEach((cat, idx) => {
448+
inversed[cat] = idx;
449+
});
450+
447451
f.write(preamble);
448452
f.write(`
449453
import { decodeUnicodeData } from './core.js';
450454
451455
/**
456+
* @typedef {import('./${catsModule}').${numTypeName}} ${numTypeName}
452457
* @typedef {import('./core.js').UnicodeDataEncoding} UnicodeDataEncoding
458+
* @typedef {import('./core.js').CategorizedUnicodeRange<${numTypeName}>} ${rangeTypeName}
453459
*/
460+
`,
461+
);
454462

463+
f.write(`
464+
/**
465+
* @type {${rangeTypeName}[]}
466+
*/
467+
export const ${name}_ranges = decodeUnicodeData(
468+
/** @type {UnicodeDataEncoding} */
469+
('${encodeUnicodeData(ranges.map(range => [range[0], range[1], 0]))}'),
470+
'${ranges.map(range => inversed[range[2]].toString(36)).join('')}',
471+
);
472+
`,
473+
);
474+
};
475+
476+
/**
477+
* @param {WriteStream} f
478+
* @param {string[]} cats
479+
* @param {string} name
480+
* @returns
481+
*/
482+
let printCategoryModule = (f, cats, name) => {
483+
let capitalName = capitalize(name);
484+
let typeName = `${capitalName}Category`;
485+
let keyTypeName = `${typeName}Key`;
486+
let numTypeName = `${typeName}Num`;
487+
488+
f.write(preamble);
489+
f.write(`
455490
/**
456491
`,
457492
);
@@ -471,13 +506,6 @@ import { decodeUnicodeData } from './core.js';
471506

472507
f.write(`
473508
/**
474-
* @typedef {import('./core.js').CategorizedUnicodeRange<${numTypeName}>} ${rangeTypeName}
475-
*/
476-
`,
477-
);
478-
479-
f.write(`
480-
/**
481509
* @typedef {(
482510
`,
483511
);
@@ -489,33 +517,15 @@ import { decodeUnicodeData } from './core.js';
489517

490518
f.write(`
491519
/**
492-
* Grapheme category enum
493-
*
494-
* Note:
495-
* The object isn't actually frozen
496-
* because using \`Object.freeze\` increases 800 bytes on Brotli compression.
497-
*
498-
* @type {Readonly<Record<${keyTypeName}, ${numTypeName}>>}
520+
* ${capitalName}_Break property values
499521
*/
500-
export const ${typeName} = {
522+
export const ${typeName} = /** @type {const} */ ({
501523
`.trimStart(),
502524
);
503525
for (let cat of cats) {
504526
f.write(` ${cat}: ${inversed[cat]},\n`);
505527
}
506-
f.write('};\n');
507-
508-
f.write(`
509-
/**
510-
* @type {${rangeTypeName}[]}
511-
*/
512-
export const ${name}_ranges = decodeUnicodeData(
513-
/** @type {UnicodeDataEncoding} */
514-
('${encodeUnicodeData(breakTable.map(row => [row[0], row[1], 0]))}'),
515-
'${breakTable.map(row => inversed[row[2]].toString(36)).join('')}',
516-
);
517-
`,
518-
);
528+
f.write('});\n');
519529
};
520530

521531
/**
@@ -865,12 +875,27 @@ let graphemeTableOptimized = graphemeTable.filter(([from, to, cat]) => {
865875
return true;
866876
});
867877

878+
let graphemeCategories =
879+
['Any', ...Object.keys(graphemeCats).concat(['Extended_Pictographic']).toSorted()];
880+
881+
let graphemCatsModule = '_grapheme_categories.js'
882+
883+
await emitSrc(
884+
graphemCatsModule,
885+
async f => printCategoryModule(
886+
f,
887+
graphemeCategories,
888+
'grapheme',
889+
),
890+
);
891+
868892
await emitSrc(
869893
'_grapheme_data.js',
870-
async f => printBreakModule(
894+
async f => printDataModule(
871895
f,
872896
graphemeTableOptimized,
873-
Object.keys(graphemeCats).concat(['Extended_Pictographic']),
897+
graphemeCategories,
898+
graphemCatsModule,
874899
'grapheme',
875900
),
876901
);

src/_grapheme_categories.js

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
// The following code was generated by "scripts/unicode.js",
2+
// DO NOT EDIT DIRECTLY.
3+
//
4+
// @ts-check
5+
6+
/**
7+
* @typedef {0} GC_Any
8+
* @typedef {1} GC_CR
9+
* @typedef {2} GC_Control
10+
* @typedef {3} GC_Extend
11+
* @typedef {4} GC_Extended_Pictographic
12+
* @typedef {5} GC_L
13+
* @typedef {6} GC_LF
14+
* @typedef {7} GC_LV
15+
* @typedef {8} GC_LVT
16+
* @typedef {9} GC_Prepend
17+
* @typedef {10} GC_Regional_Indicator
18+
* @typedef {11} GC_SpacingMark
19+
* @typedef {12} GC_T
20+
* @typedef {13} GC_V
21+
* @typedef {14} GC_ZWJ
22+
* @typedef {(
23+
* | GC_Any
24+
* | GC_CR
25+
* | GC_Control
26+
* | GC_Extend
27+
* | GC_Extended_Pictographic
28+
* | GC_L
29+
* | GC_LF
30+
* | GC_LV
31+
* | GC_LVT
32+
* | GC_Prepend
33+
* | GC_Regional_Indicator
34+
* | GC_SpacingMark
35+
* | GC_T
36+
* | GC_V
37+
* | GC_ZWJ
38+
* )} GraphemeCategoryNum
39+
*/
40+
41+
/**
42+
* @typedef {(
43+
* | 'Any'
44+
* | 'CR'
45+
* | 'Control'
46+
* | 'Extend'
47+
* | 'Extended_Pictographic'
48+
* | 'L'
49+
* | 'LF'
50+
* | 'LV'
51+
* | 'LVT'
52+
* | 'Prepend'
53+
* | 'Regional_Indicator'
54+
* | 'SpacingMark'
55+
* | 'T'
56+
* | 'V'
57+
* | 'ZWJ'
58+
* )} GraphemeCategoryKey
59+
*/
60+
61+
/**
62+
* Grapheme_Break property values
63+
*/
64+
export const GraphemeCategory = /** @type {const} */ ({
65+
Any: 0,
66+
CR: 1,
67+
Control: 2,
68+
Extend: 3,
69+
Extended_Pictographic: 4,
70+
L: 5,
71+
LF: 6,
72+
LV: 7,
73+
LVT: 8,
74+
Prepend: 9,
75+
Regional_Indicator: 10,
76+
SpacingMark: 11,
77+
T: 12,
78+
V: 13,
79+
ZWJ: 14,
80+
});

src/_grapheme_data.js

Lines changed: 1 addition & 85 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/grapheme.js

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
// @ts-check
1515

1616
import { findUnicodeRangeIndex } from './core.js';
17-
import { GraphemeCategory, grapheme_ranges } from './_grapheme_data.js';
17+
import { grapheme_ranges } from './_grapheme_data.js';
1818
import { consonant_ranges } from './_incb_data.js';
1919

20+
export { GraphemeCategory } from './_grapheme_categories.js';
21+
2022
/**
21-
* @typedef {import('./_grapheme_data.js').GC_Any} GC_Any
22-
*
2323
* @typedef {import('./_grapheme_data.js').GraphemeCategoryNum} GraphemeCategoryNum
2424
* @typedef {import('./_grapheme_data.js').GraphemeCategoryRange} GraphemeCategoryRange
2525
*
@@ -31,8 +31,6 @@ import { consonant_ranges } from './_incb_data.js';
3131
* @typedef {import('./core.js').Segmenter<GraphemeSegmentExtra>} GraphemeSegmenter
3232
*/
3333

34-
export { GraphemeCategory };
35-
3634
const BMP_MAX = 0xFFFF;
3735

3836
/**

0 commit comments

Comments
 (0)