Skip to content

Commit a701856

Browse files
committed
feat(bootstrap): add Unicode property escape transforms for --with-intl=none
- Integrate Unicode property escape transformations from babel-plugin-with-intl-none.
- Add transformUnicodePropertyEscapes() to esbuild-plugin-smol-transform.
- Transform \p{Default_Ignorable_Code_Point}, \p{Control}, \p{Format}, and \p{Mark} to character class approximations.
- Remove the /u and /v flags after transformation for ICU-free compatibility.
- Fixes the MODULE_NOT_FOUND error in smol builds caused by Unicode regexes requiring ICU.

This resolves the build failure where bootstrap-smol.js contained Unicode property escapes that require ICU support, but smol binaries use --with-intl=none to save 6-8MB.
1 parent 681d155 commit a701856

3 files changed

Lines changed: 178 additions & 83 deletions

File tree

packages/bootstrap/.config/esbuild-plugin-smol-transform.mjs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,105 @@ export function smolTransformPlugin() {
117117
}
118118
}
119119

120+
// Transform Unicode property escapes for --with-intl=none compatibility.
121+
// This is CRITICAL for smol builds which disable ICU to save 6-8MB.
122+
content = transformUnicodePropertyEscapes(content)
123+
120124
// Update the output content.
121125
output.contents = Buffer.from(content, 'utf8')
122126
}
123127
})
124128
},
125129
}
126130
}
131+
132+
/**
 * Rewrite the Unicode property escapes inside the body of ONE regex literal.
 *
 * The pattern is walked left to right while tracking whether the cursor is
 * inside a character class, because the replacement differs:
 *   - inside `[...]`   `\p{X}` becomes the bare class body (e.g. `a-zA-Z`)
 *   - outside `[...]`  `\p{X}` becomes `[body]` and `\P{X}` becomes `[^body]`
 *
 * Escapes that cannot be translated are copied through unchanged: properties
 * missing from `propertyMap`, and negated `\P{X}` inside a character class
 * (which has no simple class-body equivalent).
 *
 * @param {string} pattern - Regex source between the delimiting slashes
 * @param {Record<string, string>} propertyMap - Property name → class body
 * @returns {{ rewritten: string, sawPropertyEscape: boolean }} The new
 *   pattern, and whether any `\p{...}` / `\P{...}` escape was encountered
 */
function rewritePropertyEscapesInPattern(pattern, propertyMap) {
  // Sticky (`y`) so the match is only attempted exactly at `lastIndex`.
  const propertyEscapeAt = /\\([pP])\{([^}]+)\}/y
  let rewritten = ''
  let sawPropertyEscape = false
  let inCharClass = false
  let index = 0

  while (index < pattern.length) {
    const char = pattern[index]

    if (char === '\\') {
      propertyEscapeAt.lastIndex = index
      const found = propertyEscapeAt.exec(pattern)

      if (found === null) {
        // Some other escape (`\]`, `\/`, `\\`, `\d`, ...). Copy the backslash
        // together with the escaped character so that an escaped bracket is
        // never mistaken for the start or end of a character class.
        rewritten += pattern.slice(index, index + 2)
        index += 2
        continue
      }

      const [escape, kind, name] = found
      const replacement = propertyMap[name]
      const isNegated = kind === 'P'
      sawPropertyEscape = true

      if (replacement === undefined || (isNegated && inCharClass)) {
        // Unknown property, or a negation we cannot express inside a class.
        rewritten += escape
      } else if (inCharClass) {
        rewritten += replacement
      } else {
        rewritten += `[${isNegated ? '^' : ''}${replacement}]`
      }
      index += escape.length
      continue
    }

    // Unescaped brackets toggle class state. Nested classes (a `v`-flag only
    // feature) are not modelled.
    if (char === '[') {
      inCharClass = true
    } else if (char === ']') {
      inCharClass = false
    }
    rewritten += char
    index += 1
  }

  return { rewritten, sawPropertyEscape }
}

/**
 * Transform Unicode property escapes in regex literals for ICU-free
 * environments (Node.js built with --with-intl=none).
 * Based on babel-plugin-with-intl-none.mjs transformations.
 *
 * Only regex literals that carry the `u` or `v` flag AND contain a
 * `\p{...}` / `\P{...}` escape are touched. For those literals the known
 * properties are replaced with basic character class approximations and the
 * `u` / `v` flags are removed. Everything else (other regex literals, strings
 * that merely look like `/a/b`, division expressions) is returned unchanged.
 *
 * This is a text-based heuristic, not a JavaScript parser: a regex literal is
 * recognised as `/body/flags` on a single line with at least one flag.
 *
 * NOTE: properties missing from the map are left in place while the flag is
 * still removed (same as the previous behaviour), so such an escape no longer
 * acts as a property escape at runtime. Extend the map when new properties
 * show up in the bundle.
 *
 * @param {string} content - Source code to transform
 * @returns {string} Transformed source code
 */
function transformUnicodePropertyEscapes(content) {
  // Map of Unicode property escapes to basic character class alternatives.
  const unicodePropertyMap = {
    __proto__: null,
    // Letter categories.
    'Letter': 'a-zA-Z',
    'L': 'a-zA-Z',
    'Alpha': 'a-zA-Z',
    'Alphabetic': 'a-zA-Z',
    // Number categories.
    'Number': '0-9',
    'N': '0-9',
    'Digit': '0-9',
    'Nd': '0-9',
    // Whitespace.
    'Space': '\\s',
    'White_Space': '\\s',
    // ASCII range.
    'ASCII': '\\x00-\\x7F',
    // Control characters (basic approximation).
    'Control': '\\x00-\\x1F\\x7F-\\x9F',
    'Cc': '\\x00-\\x1F\\x7F-\\x9F',
    // Format characters (approximate with zero-width space).
    'Format': '\\u200B-\\u200D\\uFEFF',
    'Cf': '\\u200B-\\u200D\\uFEFF',
    // Mark categories (combining marks - approximate).
    'Mark': '\\u0300-\\u036F',
    'M': '\\u0300-\\u036F',
    // Default_Ignorable_Code_Point (approximate with common invisibles).
    'Default_Ignorable_Code_Point': '\\u00AD\\u034F\\u061C\\u115F-\\u1160\\u17B4-\\u17B5\\u180B-\\u180D\\u200B-\\u200F\\u202A-\\u202E\\u2060-\\u206F\\u3164\\uFE00-\\uFE0F\\uFEFF\\uFFA0\\uFFF0-\\uFFF8',
  }

  // Regex literal candidate: /body/flags, where the body is made of
  //   - plain characters (no slash, backslash, '[' or line break),
  //   - escape pairs (\x), or
  //   - whole character classes, in which an unescaped '/' is allowed.
  // At least one flag is required and it must not run into an identifier,
  // which keeps most divisions and path-like strings from being matched.
  const regexLiteral =
    /\/((?:[^\/\\\[\n\r]|\\[^\n\r]|\[(?:[^\]\\\n\r]|\\[^\n\r])*\])+)\/([dgimsuvy]+)(?![\w$])/g

  return content.replace(regexLiteral, (literal, pattern, flags) => {
    // Property escapes only exist in Unicode (u) / UnicodeSets (v) mode.
    if (!flags.includes('u') && !flags.includes('v')) {
      return literal
    }

    const { rewritten, sawPropertyEscape } = rewritePropertyEscapesInPattern(
      pattern,
      unicodePropertyMap,
    )

    // Leave literals without property escapes completely alone: dropping
    // their u/v flag would change the meaning of things like \u{1F600}.
    if (!sawPropertyEscape) {
      return literal
    }

    return `/${rewritten}/${flags.replace(/[uv]/g, '')}`
  })
}

packages/bootstrap/dist/bootstrap-npm.js

Lines changed: 41 additions & 41 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/bootstrap/dist/bootstrap-smol.js

Lines changed: 42 additions & 42 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)