@@ -117,10 +117,105 @@ export function smolTransformPlugin() {
117117 }
118118 }
119119
120+ // Transform Unicode property escapes for --with-intl=none compatibility.
121+ // This is CRITICAL for smol builds which disable ICU to save 6-8MB.
122+ content = transformUnicodePropertyEscapes ( content )
123+
120124 // Update the output content.
121125 output . contents = Buffer . from ( content , 'utf8' )
122126 }
123127 } )
124128 } ,
125129 }
126130}
131+
/**
 * Rewrite Unicode property escapes (`\p{...}`) into plain character classes so
 * the code keeps working on runtimes built with `--with-intl=none`.
 *
 * The substitutions are ASCII/BMP approximations taken from the table below,
 * not exact Unicode equivalents. Property names missing from the table are
 * left in place.
 *
 * Processing happens in two stages:
 *   1. Regex literals (`/pattern/flags`) that contain `\p{` are rewritten with
 *      awareness of whether each escape sits inside a `[...]` class, and only
 *      those literals lose their `u`/`v` flags.
 *   2. Any known escapes still present afterwards (for example inside strings
 *      passed to `new RegExp`) go through the text-wide, best-effort passes.
 *
 * Limitations: regex literals are located with a heuristic, not a parser;
 * negated escapes (`\P{...}`) and nested `v`-mode classes are not handled.
 *
 * @param {string} content - Source code to transform
 * @returns {string} Transformed source code
 */
function transformUnicodePropertyEscapes(content) {
  // Without any property escape there is nothing to do. Returning early keeps
  // unrelated regexes (e.g. /\u{1F600}/u) and slash-containing strings intact.
  if (!content.includes('\\p{')) {
    return content
  }

  // Map of Unicode property escapes to basic character class alternatives.
  // `__proto__: null` keeps names like "constructor" from resolving.
  const unicodePropertyMap = {
    __proto__: null,
    // Letter categories.
    'Letter': 'a-zA-Z',
    'L': 'a-zA-Z',
    'Alpha': 'a-zA-Z',
    'Alphabetic': 'a-zA-Z',
    // Number categories.
    'Number': '0-9',
    'N': '0-9',
    'Digit': '0-9',
    'Nd': '0-9',
    // Whitespace.
    'Space': '\\s',
    'White_Space': '\\s',
    // ASCII range.
    'ASCII': '\\x00-\\x7F',
    // Control characters (basic approximation).
    'Control': '\\x00-\\x1F\\x7F-\\x9F',
    'Cc': '\\x00-\\x1F\\x7F-\\x9F',
    // Format characters (approximate with zero-width space).
    'Format': '\\u200B-\\u200D\\uFEFF',
    'Cf': '\\u200B-\\u200D\\uFEFF',
    // Mark categories (combining marks - approximate).
    'Mark': '\\u0300-\\u036F',
    'M': '\\u0300-\\u036F',
    // Default_Ignorable_Code_Point (approximate with common invisibles).
    'Default_Ignorable_Code_Point': '\\u00AD\\u034F\\u061C\\u115F-\\u1160\\u17B4-\\u17B5\\u180B-\\u180D\\u200B-\\u200F\\u202A-\\u202E\\u2060-\\u206F\\u3164\\uFE00-\\uFE0F\\uFEFF\\uFFA0\\uFFF0-\\uFFF8',
  }

  // Heuristic regex-literal matcher: stays on one line, understands [...]
  // classes (which may contain "/"), skips comment openers, and requires the
  // flags not to run into an identifier.
  const regexLiteral =
    /\/(?![\/*])((?:[^\/\\\[\r\n]|\\.|\[(?:[^\]\\\r\n]|\\.)*\])+)\/([dgimsuvy]+)(?![\w$])/g

  // Tokens of interest inside a pattern: a property escape, any other escaped
  // character (so `\[`, `\]` and `\\p` are skipped), or a class delimiter.
  const patternToken = /\\p\{([^{}]+)\}|\\.|[\[\]]/g

  // A single property escape anywhere in arbitrary text.
  const propertyEscape = /\\p\{([^{}]+)\}/g

  // Replace every known escape in `text`; `bracketed` wraps the ranges in
  // [...] for escapes that stand outside a character class.
  const substitute = (text, bracketed) =>
    text.replace(propertyEscape, (escape, name) => {
      const ranges = unicodePropertyMap[name]
      if (ranges === undefined) {
        return escape
      }
      return bracketed ? `[${ranges}]` : ranges
    })

  // Stage 1: rewrite regex literals that actually use property escapes.
  // Example: /[\p{Letter}\p{Number}]+/u → /[a-zA-Z0-9]+/
  // Example: /\p{Letter}+/u → /[a-zA-Z]+/
  let transformed = content.replace(regexLiteral, (literal, pattern, flags) => {
    if (!pattern.includes('\\p{')) {
      // Not ours to touch: keep pattern and flags exactly as written.
      return literal
    }
    let insideClass = false
    const newPattern = pattern.replace(patternToken, (token, name) => {
      if (token === '[') {
        insideClass = true
        return token
      }
      if (token === ']') {
        insideClass = false
        return token
      }
      const ranges = name === undefined ? undefined : unicodePropertyMap[name]
      if (ranges === undefined) {
        return token
      }
      return insideClass ? ranges : `[${ranges}]`
    })
    // Drop u/v only here, on a literal that used property escapes.
    return `/${newPattern}/${flags.replace(/[uv]/g, '')}`
  })

  // Stage 2 (fallback): known escapes that were not part of a recognised
  // literal. First those sitting between "[" and "]", then standalone ones.
  // This is a simplified approach - proper parsing would be better.
  transformed = transformed.replace(
    /\[([^\]]*\\p\{[^}]+\}[^\]]*)\]/g,
    (_match, charClass) => `[${substitute(charClass, false)}]`,
  )
  transformed = substitute(transformed, true)

  return transformed
}
0 commit comments