@@ -207,8 +207,10 @@ export function correctTranslatedContentStrings(
207207 if ( context . code === 'zh' ) {
208208 content = content . replaceAll ( '{% 数据variables' , '{% data variables' )
209209 content = content . replaceAll ( '{% 数据 variables' , '{% data variables' )
210- content = content . replaceAll ( '{% 数据可重用' , '{% data reusables' )
210+ // Order matters: the more specific `s.` variant must run first to
211+ // avoid the broader rule producing a double-s (`reusabless`).
211212 content = content . replaceAll ( '{% 数据可重用s.' , '{% data reusables.' )
213+ content = content . replaceAll ( '{% 数据可重用' , '{% data reusables' )
212214 content = content . replaceAll ( '{% 其他 %}' , '{% else %}' )
213215 content = content . replaceAll ( '{% 原始 %}' , '{% raw %}' )
214216 // Chinese `如果` = "if": `{ 如果 X %}` → `{% if X %}`
@@ -224,6 +226,7 @@ export function correctTranslatedContentStrings(
224226
225227 if ( context . code === 'ru' ) {
226228 content = content . replaceAll ( '[«AUTOTITLE»](' , '[AUTOTITLE](' )
229+ content = content . replaceAll ( '[АВТОЗАГОЛОВОК](' , '[AUTOTITLE](' )
227230 content = content . replaceAll ( '{% данных variables' , '{% data variables' )
228231 content = content . replaceAll ( '{% данных, variables' , '{% data variables' )
229232 content = content . replaceAll ( '{% данными variables' , '{% data variables' )
@@ -265,7 +268,15 @@ export function correctTranslatedContentStrings(
265268 content = content . replaceAll ( '{% конечным %}' , '{% endif %}' )
266269 // `{% конец %}` after `{% raw %}` means `{% endraw %}`, not `{% endif %}`.
267270 // Handle this BEFORE the generic `{% конец %}` → `{% endif %}` fallback.
268- content = content . replace ( / ( \{ % r a w % \} [ ^ ] * ?) \{ % к о н е ц % \} / g, '$1{% endraw %}' )
271+ // We use a split-based approach instead of `[^]*?` regex to avoid
272+ // catastrophic backtracking on large content (~20s on 150KB inputs).
273+ if ( content . includes ( '{% конец %}' ) && content . includes ( '{% raw %}' ) ) {
274+ const parts = content . split ( '{% raw %}' )
275+ for ( let i = 1 ; i < parts . length ; i ++ ) {
276+ parts [ i ] = parts [ i ] . replace ( '{% конец %}' , '{% endraw %}' )
277+ }
278+ content = parts . join ( '{% raw %}' )
279+ }
269280 content = content . replaceAll ( '{% конец %}' , '{% endif %}' )
270281 // Cyrillic transliteration of `elsif` (lossy → else, since version param is lost)
271282 content = content . replaceAll ( '{% Эльсиф %}' , '{% else %}' )
@@ -314,11 +325,21 @@ export function correctTranslatedContentStrings(
314325 // Rearranged `{% data VARIABLE_PATH %}` → `VARIABLE_PATH %данн... {% }`
315326 // The translation moved `data` (as `данных`/`данными`/`данные`) after the path
316327 // and split `% }` into `{% }` or `{% }`. Reconstruct the original tag.
317- content = content . replace ( / ( [ \w . - ] + \. [ \w . - ] + \. [ \w _ ] + ) % д а н н \w * [ ^ { ] * \{ % \s + \} / g, '{% data $1 %}' )
318- // Variant where `% }` appears BEFORE `данных`: `PATH % }данных {% .`
319- content = content . replace ( / ( [ \w . - ] + \. [ \w . - ] + \. [ \w _ ] + ) % \} д а н н \w * \s * \{ % \s * \. / g, '{% data $1 %}.' )
320- // Variant where path precedes `%{% data }`: `PATH %{% data }.`
321- content = content . replace ( / ( [ \w . - ] + \. [ \w . - ] + \. [ \w _ ] + ) % \{ % d a t a \s + \} / g, '{% data $1 %}' )
328+ // Guard: these regexes start with [\w.-]+ which backtracks O(n²) on large word-char strings.
329+ if ( content . includes ( '%данн' ) ) {
330+ content = content . replace (
331+ / ( [ \w . - ] + \. [ \w . - ] + \. [ \w _ ] + ) % д а н н \w * [ ^ { ] * \{ % \s + \} / g,
332+ '{% data $1 %}' ,
333+ )
334+ content = content . replace (
335+ / ( [ \w . - ] + \. [ \w . - ] + \. [ \w _ ] + ) % \} д а н н \w * \s * \{ % \s * \. / g,
336+ '{% data $1 %}.' ,
337+ )
338+ }
339+ if ( content . includes ( '%{% data' ) ) {
340+ // Variant where path precedes `%{% data }`: `PATH %{% data }.`
341+ content = content . replace ( / ( [ \w . - ] + \. [ \w . - ] + \. [ \w _ ] + ) % \{ % d a t a \s + \} / g, '{% data $1 %}' )
342+ }
322343
323344 // Translated octicon names
324345 content = content . replaceAll (
@@ -400,6 +421,10 @@ export function correctTranslatedContentStrings(
400421 content = content . replaceAll ( '{% Tipp %}' , '{% tip %}' )
401422 content = content . replaceAll ( '{%- Tipp %}' , '{%- tip %}' )
402423 content = content . replaceAll ( '{%- Tipp -%}' , '{%- tip -%}' )
424+ // Translated for-loop keywords: `für VARNAME in COLLECTION`
425+ content = content . replace ( / \{ % - ? f ü r ( \w + ) i n / g, ( match ) => {
426+ return match . replace ( 'für' , 'for' )
427+ } )
403428 }
404429
405430 // --- Generic fixes (all languages) ---
@@ -472,7 +497,15 @@ export function correctTranslatedContentStrings(
472497 content = content . replaceAll ( '{% %} de dados variables.' , '{% data variables.' )
473498
474499 // Fix `{% %}` used as `{% endraw %}` (follows raw content with Liquid expressions).
475- content = content . replace ( / ( \{ % r a w % \} [ ^ ] * ?) \{ % % \} / g, '$1{% endraw %}' )
500+ // We use a split-based approach instead of `[^]*?` regex to avoid
501+ // catastrophic backtracking on large content (~20s on 150KB inputs).
502+ if ( content . includes ( '{% %}' ) && content . includes ( '{% raw %}' ) ) {
503+ const parts = content . split ( '{% raw %}' )
504+ for ( let i = 1 ; i < parts . length ; i ++ ) {
505+ parts [ i ] = parts [ i ] . replace ( '{% %}' , '{% endraw %}' )
506+ }
507+ content = parts . join ( '{% raw %}' )
508+ }
476509
477510 // Fix `{% %}` used as `{% else %}` when it appears between ifversion and
478511 // endif on the same line: `{% ifversion X %}A{% %}B{% endif %}`.
@@ -509,33 +542,49 @@ export function correctTranslatedContentStrings(
509542 // Recover linebreaks that translations lose after Liquid closing tags.
510543 // Compares each `{% ... %} ` in the translation against the English
511544 // to see if it should be `{% ... %}\n` instead.
512- content = content . replace ( / \{ % ( .+ ?) % \} / g, ( match ) => {
513- if ( match . lastIndexOf ( '{%' ) > 0 ) return match
514- const withLinebreak = `${ match . slice ( 0 , - 1 ) } \n`
515- if ( englishContent . includes ( withLinebreak ) && ! englishContent . includes ( match ) ) {
516- return withLinebreak
545+ // Pre-build a Set of English Liquid-tag-with-linebreak strings so we
546+ // avoid O(tags × contentLength) repeated `String.includes()` scans.
547+ if ( englishContent ) {
548+ const englishLinebreaks = new Set < string > ( )
549+ const englishSpaces = new Set < string > ( )
550+ for ( const m of englishContent . matchAll ( / \{ % .+ ?% \} [ \n ] / g) ) {
551+ if ( m [ 0 ] . endsWith ( '\n' ) ) englishLinebreaks . add ( m [ 0 ] )
552+ else englishSpaces . add ( m [ 0 ] )
517553 }
518- return match
519- } )
520- // Special case: `{% endif %} | ` → `{% endif %}\n| ` when English has it.
521- content = content . replace ( / \{ % e n d i f % \} \| / g, ( match ) => {
522- const potentiallyBetter = '{% endif %}\n| '
523- if ( englishContent . includes ( potentiallyBetter ) ) {
524- return potentiallyBetter
554+ if ( englishLinebreaks . size > 0 ) {
555+ content = content . replace ( / \{ % ( .+ ?) % \} / g, ( match ) => {
556+ if ( match . lastIndexOf ( '{%' ) > 0 ) return match
557+ const withLinebreak = `${ match . slice ( 0 , - 1 ) } \n`
558+ if ( englishLinebreaks . has ( withLinebreak ) && ! englishSpaces . has ( match ) ) {
559+ return withLinebreak
560+ }
561+ return match
562+ } )
563+ // Special case: `{% endif %} | ` → `{% endif %}\n| ` when English has it.
564+ if ( englishContent . includes ( '{% endif %}\n| ' ) ) {
565+ content = content . replace ( / \{ % e n d i f % \} \| / g, '{% endif %}\n| ' )
566+ }
525567 }
526- return match
527- } )
568+ }
528569
529570 // Collapsed Markdown table rows — restore linebreaks between `|` cells.
530571 content = content . replaceAll ( ' | | ' , ' |\n| ' )
531572
532573 // Final catch-all: earlier normalizations (e.g. space-in-braces regex) can
533574 // recreate `{{% KEYWORD` patterns after the per-keyword fixes already ran.
534575 // Strip the extra `{` for known Liquid tag names.
576+ // Note: keywords matched with `\b` instead of a trailing space (e.g. `raw`,
577+ // `endif`) match whether or not a space follows, but only as whole words.
535578 content = content . replace (
536- / \{ \{ ( % \s * (?: d a t a | i f v e r s i o n | e l s i f | e n d i f | e l s e | e l s e \b | o c t i c o n | n o t e | e n d n o t e | t i p | e n d t i p | r a w | e n d r a w | c o m m e n t | e n d c o m m e n t | f o r | e n d f o r | a s s i g n | v s c o d e | e n d v s c o d e | v i s u a l s t u d i o | e n d v i s u a l s t u d i o | r o w h e a d e r s | e n d r o w h e a d e r s ) ) / g,
579+ / \{ \{ ( % \s * (?: d a t a | i f v e r s i o n | e l s i f | e n d i f \b | e l s e \b | o c t i c o n | n o t e \b | e n d n o t e \b | t i p \b | e n d t i p \b | r a w \b | e n d r a w \b | c o m m e n t \b | e n d c o m m e n t \b | f o r | e n d f o r \b | a s s i g n | v s c o d e \b | e n d v s c o d e \b | v i s u a l s t u d i o \b | e n d v i s u a l s t u d i o \b | r o w h e a d e r s \b | e n d r o w h e a d e r s \b ) ) / g,
537580 '{$1' ,
538581 )
539582
583+ // After the catch-all, `{%raw` (no space) can appear. Normalize to `{% raw`.
584+ content = content . replaceAll ( '{%raw %}' , '{% raw %}' )
585+ content = content . replaceAll ( '{%raw -%}' , '{% raw -%}' )
586+ content = content . replaceAll ( '{%endraw %}' , '{% endraw %}' )
587+ content = content . replaceAll ( '{%endraw -%}' , '{% endraw -%}' )
588+
540589 return content
541590}
0 commit comments