Skip to content

Commit 5dc5ad8

Browse files
heiskr and Copilot authored
🌎 Add translation corruption correction patterns (#60653)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 56fbef1 commit 5dc5ad8

File tree

2 files changed

+466
-0
lines changed

2 files changed

+466
-0
lines changed

src/languages/lib/correct-translation-content.ts

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ export function correctTranslatedContentStrings(
3131
content = content.replaceAll('{% de datos variables', '{% data variables')
3232
content = content.replaceAll('{% datos reusables', '{% data reusables')
3333
content = content.replaceAll('{% data reutilizables.', '{% data reusables.')
34+
// `{% datos reutilizables.` — fully translated "data reusables" path
35+
content = content.replaceAll('{% datos reutilizables.', '{% data reusables.')
36+
// `{% datos repositorios.` — translated "repositories" path segment
37+
content = content.replaceAll('{% datos repositorios.', '{% data reusables.repositories.')
38+
// `{% datos de variables.` — reversed word order with extra "de"
39+
content = content.replaceAll('{% datos de variables.', '{% data variables.')
40+
// `{% variables de datos.` — reversed word order "variables of data"
41+
content = content.replaceAll('{% variables de datos.', '{% data variables.')
42+
// `{% Datos ` — capitalized "datos" = data
43+
content = content.replaceAll('{% Datos variables', '{% data variables')
44+
// `{% dato ` — singular form of "datos" = data
45+
content = content.replaceAll('{% dato variables', '{% data variables')
46+
// `{% variables.` — missing "data" prefix
47+
content = content.replaceAll('{% variables.', '{% data variables.')
3448
// Translated Liquid keywords
3549
content = content.replaceAll('{% comentario %}', '{% comment %}')
3650
content = content.replaceAll('{% si ', '{% if ')
@@ -113,6 +127,9 @@ export function correctTranslatedContentStrings(
113127
// `{% それ以外の %}` — truncated form of "in the other case" = else
114128
content = content.replaceAll('{% それ以外の %}', '{% else %}')
115129
content = content.replaceAll('{%- それ以外の %}', '{%- else %}')
130+
// `{% それ以外 %}` / `{%- それ以外 %}` — further-truncated form (missing の/場合) = else
131+
content = content.replaceAll('{% それ以外 %}', '{% else %}')
132+
content = content.replaceAll('{%- それ以外 %}', '{%- else %}')
116133
// `{% それ以外の場合 ifversion X %}` → `{% elsif X %}` (confused elsif + ifversion)
117134
content = content.replace(/\{% それ以外の場合 ifversion\s+(.+?)\s*%\}/g, '{% elsif $1 %}')
118135
// `{%- "supported" %}` → `{%- when "supported" %}` (missing `when`)
@@ -193,6 +210,12 @@ export function correctTranslatedContentStrings(
193210
return `${dash} assign ${varName} = ${value.trim()} ${closeDash}`
194211
},
195212
)
213+
// `{% 行ヘッダー %}` — "row headers" = rowheaders
214+
content = content.replaceAll('{% 行ヘッダー %}', '{% rowheaders %}')
215+
content = content.replaceAll('{%- 行ヘッダー %}', '{%- rowheaders %}')
216+
// `{% ウィンドウズ %}` — "Windows" = windows (platform tag)
217+
content = content.replaceAll('{% ウィンドウズ %}', '{% windows %}')
218+
content = content.replaceAll('{%- ウィンドウズ %}', '{%- windows %}')
196219
}
197220

198221
if (context.code === 'pt') {
@@ -216,10 +239,35 @@ export function correctTranslatedContentStrings(
216239
content = content.replaceAll('{% %de dados reusables.', '{% data reusables.')
217240
content = content.replaceAll('{% %de dados variables.', '{% data variables.')
218241
content = content.replaceAll('{% %móvel }', '{% mobile %}')
242+
// `{% variáveis de dados.` — reversed word order for "data variables" in Portuguese
243+
content = content.replaceAll('{% variáveis de dados.', '{% data variables.')
244+
content = content.replaceAll('{% variáveis de dados ', '{% data variables ')
245+
// `{% dados variáveis.` — alternate word order "data variables"
246+
content = content.replaceAll('{% dados variáveis.', '{% data variables.')
247+
// `{% janelas %}` — Portuguese "windows" = windows (platform tag)
248+
content = content.replaceAll('{% janelas %}', '{% windows %}')
249+
content = content.replaceAll('{%- janelas %}', '{%- windows %}')
250+
// `{% observação %}` — Portuguese "note" = note
251+
content = content.replaceAll('{% observação %}', '{% note %}')
252+
content = content.replaceAll('{%- observação %}', '{%- note %}')
253+
// `{% comentário %}` — Portuguese "comment" = comment
254+
content = content.replaceAll('{% comentário %}', '{% comment %}')
255+
// `{% nota de fim %}` — Portuguese "end note" = endnote
256+
content = content.replaceAll('{% nota de fim %}', '{% endnote %}')
257+
content = content.replaceAll('{%- nota de fim %}', '{%- endnote %}')
258+
// `{% Dados variables` — capitalized "Dados"
259+
content = content.replaceAll('{% Dados variables', '{% data variables')
260+
content = content.replaceAll('{%- Dados variables', '{%- data variables')
219261
// Catch "ou" between any plan names in ifversion/elsif/if tags
220262
content = content.replace(/\{%-? (?:ifversion|elsif|if) [^%]*?ou [^%]*?%\}/g, (match) => {
221263
return match.replace(/ ou /g, ' or ')
222264
})
265+
// Fully translated reusable path in audit log article:
266+
// `{% dados agrupados por categoria.complemento.audit_log.reference-grouped-by-category %}`
267+
content = content.replaceAll(
268+
'{% dados agrupados por categoria.complemento.audit_log.reference-grouped-by-category %}',
269+
'{% data reusables.audit_log.reference-grouped-by-category %}',
270+
)
223271
}
224272

225273
if (context.code === 'zh') {
@@ -243,6 +291,16 @@ export function correctTranslatedContentStrings(
243291
content = content.replace(/\{%-? (?:ifversion|elsif|if) [^%]*?[^%]*?%\}/g, (match) => {
244292
return match.replace(/ /g, ' or ')
245293
})
294+
// `{% 行标题 %}` — "row headers" = rowheaders
295+
content = content.replaceAll('{% 行标题 %}', '{% rowheaders %}')
296+
content = content.replaceAll('{%- 行标题 %}', '{%- rowheaders %}')
297+
// `{% 数据变量.` — "data variables" = data variables
298+
content = content.replaceAll('{% 数据变量.', '{% data variables.')
299+
// `{% Windows 操作系统 %}` — "Windows OS" = windows platform tag
300+
content = content.replaceAll('{% Windows 操作系统 %}', '{% windows %}')
301+
content = content.replaceAll('{%- Windows 操作系统 %}', '{%- windows %}')
302+
// `{% Windows终端 %}` — "Windows terminal" = windows platform tag
303+
content = content.replaceAll('{% Windows终端 %}', '{% windows %}')
246304
}
247305

248306
if (context.code === 'ru') {
@@ -287,6 +345,13 @@ export function correctTranslatedContentStrings(
287345
})
288346
content = content.replaceAll('{% endif _%}', '{% endif %}')
289347
content = content.replaceAll('{% конечным %}', '{% endif %}')
348+
// `{%- конец %}` — dash-trimmed form of "end" = endif
349+
content = content.replaceAll('{%- конец %}', '{%- endif %}')
350+
// `{%- конец для %}` — "end for" = endfor
351+
content = content.replaceAll('{%- конец для %}', '{%- endfor %}')
352+
// `{% заголовки строк %}` — "row headers" = rowheaders (opener; `{% endrowheaders %}` stays in English)
353+
content = content.replaceAll('{% заголовки строк %}', '{% rowheaders %}')
354+
content = content.replaceAll('{%- заголовки строк %}', '{%- rowheaders %}')
290355
// `{% конец %}` after `{% raw %}` means `{% endraw %}`, not `{% endif %}`.
291356
// Handle this BEFORE the generic `{% конец %}` → `{% endif %}` fallback.
292357
// We use a split-based approach instead of `[^]*?` regex to avoid
@@ -330,6 +395,14 @@ export function correctTranslatedContentStrings(
330395
content = content.replaceAll('{% запроса %}', '{% endraw %}')
331396
// `{% Mac %}` — capitalized mac platform tag
332397
content = content.replaceAll('{% Mac %}', '{% mac %}')
398+
// `{% Endwindows %}` — capitalized endwindows
399+
content = content.replaceAll('{% Endwindows %}', '{% endwindows %}')
400+
content = content.replaceAll('{%- Endwindows %}', '{%- endwindows %}')
401+
// `{% Elsif ` — capitalized elsif
402+
content = content.replace(/\{% Elsif /g, '{% elsif ')
403+
// `{% Linux %}` — capitalized linux platform tag
404+
content = content.replaceAll('{% Linux %}', '{% linux %}')
405+
content = content.replaceAll('{%- Linux %}', '{%- linux %}')
333406
// Fix double quotes in Russian YAML files that cause parsing errors
334407
content = content.replace(/href=""https:\/\//g, 'href="https://')
335408

@@ -382,10 +455,17 @@ export function correctTranslatedContentStrings(
382455
content = content.replaceAll('{% %brut }', '{% raw %}')
383456
content = content.replaceAll('{% redessiner %}', '{% endraw %}')
384457
content = content.replaceAll('{% données ', '{% data ')
458+
// `{% Données ` — capitalized form
459+
content = content.replaceAll('{% Données variables', '{% data variables')
460+
content = content.replaceAll('{% Données réutilisables.', '{% data reusables.')
385461
// Catch remaining "ou" between any plan names in ifversion/elsif/if tags
386462
content = content.replace(/\{%-? (?:ifversion|elsif|if) [^%]*?ou [^%]*?%\}/g, (match) => {
387463
return match.replace(/ ou /g, ' or ')
388464
})
465+
// French "et" for "and" in ifversion/elsif/if tags
466+
content = content.replace(/\{%-? (?:ifversion|elsif|if) [^%]*?\bet\b[^%]*?%\}/g, (match) => {
467+
return match.replace(/ et /g, ' and ')
468+
})
389469
// French guillemets «/» → " inside if/ifversion/elsif tags
390470
content = content.replace(/\{%-?\s*(?:if|ifversion|elsif)\s[^%]*?[«»][^%]*?%\}/g, (match) => {
391471
return match.replace(/«\s*/g, '"').replace(/\s*»/g, '"')
@@ -407,6 +487,12 @@ export function correctTranslatedContentStrings(
407487
// `{% sinon %}` / `{%- sinon %}` — French "otherwise" = else
408488
content = content.replaceAll('{% sinon %}', '{% else %}')
409489
content = content.replaceAll('{%- sinon %}', '{%- else %}')
490+
// `{% note de fin %}` / `{%- note de fin %}` — "end note" = endnote
491+
content = content.replaceAll('{% note de fin %}', '{% endnote %}')
492+
content = content.replaceAll('{%- note de fin %}', '{%- endnote %}')
493+
// `{% éclipse %}` — French accent on "eclipse" platform tag
494+
content = content.replaceAll('{% éclipse %}', '{% eclipse %}')
495+
content = content.replaceAll('{%- éclipse %}', '{%- eclipse %}')
410496
// Remove orphaned {% endif %} tags when no ifversion/elsif opener exists in the content.
411497
// Caused by translations where only the closing tag survived (e.g. user-api.md reusable).
412498
if (
@@ -431,6 +517,7 @@ export function correctTranslatedContentStrings(
431517
// Extra `%` before data: `{% % data` → `{% data`
432518
content = content.replaceAll('{% % data', '{% data')
433519
content = content.replaceAll('{% 기타 %}', '{% else %}')
520+
content = content.replaceAll('{%- 기타 %}', '{%- else %}')
434521
content = content.replaceAll('{% 참고 %}', '{% note %}')
435522
content = content.replaceAll('{% 원시 %}', '{% raw %}')
436523
// Catch "또는" between any plan names in ifversion/elsif/if tags
@@ -445,6 +532,16 @@ export function correctTranslatedContentStrings(
445532

446533
// Korean translation of github-glossary.md
447534
content = content.replaceAll('{{ 용어집.term }}', '{{ glossary.term }}')
535+
// `{% 데이터 재사용.` — Korean translation of "data reusables" path
536+
content = content.replaceAll('{% 데이터 재사용.', '{% data reusables.')
537+
// `{% datavariable` — compound missing space and plural: "datavariable" → "data variables"
538+
content = content.replaceAll('{% datavariable', '{% data variables')
539+
// `{% 행 머리글 %}` — "row headers" = rowheaders
540+
content = content.replaceAll('{% 행 머리글 %}', '{% rowheaders %}')
541+
content = content.replaceAll('{%- 행 머리글 %}', '{%- rowheaders %}')
542+
// `{% 윈도우즈 %}` — Korean transliteration of "windows"
543+
content = content.replaceAll('{% 윈도우즈 %}', '{% windows %}')
544+
content = content.replaceAll('{%- 윈도우즈 %}', '{%- windows %}')
448545
}
449546

450547
if (context.code === 'de') {
@@ -459,6 +556,23 @@ export function correctTranslatedContentStrings(
459556
content = content.replaceAll('{% Data wiederverwendbare.', '{% data reusables.')
460557
// `wiederverwendbar.` (without trailing 'e') — alternate German form
461558
content = content.replaceAll('{% Daten wiederverwendbar.', '{% data reusables.')
559+
// `daten wiederverwendbars.` — lowercase with trailing 's'
560+
content = content.replaceAll('{% daten wiederverwendbars.', '{% data reusables.')
561+
// `daten wiederverwendbar.` / `daten wiederverwendbare.` — without trailing 's'
562+
content = content.replaceAll('{% daten wiederverwendbar.', '{% data reusables.')
563+
content = content.replaceAll('{% daten wiederverwendbare.', '{% data reusables.')
564+
// `{%- Daten variables` — dash variant
565+
content = content.replaceAll('{%- Daten variables', '{%- data variables')
566+
// `{% Daten Variablen.` — both German words for "data variables"
567+
content = content.replaceAll('{% Daten Variablen.', '{% data variables.')
568+
// `{% daten reusables` — lowercase with English "reusables"
569+
content = content.replaceAll('{% daten reusables', '{% data reusables')
570+
// `{% unformatierte %}` — "unformatted" = raw
571+
content = content.replaceAll('{% unformatierte %}', '{% raw %}')
572+
content = content.replaceAll('{%- unformatierte %}', '{%- raw %}')
573+
// `Datenvariablen.` — German compound word for "data variables" (no space)
574+
content = content.replaceAll('{% Datenvariablen.', '{% data variables.')
575+
content = content.replaceAll('{%- Datenvariablen.', '{%- data variables.')
462576
content = content.replaceAll('{%-Daten variables', '{%- data variables')
463577
content = content.replaceAll('{%-Daten-variables', '{%- data variables')
464578
content = content.replaceAll('{%- ifversion fpt oder ghec %}', '{%- ifversion fpt or ghec %}')
@@ -484,9 +598,86 @@ export function correctTranslatedContentStrings(
484598
// `{% ansonsten %}` / `{%- ansonsten %}` — "otherwise" = else
485599
content = content.replaceAll('{% ansonsten %}', '{% else %}')
486600
content = content.replaceAll('{%- ansonsten %}', '{%- else %}')
601+
// `{% andernfalls %}` / `{% sonst %}` — "otherwise/else" = else
602+
content = content.replaceAll('{% andernfalls %}', '{% else %}')
603+
content = content.replaceAll('{%- andernfalls %}', '{%- else %}')
604+
content = content.replaceAll('{% sonst %}', '{% else %}')
605+
content = content.replaceAll('{%- sonst %}', '{%- else %}')
606+
// `{% andernfalls ifversion X %}` / `{% sonst ifversion X %}` → `{% elsif X %}`
607+
content = content.replace(/\{% andernfalls ifversion\s+(.+?)\s*%\}/g, '{% elsif $1 %}')
608+
content = content.replace(/\{% sonst ifversion\s+(.+?)\s*%\}/g, '{% elsif $1 %}')
487609
// `{% Zeilenkopfzeilen %}` — "row headers" = rowheaders
488610
content = content.replaceAll('{% Zeilenkopfzeilen %}', '{% rowheaders %}')
489611
content = content.replaceAll('{%- Zeilenkopfzeilen %}', '{%- rowheaders %}')
612+
// `{% Rohdaten %}` — German "raw data" = raw
613+
content = content.replaceAll('{% Rohdaten %}', '{% raw %}')
614+
content = content.replaceAll('{%- Rohdaten %}', '{%- raw %}')
615+
content = content.replaceAll('{%- Rohdaten -%}', '{%- raw -%}')
616+
// `{% okticon ` — "octicon" transliterated to "okticon"
617+
content = content.replaceAll('{% okticon ', '{% octicon ')
618+
// `{% Endnotiz %}` — "end note" = endnote
619+
content = content.replaceAll('{% Endnotiz %}', '{% endnote %}')
620+
content = content.replaceAll('{%- Endnotiz %}', '{%- endnote %}')
621+
// `{% endifen %}` — garbled "endif" = endif
622+
content = content.replaceAll('{% endifen %}', '{% endif %}')
623+
content = content.replaceAll('{%- endifen %}', '{%- endif %}')
624+
// `{% Endifen %}` — capitalized variant
625+
content = content.replaceAll('{% Endifen %}', '{% endif %}')
626+
content = content.replaceAll('{%- Endifen %}', '{%- endif %}')
627+
// `{% Endif %}` — capitalized endif
628+
content = content.replaceAll('{% Endif %}', '{% endif %}')
629+
content = content.replaceAll('{%- Endif %}', '{%- endif %}')
630+
content = content.replaceAll('{%- Endif -%}', '{%- endif -%}')
631+
// `{% Dateninstanz` — "data instance" = data
632+
content = content.replaceAll('{% Dateninstanz ', '{% data ')
633+
// `{% ifversion-Sicherheitskonfigurationen %}` — hyphenated compound
634+
content = content.replaceAll(
635+
'{% ifversion-Sicherheitskonfigurationen %}',
636+
'{% ifversion security-configurations %}',
637+
)
638+
content = content.replaceAll(
639+
'{%- ifversion-Sicherheitskonfigurationen %}',
640+
'{%- ifversion security-configurations %}',
641+
)
642+
// `{% ifversion-Unterprobleme %}` — hyphenated compound
643+
content = content.replaceAll('{% ifversion-Unterprobleme %}', '{% ifversion sub-issues %}')
644+
content = content.replaceAll('{%- ifversion-Unterprobleme %}', '{%- ifversion sub-issues %}')
645+
// `{% ifversion-Sicherheitskampagnen %}` — hyphenated compound
646+
content = content.replaceAll(
647+
'{% ifversion-Sicherheitskampagnen %}',
648+
'{% ifversion security-campaigns %}',
649+
)
650+
content = content.replaceAll(
651+
'{%- ifversion-Sicherheitskampagnen %}',
652+
'{%- ifversion security-campaigns %}',
653+
)
654+
// `{% ifversion-repo-policy-rules %}` — missing space before feature flag
655+
content = content.replaceAll(
656+
'{% ifversion-repo-policy-rules %}',
657+
'{% ifversion repo-policy-rules %}',
658+
)
659+
content = content.replaceAll(
660+
'{%- ifversion-repo-policy-rules %}',
661+
'{%- ifversion repo-policy-rules %}',
662+
)
663+
// `{% ifversion-enterprise-installed-apps %}` — missing space before feature flag
664+
content = content.replaceAll(
665+
'{% ifversion-enterprise-installed-apps %}',
666+
'{% ifversion enterprise-installed-apps %}',
667+
)
668+
content = content.replaceAll(
669+
'{%- ifversion-enterprise-installed-apps %}',
670+
'{%- ifversion enterprise-installed-apps %}',
671+
)
672+
// `{% Windows %}` — capitalized platform tag
673+
content = content.replaceAll('{% Windows %}', '{% windows %}')
674+
content = content.replaceAll('{%- Windows %}', '{%- windows %}')
675+
// `{% Linux %}` — capitalized platform tag
676+
content = content.replaceAll('{% Linux %}', '{% linux %}')
677+
content = content.replaceAll('{%- Linux %}', '{%- linux %}')
678+
// `{% Eclipse %}` — capitalized platform tag
679+
content = content.replaceAll('{% Eclipse %}', '{% eclipse %}')
680+
content = content.replaceAll('{%- Eclipse %}', '{%- eclipse %}')
490681
}
491682

492683
// --- Generic fixes (all languages) ---
@@ -500,6 +691,14 @@ export function correctTranslatedContentStrings(
500691
// Capitalized Liquid keyword: `{% Data ` → `{% data `
501692
content = content.replaceAll('{% Data ', '{% data ')
502693

694+
// Capitalized platform tags (cross-language)
695+
content = content.replaceAll('{% Windows %}', '{% windows %}')
696+
content = content.replaceAll('{%- Windows %}', '{%- windows %}')
697+
content = content.replaceAll('{% Linux %}', '{% linux %}')
698+
content = content.replaceAll('{%- Linux %}', '{%- linux %}')
699+
content = content.replaceAll('{% Eclipse %}', '{% eclipse %}')
700+
content = content.replaceAll('{%- Eclipse %}', '{%- eclipse %}')
701+
503702
// These run after per-language fixes so that e.g. `{{% данных variables`
504703
// first becomes `{{% data variables` and then gets caught here.
505704

0 commit comments

Comments
 (0)