Skip to content

Commit c18b0e7

Browse files
authored
Fix translation corruption patterns causing search scraping timeouts (#60544)
1 parent 98dfc84 commit c18b0e7

File tree

2 files changed

+80
-0
lines changed

2 files changed

+80
-0
lines changed

src/languages/lib/correct-translation-content.ts

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,13 @@ export function correctTranslatedContentStrings(
188188
content = content.replaceAll('{% dados variables', '{% data variables')
189189
content = content.replaceAll('{% de dados variables', '{% data variables')
190190
content = content.replaceAll('{% dados reusables', '{% data reusables')
191+
// Fully translated reusables path: `{% dados reutilizáveis.X.Y %}` → `{% data reusables.X.Y %}`
192+
content = content.replaceAll('{% dados reutilizáveis.', '{% data reusables.')
193+
// Translated path segment inside reusables path: `repositórios` → `repositories`
194+
content = content.replaceAll(
195+
'{% data reusables.repositórios.',
196+
'{% data reusables.repositories.',
197+
)
191198
content = content.replaceAll('{{% dados ', '{% data ')
192199
content = content.replaceAll('{{% datas ', '{% data ')
193200
content = content.replaceAll('{% senão %}', '{% else %}')
@@ -375,6 +382,18 @@ export function correctTranslatedContentStrings(
375382
content = content.replaceAll('{% conseil %}', '{% tip %}')
376383
content = content.replaceAll('{%- conseil %}', '{%- tip %}')
377384
content = content.replaceAll('{%- conseil -%}', '{%- tip -%}')
385+
// Remove orphaned {% endif %} tags when no ifversion/elsif opener exists in the content.
386+
// Caused by translations where only the closing tag survived (e.g. user-api.md reusable).
387+
if (
388+
!content.includes('{% ifversion ') &&
389+
!content.includes('{%- ifversion ') &&
390+
!content.includes('{% elsif ') &&
391+
!content.includes('{%- elsif ')
392+
) {
393+
content = content.replaceAll('{% endif %}', '')
394+
content = content.replaceAll('{%- endif %}', '')
395+
content = content.replaceAll('{%- endif -%}', '')
396+
}
378397
}
379398

380399
if (context.code === 'ko') {
@@ -403,6 +422,11 @@ export function correctTranslatedContentStrings(
403422
content = content.replaceAll('{% daten variables', '{% data variables')
404423
content = content.replaceAll('{% Data variables', '{% data variables')
405424
content = content.replaceAll('{% Daten reusables', '{% data reusables')
425+
content = content.replaceAll('{% Data reusables', '{% data reusables')
426+
// `wiederverwendbare` is German for "reusables" — fix translated reusables paths
427+
content = content.replaceAll('{% data wiederverwendbare.', '{% data reusables.')
428+
content = content.replaceAll('{% Daten wiederverwendbare.', '{% data reusables.')
429+
content = content.replaceAll('{% Data wiederverwendbare.', '{% data reusables.')
406430
content = content.replaceAll('{%-Daten variables', '{%- data variables')
407431
content = content.replaceAll('{%-Daten-variables', '{%- data variables')
408432
content = content.replaceAll('{%- ifversion fpt oder ghec %}', '{%- ifversion fpt or ghec %}')
@@ -474,6 +498,8 @@ export function correctTranslatedContentStrings(
474498

475499
// Corrupted `{ endif %}%` → `{% endif %}` (delimiters shuffled)
476500
content = content.replaceAll('{ endif %}%', '{% endif %}')
501+
// Corrupted `{ endif% %}` → `{% endif %}` (percent placed after keyword instead of after brace)
502+
content = content.replaceAll('{ endif% %}', '{% endif %}')
477503
// Empty tag `{%}` (no space, no name) — typically `{% else %}`
478504
content = content.replace(/\{%\}(?!})/g, '{% else %}')
479505
// `{% }` or `{%  }` (tag whose name is missing, leaving only one or more spaces before `}`) — almost always `{% endif %}`

src/languages/tests/correct-translation-content.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,11 @@ describe('correctTranslatedContentStrings', () => {
211211
'{%- assign supportLevel = entry.support -%}',
212212
)
213213
})
214+
215+
test('fixes garbled endif with percent placed after keyword', () => {
216+
// `{ endif% %}` — percent appears after "endif" instead of after the opening brace
217+
expect(fix('some content\n{ endif% %}\nmore', 'ja')).toBe('some content\n{% endif %}\nmore')
218+
})
214219
})
215220

216221
// ─── PORTUGUESE (pt) ───────────────────────────────────────────────
@@ -264,6 +269,21 @@ describe('correctTranslatedContentStrings', () => {
264269
expect(fix('{% ifversion fpt ou ghec %}', 'pt')).toBe('{% ifversion fpt or ghec %}')
265270
expect(fix('{%- elsif fpt ou ghec %}', 'pt')).toBe('{%- elsif fpt or ghec %}')
266271
})
272+
273+
test('fixes fully translated reutilizáveis reusables path', () => {
274+
// `reutilizáveis` is Portuguese for "reusables"
275+
expect(fix('{% dados reutilizáveis.repositórios.reaction_list %}', 'pt')).toBe(
276+
'{% data reusables.repositories.reaction_list %}',
277+
)
278+
expect(fix('{% dados reutilizáveis.foo.bar %}', 'pt')).toBe('{% data reusables.foo.bar %}')
279+
})
280+
281+
test('fixes translated repositórios path segment', () => {
282+
// `repositórios` is Portuguese for "repositories"
283+
expect(fix('{% data reusables.repositórios.reaction_list %}', 'pt')).toBe(
284+
'{% data reusables.repositories.reaction_list %}',
285+
)
286+
})
267287
})
268288

269289
// ─── CHINESE (zh) ──────────────────────────────────────────────────
@@ -492,6 +512,25 @@ describe('correctTranslatedContentStrings', () => {
492512
expect(fix('{%- conseil %}', 'fr')).toBe('{%- tip %}')
493513
expect(fix('{%- conseil -%}', 'fr')).toBe('{%- tip -%}')
494514
})
515+
516+
test('removes orphaned endif when no matching ifversion/elsif opener exists', () => {
517+
// Caused by translations where only the closing tag survived (e.g. user-api.md reusable)
518+
expect(fix('Some content\n{% endif %}\nMore content', 'fr')).toBe(
519+
'Some content\n\nMore content',
520+
)
521+
expect(fix('Line one\n{%- endif %}\nLine two', 'fr')).toBe('Line one\n\nLine two')
522+
expect(fix('Text {%- endif -%} more', 'fr')).toBe('Text more')
523+
})
524+
525+
test('preserves endif when matching ifversion opener is present', () => {
526+
const input = '{% ifversion ghec %}content{% endif %}'
527+
expect(fix(input, 'fr')).toBe(input)
528+
})
529+
530+
test('preserves endif when elsif opener is present', () => {
531+
const input = '{% ifversion fpt %}a{% elsif ghec %}b{% endif %}'
532+
expect(fix(input, 'fr')).toBe(input)
533+
})
495534
})
496535

497536
// ─── KOREAN (ko) ──────────────────────────────────────────────────
@@ -586,6 +625,21 @@ describe('correctTranslatedContentStrings', () => {
586625
)
587626
expect(fix('{% für entry in list %}', 'de')).toBe('{% for entry in list %}')
588627
})
628+
629+
test('fixes wiederverwendbare reusables path', () => {
630+
// `wiederverwendbare` is German for "reusables"
631+
expect(fix('{% data wiederverwendbare.audit_log.reference %}', 'de')).toBe(
632+
'{% data reusables.audit_log.reference %}',
633+
)
634+
expect(fix('{% Daten wiederverwendbare.audit_log.reference %}', 'de')).toBe(
635+
'{% data reusables.audit_log.reference %}',
636+
)
637+
// Full real-world example: `{% Data wiederverwendbare.audit_log.referenz-nach-kategorie-gruppiert %}`
638+
// The capitalized `{% Data wiederverwendbare.` prefix has its own replacement rule, so the whole tag is normalized in one pass:
639+
expect(
640+
fix('{% Data wiederverwendbare.audit_log.referenz-nach-kategorie-gruppiert %}', 'de'),
641+
).toBe('{% data reusables.audit_log.referenz-nach-kategorie-gruppiert %}')
642+
})
589643
})
590644

591645
// ─── GENERIC FIXES ────────────────────────────────────────────────

0 commit comments

Comments
 (0)