@@ -1914,8 +1914,13 @@ export function correctTranslatedContentStrings(
19141914 else englishSpaces . add ( m [ 0 ] )
19151915 }
19161916 if ( englishLinebreaks . size > 0 ) {
1917- content = content . replace ( / \{ % ( .+ ?) % \} / g, ( match ) => {
1917+ content = content . replace ( / \{ % ( .+ ?) % \} / g, ( match , _p1 , offset , string ) => {
19181918 if ( match . lastIndexOf ( '{%' ) > 0 ) return match
1919+ // Don't inject a linebreak when the tag is inside a heading line — doing
1920+ // so would split `#### {% data X %} Japanese text` into a heading with
1921+ // no content followed by a loose paragraph of Japanese text.
1922+ const lineStart = ( string as string ) . lastIndexOf ( '\n' , offset ) + 1
1923+ if ( / ^ [ \t ] { 0 , 3 } # { 1 , 6 } / . test ( ( string as string ) . slice ( lineStart , offset ) ) ) return match
19191924 const withLinebreak = `${ match . slice ( 0 , - 1 ) } \n`
19201925 if ( englishLinebreaks . has ( withLinebreak ) && ! englishSpaces . has ( match ) ) {
19211926 return withLinebreak
@@ -2058,11 +2063,12 @@ export function correctTranslatedContentStrings(
20582063 * Rejoin marker lines that the translation pipeline split from their content.
20592064 *
20602065 * Translators sometimes leave a heading marker (`#`/`##`/...), blockquote
2061- * marker (`>`), or the opening `**` of a bold span (immediately following a
2062- * list/heading/blockquote/table marker) on its own line, with the rest of
2063- * the content pushed to the next line as deeply indented text. This breaks
2064- * rendering (empty headings, broken blockquotes, unrendered bold, unexpanded
2065- * Liquid and `[AUTOTITLE]` links).
2066+ * marker (`>`), ordered-list marker (`1.`, `2.`, ...), or the opening `**`
2067+ * of a bold span (immediately following a list/heading/blockquote/table
2068+ * marker) on its own line, with the rest of the content pushed to the next
2069+ * line as deeply indented text. This breaks rendering (empty headings, broken
2070+ * blockquotes, broken ordered lists rendered as code blocks, unrendered bold,
2071+ * unexpanded Liquid and `[AUTOTITLE]` links).
20662072 *
20672073 * Conservative thresholds:
20682074 * - Marker line has 0–3 leading spaces (CommonMark heading/blockquote rule).
@@ -2081,11 +2087,21 @@ function joinDanglingMarkers(content: string): string {
20812087 // Marker-only line patterns (run only against non-fenced, non-frontmatter lines).
20822088 const headingOnly = / ^ ( [ \t ] { 0 , 3 } ) ( # { 1 , 6 } ) [ \t ] * $ /
20832089 const blockquoteOnly = / ^ ( [ \t ] { 0 , 3 } > ) [ \t ] * $ /
2090+ // Ordered-list marker alone on a line: `1. \n content`.
2091+ const orderedListOnly = / ^ ( [ \t ] { 0 , 3 } \d + \. ) [ \t ] * $ /
20842092 // Bold-open after a list/heading/blockquote/table marker (no other content).
20852093 const markerThenBoldOnly =
20862094 / ^ ( [ \t ] { 0 , 3 } (?: [ * + - ] | \d + \. ) [ \t ] + | [ \t ] { 0 , 3 } > [ \t ] + | [ \t ] { 0 , 3 } # { 1 , 6 } [ \t ] + | \| [ \t ] * ) \* \* [ \t ] * $ /
20872095 // Continuation: 6+ leading spaces and at least one non-whitespace character.
2096+ // Used when checking whether the *next* line is a deeply-indented continuation
2097+ // after a recognised marker.
20882098 const deepIndented = / ^ [ \t ] { 6 , } ( \S .* ) $ /
2099+ // Standalone deeply-indented paragraph: 9+ leading spaces. Translation
2100+ // artifacts consistently use 14 spaces; legitimate list-continuation content
2101+ // uses at most 6 spaces (confirmed by corpus analysis). The 9+ threshold
2102+ // keeps the two populations well separated and is fence-safe given the
2103+ // widened fence detection in the loop below.
2104+ const veryDeepIndented = / ^ [ \t ] { 9 , } ( \S .* ) $ /
20892105
20902106 for ( let i = 0 ; i < lines . length ; i ++ ) {
20912107 const line = lines [ i ]
@@ -2108,7 +2124,12 @@ function joinDanglingMarkers(content: string): string {
21082124 }
21092125
21102126 // Fenced code block delimiter: optional leading whitespace, then 3+ ` or ~.
2111- const fenceMatch = line . match ( / ^ [ \t ] { 0 , 3 } ( ` { 3 , } | ~ { 3 , } ) / )
2127+ // CommonMark permits fences to be indented 0–3 spaces at the document
2128+ // level, but inside a list item a fence can appear at 4+ spaces of
2129+ // leading indentation. Use `^[ \t]*` so that code blocks nested inside
2130+ // list items (e.g. ` ```json`) are correctly recognised and their
2131+ // content is not inadvertently stripped by the selfStrip pass below.
2132+ const fenceMatch = line . match ( / ^ [ \t ] * ( ` { 3 , } | ~ { 3 , } ) / )
21122133 if ( fenceMatch ) {
21132134 const marker = fenceMatch [ 1 ]
21142135 if ( ! inFence ) {
@@ -2129,6 +2150,21 @@ function joinDanglingMarkers(content: string): string {
21292150 continue
21302151 }
21312152
2153+ // A line that itself starts with 9+ spaces and is not inside a code fence
2154+ // is a translation-pipeline corruption artifact: the pipeline indented an
2155+ // entire paragraph line, causing CommonMark to render it as an indented
2156+ // code block (4+ spaces at the document level = code block). Strip the
2157+ // leading whitespace so the content renders as a normal paragraph.
2158+ // Marker-only lines (headings `# `, blockquotes `> `, list items `1. `)
2159+ // always have ≤3 leading spaces, so they are never misidentified here.
2160+ // The 9+ threshold (vs the 6+ used for nextDeep) ensures that legitimate
2161+ // list-continuation lines (which use ≤6 spaces) are never stripped.
2162+ const selfStrip = line . match ( veryDeepIndented )
2163+ if ( selfStrip ) {
2164+ out . push ( selfStrip [ 1 ] )
2165+ continue
2166+ }
2167+
21322168 const next = i + 1 < lines . length ? lines [ i + 1 ] : undefined
21332169 const nextDeep = next !== undefined ? next . match ( deepIndented ) : null
21342170 if ( ! nextDeep ) {
@@ -2151,6 +2187,13 @@ function joinDanglingMarkers(content: string): string {
21512187 continue
21522188 }
21532189
2190+ const ol = line . match ( orderedListOnly )
2191+ if ( ol ) {
2192+ out . push ( `${ ol [ 1 ] } ${ nextContent } ` )
2193+ i ++
2194+ continue
2195+ }
2196+
21542197 const boldOpen = line . match ( markerThenBoldOnly )
21552198 if ( boldOpen ) {
21562199 out . push ( `${ boldOpen [ 1 ] } **${ nextContent } ` )
0 commit comments