@@ -476,7 +476,25 @@ async function translateFileWithSections(client, file, systemPrompt, localesDir,
476476 const s = sections [ i ] ;
477477 const cH = 'sha256:' + crypto . createHash ( 'sha256' ) . update ( s . content ) . digest ( 'hex' ) ;
478478 const pH = 'sha256:' + crypto . createHash ( 'sha256' ) . update ( stripCodeBlocks ( s . content ) ) . digest ( 'hex' ) ;
479- seeded [ s . id ] = { contentHash : cH , proseHash : pH , translation : translatedSecs [ i ] . content } ;
479+
480+ // Check whether code blocks in the English source match those in the zh translation.
481+ // Code blocks are never translated, so a mismatch means the English source changed
482+ // since the zh translation was produced. In that case we must NOT mark the section
483+ // as up-to-date — instead we seed it as "prose cached, code stale" so that the
484+ // patchCodeBlocks path runs on the next iteration, updating code without Claude.
485+ const enBlocks = extractCodeBlocks ( s . content ) ;
486+ const zhBlocks = extractCodeBlocks ( translatedSecs [ i ] . content ) ;
487+ const codeUnchanged = enBlocks . length === zhBlocks . length &&
488+ enBlocks . every ( ( b , j ) => b === zhBlocks [ j ] ) ;
489+
490+ if ( codeUnchanged ) {
491+ seeded [ s . id ] = { contentHash : cH , proseHash : pH , translation : translatedSecs [ i ] . content } ;
492+ } else {
493+ // Store the hash of the translated text as a stand-in "old" contentHash: it differs
494+ // from cH, so cH won't match on the next pass, while proseHash stays current so patchCodeBlocks triggers (not Claude).
495+ const oldHash = 'sha256:' + crypto . createHash ( 'sha256' ) . update ( translatedSecs [ i ] . content ) . digest ( 'hex' ) ;
496+ seeded [ s . id ] = { contentHash : oldHash , proseHash : pH , translation : translatedSecs [ i ] . content } ;
497+ }
480498 }
481499 storedData = { sections : seeded } ;
482500 console . log ( ` ↺ ${ basename } : seeded section cache from existing translation` ) ;
@@ -1262,7 +1280,10 @@ function splitIntoSections(content) {
12621280/**
12631281 * Split a section that is too large into paragraph-sized chunks separated by
12641282 * blank lines (respecting code block boundaries). Each chunk gets a stable
1265- * positional ID: `<parentId>-p1`, `<parentId>-p2`, etc.
1283+ * content-hash-based ID: `<parentId>-p<hash>`, where `<hash>` is the first 8 hex
1284+ * characters of the SHA-256 of the chunk content. Unlike positional counters,
1285+ * inserting or deleting a paragraph does not renumber the other chunks, so any
1286+ * chunk whose merged content is unchanged keeps its ID and hits the translation cache.
12661287 * If the section cannot be split (e.g. one giant code block), returns it as-is.
12671288 */
12681289function splitByParagraphBlocks ( section ) {
@@ -1295,18 +1316,20 @@ function splitByParagraphBlocks(section) {
12951316 // Merge consecutive paragraph blocks into chunks that stay under the threshold
12961317 const chunks = [ ] ;
12971318 let current = '' ;
1298- let idx = 1 ;
12991319 for ( const block of rawBlocks ) {
13001320 const candidate = current ? `${ current } \n${ block } ` : block ;
13011321 if ( current && candidate . length > PARAGRAPH_FALLBACK_CHARS ) {
1302- chunks . push ( { id : ` ${ section . id } -p ${ idx } ` , content : current } ) ;
1303- idx ++ ;
1322+ const h = crypto . createHash ( 'sha256' ) . update ( current ) . digest ( 'hex' ) . slice ( 0 , 8 ) ;
1323+ chunks . push ( { id : ` ${ section . id } -p ${ h } ` , content : current } ) ;
13041324 current = block ;
13051325 } else {
13061326 current = candidate ;
13071327 }
13081328 }
1309- if ( current ) chunks . push ( { id : `${ section . id } -p${ idx } ` , content : current } ) ;
1329+ if ( current ) {
1330+ const h = crypto . createHash ( 'sha256' ) . update ( current ) . digest ( 'hex' ) . slice ( 0 , 8 ) ;
1331+ chunks . push ( { id : `${ section . id } -p${ h } ` , content : current } ) ;
1332+ }
13101333
13111334 return chunks . length > 1 ? chunks : [ section ] ;
13121335}
0 commit comments