Skip to content

Commit 21e79c1

Browse files
committed
Merge branch 'localization' into develop
2 parents: 3573eec + 2ebda9f — commit 21e79c1

1 file changed

Lines changed: 29 additions & 6 deletions

File tree

scripts/translate.mjs

Lines changed: 29 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -476,7 +476,25 @@ async function translateFileWithSections(client, file, systemPrompt, localesDir,
476476
const s = sections[i];
477477
const cH = 'sha256:' + crypto.createHash('sha256').update(s.content).digest('hex');
478478
const pH = 'sha256:' + crypto.createHash('sha256').update(stripCodeBlocks(s.content)).digest('hex');
479-
seeded[s.id] = { contentHash: cH, proseHash: pH, translation: translatedSecs[i].content };
479+
480+
// Check whether code blocks in the English source match those in the zh translation.
481+
// Code blocks are never translated, so a mismatch means the English source changed
482+
// since the zh translation was produced. In that case we must NOT mark the section
483+
// as up-to-date — instead we seed it as "prose cached, code stale" so that the
484+
// patchCodeBlocks path runs on the next iteration, updating code without Claude.
485+
const enBlocks = extractCodeBlocks(s.content);
486+
const zhBlocks = extractCodeBlocks(translatedSecs[i].content);
487+
const codeUnchanged = enBlocks.length === zhBlocks.length &&
488+
enBlocks.every((b, j) => b === zhBlocks[j]);
489+
490+
if (codeUnchanged) {
491+
seeded[s.id] = { contentHash: cH, proseHash: pH, translation: translatedSecs[i].content };
492+
} else {
493+
// Fake the "old" contentHash so that cH won't match on the next pass,
494+
// while proseHash stays current so patchCodeBlocks triggers (not Claude).
495+
const oldHash = 'sha256:' + crypto.createHash('sha256').update(translatedSecs[i].content).digest('hex');
496+
seeded[s.id] = { contentHash: oldHash, proseHash: pH, translation: translatedSecs[i].content };
497+
}
480498
}
481499
storedData = { sections: seeded };
482500
console.log(` ↺ ${basename}: seeded section cache from existing translation`);
@@ -1262,7 +1280,10 @@ function splitIntoSections(content) {
12621280
/**
12631281
* Split a section that is too large into paragraph-sized chunks separated by
12641282
* blank lines (respecting code block boundaries). Each chunk gets a stable
1265-
* positional ID: `<parentId>-p1`, `<parentId>-p2`, etc.
1283+
* content-hash-based ID: `<parentId>-p<8-char-hash>`.
1284+
* Using content hashes rather than positional counters means inserting or
1285+
* deleting a paragraph does not shift the IDs of surrounding chunks, so
1286+
* unchanged chunks still hit the translation cache (as long as the merge below does not move their boundaries).
12661287
* If the section cannot be split (e.g. one giant code block), returns it as-is.
12671288
*/
12681289
function splitByParagraphBlocks(section) {
@@ -1295,18 +1316,20 @@ function splitByParagraphBlocks(section) {
12951316
// Merge consecutive paragraph blocks into chunks that stay under the threshold
12961317
const chunks = [];
12971318
let current = '';
1298-
let idx = 1;
12991319
for (const block of rawBlocks) {
13001320
const candidate = current ? `${current}\n${block}` : block;
13011321
if (current && candidate.length > PARAGRAPH_FALLBACK_CHARS) {
1302-
chunks.push({ id: `${section.id}-p${idx}`, content: current });
1303-
idx++;
1322+
const h = crypto.createHash('sha256').update(current).digest('hex').slice(0, 8);
1323+
chunks.push({ id: `${section.id}-p${h}`, content: current });
13041324
current = block;
13051325
} else {
13061326
current = candidate;
13071327
}
13081328
}
1309-
if (current) chunks.push({ id: `${section.id}-p${idx}`, content: current });
1329+
if (current) {
1330+
const h = crypto.createHash('sha256').update(current).digest('hex').slice(0, 8);
1331+
chunks.push({ id: `${section.id}-p${h}`, content: current });
1332+
}
13101333

13111334
return chunks.length > 1 ? chunks : [section];
13121335
}

0 commit comments

Comments
 (0)