Skip to content

Commit 4605527

Browse files
committed
Merge branch 'localization' into develop
2 parents 24ea67e + 16932d7 commit 4605527

1 file changed

Lines changed: 96 additions & 10 deletions

File tree

scripts/translate.mjs

Lines changed: 96 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -339,20 +339,29 @@ async function translateForLang(client, lang, localesDir, hashesDir, systemPromp
339339

340340
// Determine which files actually need translation
341341
const toTranslate = [];
342+
const toSeed = []; // unchanged files that have no section cache yet
342343
for (const file of files) {
343344
const basename = path.basename(file, '.mdx');
344345
const translatedPath = path.join(localesDir, `${basename}.mdx`);
345346

346347
if (flagIncremental) {
347348
const currentHash = await fileHash(file);
348349
const storedHash = await getStoredHash(basename, hashesDir);
349-
if (storedHash === currentHash) continue;
350+
if (storedHash === currentHash) {
351+
// File unchanged — but check if section cache is missing (so we can seed it now
352+
// without Claude rather than waiting for the next real change to trigger 20+ Claude calls).
353+
const hashFilePath = path.join(hashesDir, `${basename}.json`);
354+
let cachedData = null;
355+
try { cachedData = JSON.parse(await fs.readFile(hashFilePath, 'utf-8')); } catch { /* ok */ }
356+
if (!cachedData?.sections) toSeed.push(file);
357+
continue;
358+
}
350359

351360
if (!storedHash) {
352361
// No hash file — check whether a translation already exists.
353362
// This happens when .hashes was deleted or the GH Action cache was cold.
354-
// Write the current hash and skip so we don't retranslate an already-done file.
355-
// The section cache will be seeded lazily on the next real content change.
363+
// Write the current hash and queue for section seeding so the next snippet
364+
// change doesn't trigger a full retranslation.
356365
try {
357366
await fs.access(translatedPath);
358367
await fs.mkdir(hashesDir, { recursive: true });
@@ -361,7 +370,8 @@ async function translateForLang(client, lang, localesDir, hashesDir, systemPromp
361370
JSON.stringify({ fileHash: currentHash }),
362371
'utf-8'
363372
);
364-
continue; // already translated — just record the hash
373+
toSeed.push(file);
374+
continue; // already translated — record hash + queue section seeding
365375
} catch { /* no translation exists → fall through and translate */ }
366376
}
367377

@@ -378,6 +388,17 @@ async function translateForLang(client, lang, localesDir, hashesDir, systemPromp
378388
}
379389
}
380390

391+
// Seed section caches for up-to-date files that have no section-level cache.
392+
// This runs synchronously before translation so that subsequent incremental runs
393+
// on any of these files can use patchCodeBlocks instead of retranslating everything.
394+
if (toSeed.length > 0 && flagIncremental && syncMode) {
395+
for (const file of toSeed) {
396+
try {
397+
await seedSectionCache(file, localesDir, hashesDir, lang);
398+
} catch { /* seeding is best-effort — don't abort the run */ }
399+
}
400+
}
401+
381402
if (toTranslate.length === 0) {
382403
console.log(`${tag} Nothing to translate — all articles are up to date.`);
383404
return;
@@ -444,6 +465,67 @@ async function translateSync(client, files, systemPrompt, localesDir, hashesDir,
444465
console.log(`\n${tag} Done: ${translated} translated, ${errors} errors.`);
445466
}
446467

468+
// ---------------------------------------------------------------------------
469+
// Section cache seeding (no Claude — heading sections only)
470+
// ---------------------------------------------------------------------------
471+
472+
/**
 * Seeds the section-level cache for a file that already has a translation on disk but no
 * section cache, without calling Claude.
 *
 * Only heading-level sections are seeded. Paragraph chunks (IDs ending in "-p" followed by
 * 8 hex characters) are intentionally skipped — they will be translated by Claude on the
 * first incremental run that touches the file, and cached from that point onward. Seeding
 * eagerly means the NEXT snippet-only change can be handled by patching code blocks in the
 * cached heading sections rather than by a full retranslation.
 *
 * The function is conservative: it returns without writing anything when the translated
 * file cannot be read, when the source and the translation split into a different number
 * of heading sections, or when no section could be seeded.
 *
 * @param {string} file - Path to the source `.mdx` article.
 * @param {string} localesDir - Directory holding the translated `<basename>.mdx` file.
 * @param {string} hashesDir - Directory where `<basename>.json` (file hash + section cache) is written.
 * @param {string} lang - Target language; not used by this function, kept so existing callers keep working.
 * @returns {Promise<void>} Resolves once the cache file is written (or seeding was skipped).
 */
async function seedSectionCache(file, localesDir, hashesDir, lang) {
  const basename = path.basename(file, '.mdx');
  const content = await fs.readFile(file, 'utf-8');
  const translatedPath = path.join(localesDir, `${basename}.mdx`);

  let existingTranslation;
  try {
    existingTranslation = await fs.readFile(translatedPath, 'utf-8');
  } catch {
    return; // no translated file to seed from
  }

  // Local helper: the "sha256:<hex>" tag format stored in the cache file.
  const sha256Tag = (text) => `sha256:${crypto.createHash('sha256').update(text).digest('hex')}`;
  const paraChunkId = /-p[0-9a-f]{8}$/;

  const sections = deduplicateSectionIds(splitIntoSections(content));

  const engHeadSecs = splitIntoSections(content, { paragraphFallback: false });
  const zhHeadSecs = splitIntoSections(existingTranslation, { paragraphFallback: false });
  if (engHeadSecs.length !== zhHeadSecs.length) return; // mismatch — can't seed safely

  // Pair each source heading section with the translated section at the same position.
  // These IDs are NOT de-duplicated, so two headings may share an ID. Building the Map
  // naively would let the later section overwrite the earlier one, pairing a section with
  // the wrong translation. Ambiguous IDs are therefore dropped: those sections are simply
  // not seeded and get cached on their first real translation instead.
  const zhByHeadId = new Map();
  const ambiguousHeadIds = new Set();
  engHeadSecs.forEach((headSec, i) => {
    if (zhByHeadId.has(headSec.id)) {
      ambiguousHeadIds.add(headSec.id);
    } else {
      zhByHeadId.set(headSec.id, zhHeadSecs[i].content);
    }
  });
  for (const id of ambiguousHeadIds) {
    zhByHeadId.delete(id);
  }

  const seeded = {};
  for (const s of sections) {
    if (paraChunkId.test(s.id)) continue; // skip para chunks

    const zhContent = zhByHeadId.get(s.id);
    if (!zhContent) continue;

    const enBlocks = extractCodeBlocks(s.content);
    const zhBlocks = extractCodeBlocks(zhContent);
    const codeUnchanged =
      enBlocks.length === zhBlocks.length && enBlocks.every((block, j) => block === zhBlocks[j]);

    // When the code blocks already agree, record the real content hash so the section is an
    // exact cache hit. Otherwise record the hash of the translated text instead — presumably
    // so that the content-hash comparison misses while the prose hash still matches, letting
    // the incremental path patch the code blocks rather than retranslate.
    const contentHash = codeUnchanged ? sha256Tag(s.content) : sha256Tag(zhContent);
    const proseHash = sha256Tag(stripCodeBlocks(s.content));

    seeded[s.id] = { contentHash, proseHash, translation: zhContent };
  }

  const seededCount = Object.keys(seeded).length;
  if (seededCount === 0) return;

  const currentHash = await fileHash(file);
  const hashFile = path.join(hashesDir, `${basename}.json`);
  await fs.mkdir(hashesDir, { recursive: true });
  await fs.writeFile(hashFile, JSON.stringify({ fileHash: currentHash, sections: seeded }), 'utf-8');
  console.log(` ⟳ ${basename}: seeded section cache (${seededCount} heading sections)`);
}
528+
447529
// ---------------------------------------------------------------------------
448530
// Section-level incremental translation (--incremental only)
449531
// ---------------------------------------------------------------------------
@@ -461,15 +543,15 @@ async function translateFileWithSections(client, file, systemPrompt, localesDir,
461543
try {
462544
storedData = JSON.parse(await fs.readFile(hashFile, 'utf-8'));
463545
// Detect stale section caches that need re-seeding:
464-
// 1. Old positional paragraph IDs ("h2-foo-p1") — replaced by content-hash IDs ("h2-foo-pa3f7c1b2")
465-
// 2. Heading-only cache that no longer covers paragraph chunks (e.g. article grew, or
466-
// paragraph splitting was added after the cache was built)
546+
// Only clear when old positional paragraph IDs are present ("h2-foo-p1" → replaced by
547+
// content-hash IDs "h2-foo-pa3f7c1b2"). Heading-only caches (no para chunk IDs) are
548+
// intentionally preserved: seeding populates heading sections, para chunks get their cache
549+
// entries on the first Claude run, and the lookup naturally misses for uncached para chunks
550+
// without needing an explicit stale invalidation.
467551
if (storedData?.sections) {
468552
const cacheIds = Object.keys(storedData.sections);
469553
const hasOldPositionalIds = cacheIds.some(id => /-p\d+$/.test(id));
470-
const cacheHasParaChunks = cacheIds.some(id => /-p[0-9a-f]{8}$/.test(id));
471-
const sectionsNeedParaChunks = sections.some(s => /-p[0-9a-f]{8}$/.test(s.id));
472-
if (hasOldPositionalIds || (sectionsNeedParaChunks && !cacheHasParaChunks)) {
554+
if (hasOldPositionalIds) {
473555
storedData = { fileHash: storedData.fileHash };
474556
}
475557
}
@@ -560,13 +642,17 @@ async function translateFileWithSections(client, file, systemPrompt, localesDir,
560642

561643
if (cached?.contentHash === contentHash) {
562644
// No change at all — use cached translation
645+
console.log(` · ${section.id} → cache hit`);
563646
translation = cached.translation;
564647
} else if (cached?.proseHash === proseHash && cached?.translation) {
565648
// Only code blocks changed — patch existing translation without calling Claude
649+
console.log(` · ${section.id} → patch (code changed)`);
566650
translation = patchCodeBlocks(cached.translation, extractCodeBlocks(section.content));
567651
patchCount++;
568652
} else {
569653
// Prose changed (or first translation) — call Claude
654+
const reason = !cached ? 'no cache' : `prose changed (cached=${cached.proseHash?.slice(0,12)}, current=${proseHash.slice(0,12)})`;
655+
console.log(` · ${section.id} → claude (${reason})`);
570656
let response;
571657
try {
572658
response = await withRetry(() => client.messages.create({

0 commit comments

Comments
 (0)