Skip to content

Commit 361f7f2

Browse files
committed
Improvements
1 parent ad02d30 commit 361f7f2

7 files changed

Lines changed: 253 additions & 43 deletions

File tree

.github/workflows/translate.yml

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ jobs:
1717

1818
steps:
1919
- uses: actions/checkout@v4
20+
with:
21+
# Need parent commit to compute the diff
22+
fetch-depth: 2
2023

2124
# Hashes are gitignored but must persist between runs so --incremental
22-
# only re-translates files that actually changed since the last run.
25+
# only re-translates sections that actually changed within a file.
2326
# The key includes the commit SHA so each push creates a new entry;
2427
# restore-keys fetches the most recent previous entry as a fallback.
2528
- name: Restore translation hash cache
@@ -37,10 +40,46 @@ jobs:
3740
- name: Install dependencies
3841
run: npm ci
3942

43+
# Split the diff into added/modified/renamed files (to translate) and deleted files (to clean up).
44+
- name: Get changed translatable files
45+
id: diff
46+
run: |
47+
CHANGED=$(git diff --name-only --diff-filter=AMR HEAD^1 HEAD \
48+
| grep -E '^(src/content/docs/.+\.mdx|src/data/sidebars/.+\.json|src/api-reference/specs/[^./]+\.yaml)$' \
49+
| tr '\n' ',' | sed 's/,$//')
50+
DELETED=$(git diff --name-only --diff-filter=D HEAD^1 HEAD \
51+
| grep -E '^(src/content/docs/.+\.mdx|src/api-reference/specs/[^./]+\.yaml)$' \
52+
| tr '\n' ',' | sed 's/,$//')
53+
echo "files=$CHANGED" >> $GITHUB_OUTPUT
54+
echo "deleted=$DELETED" >> $GITHUB_OUTPUT
55+
4056
- name: Translate changed files
57+
if: steps.diff.outputs.files != ''
4158
env:
4259
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
43-
run: node scripts/translate.mjs --incremental
60+
run: node scripts/translate.mjs --incremental --only-files "${{ steps.diff.outputs.files }}"
61+
62+
# Remove translated files and hash caches for deleted source content.
63+
# Sidebars are intentionally excluded: deleting a sidebar JSON is rare and
64+
# rebuildSidebarLabels will naturally omit it on the next sidebar update.
65+
- name: Clean up deleted translations
66+
if: steps.diff.outputs.deleted != ''
67+
run: |
68+
for SRC_PATH in $(echo "${{ steps.diff.outputs.deleted }}" | tr ',' '\n'); do
69+
BASENAME=$(basename "$SRC_PATH")
70+
EXT="${BASENAME##*.}"
71+
NAME="${BASENAME%.*}"
72+
for LOCALE_DIR in src/locales/*/; do
73+
if [ "$EXT" = "mdx" ]; then
74+
rm -f "${LOCALE_DIR}${NAME}.mdx"
75+
rm -f "${LOCALE_DIR}.hashes/${NAME}.json"
76+
elif [ "$EXT" = "yaml" ]; then
77+
# Remove all localized variants: adapty-api.zh.yaml, etc.
78+
rm -f src/api-reference/specs/${NAME}.*.yaml
79+
rm -f "${LOCALE_DIR}.hashes/api-specs/${NAME}.json"
80+
fi
81+
done
82+
done
4483
4584
# Commit any new or updated locale files back to main.
4685
# This push triggers s3-deploy-production.yml, so the deploy that follows

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"version": "0.0.1",
55
"scripts": {
66
"dev": "npm run prebuild && npm run build:md && astro dev",
7-
"prebuild": "mkdir -p public/api-specs && rm -rf public/FF_img public/img_webhook_flows public/img && [ -d src/assets/shared/img ] && cp -r src/assets/shared/img public/ || true && [ -d src/assets/shared/FF_img ] && cp -r src/assets/shared/FF_img public/ || true && [ -d src/content/docs/version-3.0/img_webhook_flows ] && cp -r src/content/docs/version-3.0/img_webhook_flows public/ || true && [ -d src/api-reference/specs ] && cp -r src/api-reference/specs/* public/api-specs/ || true && node scripts/translate.mjs --incremental",
7+
"prebuild": "mkdir -p public/api-specs && rm -rf public/FF_img public/img_webhook_flows public/img && [ -d src/assets/shared/img ] && cp -r src/assets/shared/img public/ || true && [ -d src/assets/shared/FF_img ] && cp -r src/assets/shared/FF_img public/ || true && [ -d src/content/docs/version-3.0/img_webhook_flows ] && cp -r src/content/docs/version-3.0/img_webhook_flows public/ || true && [ -d src/api-reference/specs ] && cp -r src/api-reference/specs/* public/api-specs/ || true",
88
"build": "astro build && npm run build:md:prod",
99
"build:md": "node scripts/generate-md.mjs && node scripts/generate-llms.mjs && node scripts/generate-platform-llms-full.mjs",
1010
"build:md:prod": "node scripts/generate-md.mjs ../build && node scripts/generate-llms.mjs ../build && node scripts/generate-platform-llms-full.mjs ../build",

scripts/translate.mjs

Lines changed: 139 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
* ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --lang zh --api-specs --file adapty-api # single API spec
1414
* ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --incremental # all locales, changed files only (build pipeline)
1515
* ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --lang zh --incremental # single locale, changed files only
16+
* ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --incremental --only-files "src/content/docs/foo.mdx,src/data/sidebars/ios.json"
17+
* # incremental, but only check these paths (CI git-diff mode)
1618
*/
1719

1820
import fs from 'node:fs/promises';
@@ -70,6 +72,22 @@ const flagResume = resumeIdx !== -1;
7072
// Explicit batch ID passed after --resume (may be absent — auto-read from file in main())
7173
const resumeArgValue = flagResume ? args[resumeIdx + 1] : null;
7274

75+
// --only-files "<path>,<path>,…": restrict the run to an explicit list of
// repo-relative source paths (the CI workflow passes the git diff here).
const onlyFilesIdx = args.indexOf('--only-files');
const onlyFilesArg = onlyFilesIdx !== -1 ? args[onlyFilesIdx + 1] : undefined;

// A trailing `--only-files`, or one followed directly by another flag, has no
// value to split. Fail with a clear message instead of a TypeError (missing
// value) or a silent "nothing to do" (flag mistaken for a path list).
// An explicitly empty value ("") is still accepted and yields an empty list.
if (onlyFilesIdx !== -1 && (typeof onlyFilesArg !== 'string' || onlyFilesArg.startsWith('--'))) {
  console.error('[translate] --only-files requires a comma-separated list of paths');
  process.exit(1);
}

// null = flag absent (no filtering); [] = flag present but nothing listed.
const onlyFilePaths = onlyFilesIdx !== -1
  ? onlyFilesArg.split(',').map(s => s.trim()).filter(Boolean)
  : null;

// Derive per-category ID sets from --only-files (null = no filter).
// IDs are file basenames without extension, so the directory part of each
// path is only used to decide which category the path belongs to.
const onlyDocIds = onlyFilePaths
  ? new Set(
      onlyFilePaths
        .filter(p => p.startsWith('src/content/docs/') && p.endsWith('.mdx'))
        .map(p => path.basename(p, '.mdx')),
    )
  : null;
const onlySidebarNames = onlyFilePaths
  ? new Set(
      onlyFilePaths
        .filter(p => p.startsWith('src/data/sidebars/') && p.endsWith('.json'))
        .map(p => path.basename(p, '.json')),
    )
  : null;
// The regex admits only single-dot names directly under specs/, which excludes
// localized variants such as "adapty-api.zh.yaml".
const onlySpecIds = onlyFilePaths
  ? new Set(
      onlyFilePaths
        .filter(p => /^src\/api-reference\/specs\/[^./]+\.yaml$/.test(p))
        .map(p => path.basename(p, '.yaml')),
    )
  : null;
90+
7391
// Targeted operations require an explicit --lang
7492
if ((flagResume || fileId || fileIds || sidebarName || platform) && !lang) {
7593
console.error('[translate] --lang <code> is required when using --resume, --file, --ids, --sidebar, or --platform');
@@ -161,11 +179,7 @@ async function loadGlossary(lang) {
161179
const dict = JSON.parse(await fs.readFile(dictPath, 'utf-8'));
162180
const lines = Object.entries(dict)
163181
.filter(([_, translations]) => lang in translations)
164-
.map(([en, translations]) => {
165-
const tr = translations[lang];
166-
const note = translations['_note'] ? ` (${translations['_note']})` : '';
167-
return `- ${en}${tr}${note}`;
168-
});
182+
.map(([en, translations]) => `- ${en}${translations[lang]}`);
169183
if (lines.length === 0) return '';
170184
return `\nGLOSSARY — use these exact translations for product-specific terms (do not improvise):\n${lines.join('\n')}`;
171185
} catch {
@@ -209,6 +223,12 @@ async function main() {
209223
return;
210224
}
211225

226+
// --only-files: fast exit if the diff contains nothing translatable
227+
if (onlyFilePaths && onlyDocIds.size === 0 && onlySidebarNames.size === 0 && onlySpecIds.size === 0) {
228+
console.log('[translate] --only-files: no translatable files in diff — nothing to do.');
229+
return;
230+
}
231+
212232
// Determine which languages to process
213233
const langs = lang ? [lang] : await discoverLocales();
214234
if (langs.length === 0) {
@@ -229,17 +249,17 @@ async function main() {
229249
if (!flagApiSpecs) {
230250
// --sidebar targets a single sidebar only; skip article translation
231251
if (!sidebarName) {
232-
await translateForLang(client, currentLang, localesDir, hashesDir, systemPrompt, tag);
252+
await translateForLang(client, currentLang, localesDir, hashesDir, systemPrompt, tag, onlyDocIds);
233253
}
234254

235255
// Sidebars are not file/platform-specific; skip only for --file/--ids targeting
236256
if (!fileId && !fileIds) {
237-
await translateSidebarsForLang(client, currentLang, localesDir, hashesDir, targetLanguage, glossary, tag, sidebarName);
257+
await translateSidebarsForLang(client, currentLang, localesDir, hashesDir, targetLanguage, glossary, tag, sidebarName, onlySidebarNames);
238258
}
239259
}
240260

241261
if (flagApiSpecs || flagIncremental) {
242-
await translateApiSpecsForLang(client, currentLang, localesDir, hashesDir, targetLanguage, glossary, tag);
262+
await translateApiSpecsForLang(client, currentLang, localesDir, hashesDir, targetLanguage, glossary, tag, onlySpecIds);
243263
}
244264
}
245265
}
@@ -248,11 +268,17 @@ async function main() {
248268
// Per-language translation
249269
// ---------------------------------------------------------------------------
250270

251-
async function translateForLang(client, lang, localesDir, hashesDir, systemPrompt, tag) {
271+
async function translateForLang(client, lang, localesDir, hashesDir, systemPrompt, tag, onlyDocIds = null) {
252272
const allFiles = await collectMdxFiles(DOCS_DIR);
253273

274+
// Apply --only-files filter (git-diff mode): restrict to specific article IDs
275+
let files = onlyDocIds ? allFiles.filter(f => onlyDocIds.has(path.basename(f, '.mdx'))) : allFiles;
276+
if (onlyDocIds && files.length === 0) {
277+
console.log(`${tag} No matching articles from --only-files — skipping docs.`);
278+
return;
279+
}
280+
254281
// Apply --file / --ids / --platform filters
255-
let files = allFiles;
256282
if (fileId) {
257283
files = allFiles.filter(f => path.basename(f, '.mdx') === fileId);
258284
if (files.length === 0) {
@@ -614,22 +640,31 @@ async function rebuildSidebarLabels(sidebarFiles, sidebarHashesDir, localesDir)
614640
await fs.writeFile(path.join(localesDir, '_sidebar-labels.json'), JSON.stringify(merged, null, 2), 'utf-8');
615641
}
616642

617-
async function translateSidebarsForLang(client, lang, localesDir, hashesDir, targetLanguage, glossary, tag, sidebarName = null) {
643+
async function translateSidebarsForLang(client, lang, localesDir, hashesDir, targetLanguage, glossary, tag, sidebarName = null, onlySidebarNames = null) {
618644
const sidebarHashesDir = path.join(hashesDir, 'sidebars');
619645

620646
const entries = await fs.readdir(SIDEBARS_DIR, { withFileTypes: true });
621-
let sidebarFiles = entries
647+
const allSidebarFiles = entries
622648
.filter(e => e.isFile() && e.name.endsWith('.json'))
623649
.map(e => path.join(SIDEBARS_DIR, e.name));
624650

651+
// sidebarFiles = the subset to translate; allSidebarFiles = always used for the final rebuild
652+
let sidebarFiles = allSidebarFiles;
653+
625654
if (sidebarName) {
626-
const match = sidebarFiles.find(f => path.basename(f, '.json') === sidebarName);
655+
const match = allSidebarFiles.find(f => path.basename(f, '.json') === sidebarName);
627656
if (!match) {
628657
console.error(`${tag} No sidebar found with name: ${sidebarName}`);
629-
console.error(` Available: ${sidebarFiles.map(f => path.basename(f, '.json')).join(', ')}`);
658+
console.error(` Available: ${allSidebarFiles.map(f => path.basename(f, '.json')).join(', ')}`);
630659
process.exit(1);
631660
}
632661
sidebarFiles = [match];
662+
} else if (onlySidebarNames) {
663+
sidebarFiles = allSidebarFiles.filter(f => onlySidebarNames.has(path.basename(f, '.json')));
664+
if (sidebarFiles.length === 0) {
665+
console.log(`${tag} No matching sidebars from --only-files — skipping sidebars.`);
666+
return;
667+
}
633668
}
634669

635670
const toTranslate = [];
@@ -655,7 +690,7 @@ async function translateSidebarsForLang(client, lang, localesDir, hashesDir, tar
655690
if (toTranslate.length === 0) {
656691
console.log(`${tag} Sidebars: all up to date.`);
657692
// Still rebuild _sidebar-labels.json in case it was deleted
658-
await rebuildSidebarLabels(sidebarFiles, sidebarHashesDir, localesDir);
693+
await rebuildSidebarLabels(allSidebarFiles, sidebarHashesDir, localesDir);
659694
return;
660695
}
661696

@@ -714,25 +749,34 @@ async function translateSidebarsForLang(client, lang, localesDir, hashesDir, tar
714749
}
715750

716751
// Rebuild the single _sidebar-labels.json from all cached sidebar translations
717-
await rebuildSidebarLabels(sidebarFiles, sidebarHashesDir, localesDir);
752+
await rebuildSidebarLabels(allSidebarFiles, sidebarHashesDir, localesDir);
718753
}
719754

720755
// ---------------------------------------------------------------------------
721756
// API spec translation
722757
// ---------------------------------------------------------------------------
723758

724-
async function translateApiSpecsForLang(client, lang, localesDir, hashesDir, targetLanguage, glossary, tag) {
759+
async function translateApiSpecsForLang(client, lang, localesDir, hashesDir, targetLanguage, glossary, tag, onlySpecIds = null) {
725760
const apiHashesDir = path.resolve(hashesDir, 'api-specs');
726761
const systemPrompt = buildApiSpecSystemPrompt(targetLanguage) + glossary;
727762

728763
// Collect English source specs only — exclude already-localized files.
729764
// English files have exactly one dot: "adapty-api.yaml" → ["adapty-api","yaml"] (length 2).
730765
// Localized files have two dots: "adapty-api.zh.yaml" → length 3.
731766
const entries = await fs.readdir(API_SPECS_DIR, { withFileTypes: true });
732-
const specFiles = entries
767+
let specFiles = entries
733768
.filter(e => e.isFile() && e.name.endsWith('.yaml') && e.name.split('.').length === 2)
734769
.map(e => ({ name: e.name, full: path.join(API_SPECS_DIR, e.name), basename: path.basename(e.name, '.yaml') }));
735770

771+
// Apply --only-files filter
772+
if (onlySpecIds) {
773+
specFiles = specFiles.filter(s => onlySpecIds.has(s.basename));
774+
if (specFiles.length === 0) {
775+
console.log(`${tag} No matching API specs from --only-files — skipping specs.`);
776+
return;
777+
}
778+
}
779+
736780
const toTranslate = [];
737781
for (const spec of specFiles) {
738782
const outputPath = path.join(API_SPECS_DIR, `${spec.basename}.${lang}.yaml`);
@@ -805,8 +849,13 @@ function slugify(text) {
805849
.replace(/^-|-$/g, '');
806850
}
807851

852+
// Sections larger than this get split further by paragraph blocks.
853+
const PARAGRAPH_FALLBACK_CHARS = 3000;
854+
808855
/**
809-
* Split MDX content into H2-based sections.
856+
* Split MDX content into H2/H3-based sections.
857+
* Falls back to paragraph-level splitting for sections that exceed PARAGRAPH_FALLBACK_CHARS
858+
* (covers articles with no headings, or long preambles before the first heading).
810859
* Returns Array<{id: string, content: string}> where content pieces join('\n') === original.
811860
*/
812861
function splitIntoSections(content) {
@@ -828,7 +877,7 @@ function splitIntoSections(content) {
828877
continue;
829878
} else if (i === 0) {
830879
frontmatterDone = true;
831-
// fall through to H2 detection for line 0
880+
// fall through to heading detection for line 0
832881
} else if (inFrontmatter) {
833882
if (line.trim() === '---') {
834883
inFrontmatter = false;
@@ -848,20 +897,85 @@ function splitIntoSections(content) {
848897
}
849898
}
850899

851-
// H2 heading → start a new section
852-
if (codeBlockFence === null && /^## /.test(line)) {
900+
// H2 or H3 heading → start a new section
901+
if (codeBlockFence === null && /^#{2,3} /.test(line)) {
853902
sections.push({ id: currentId, content: lines.slice(sectionStart, i).join('\n') });
854903
sectionStart = i;
904+
const level = line.startsWith('### ') ? 'h3' : 'h2';
855905
const headingText = line
856-
.replace(/^## /, '')
906+
.replace(/^#{2,3} /, '')
857907
.replace(/\s*\{#[^}]+\}\s*$/, '')
858908
.trim();
859-
currentId = 'h2-' + slugify(headingText);
909+
currentId = `${level}-` + slugify(headingText);
860910
}
861911
}
862912

863913
sections.push({ id: currentId, content: lines.slice(sectionStart).join('\n') });
864-
return sections;
914+
915+
// Paragraph fallback: split large sections that have no sub-headings into
916+
// paragraph-sized chunks so we don't re-translate an entire H2+ block when
917+
// only one paragraph changed. Also handles heading-free articles.
918+
const result = [];
919+
for (const section of sections) {
920+
if (section.content.length <= PARAGRAPH_FALLBACK_CHARS) {
921+
result.push(section);
922+
} else {
923+
result.push(...splitByParagraphBlocks(section));
924+
}
925+
}
926+
return result;
927+
}
928+
929+
/**
 * Split a section that is too large into paragraph-sized chunks separated by
 * blank lines (respecting code block boundaries).
 *
 * Each chunk gets a positional ID: `<parentId>-p1`, `<parentId>-p2`, etc.
 * The IDs are derived from chunk order, so adding or removing a chunk shifts
 * the IDs of every chunk after it.
 *
 * Invariant: the returned chunks' `content` values joined with '\n' reproduce
 * `section.content` exactly. Runs of blank lines are never discarded; they are
 * carried as a prefix of the following block (or a suffix of the last block).
 *
 * @param {{id: string, content: string}} section - Section to split.
 * @returns {Array<{id: string, content: string}>} Two or more chunks, or
 *   `[section]` unchanged if it cannot be split (e.g. one giant code block).
 */
function splitByParagraphBlocks(section) {
  const lines = section.content.split('\n');
  const rawBlocks = [];
  let start = 0;
  let hasText = false; // has the block starting at `start` seen a non-blank line yet?
  let codeBlockFence = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const isBlank = line.trim() === '';
    if (!isBlank) hasText = true;

    // Track fenced code blocks: the fence character (` or ~) that opened the
    // block is remembered, and the next fence line using that character closes it.
    const fenceMatch = line.match(/^(`{3,}|~{3,})/);
    if (fenceMatch) {
      if (codeBlockFence === null) codeBlockFence = fenceMatch[1][0];
      else if (line[0] === codeBlockFence) codeBlockFence = null;
    }

    // Blank line outside a code block = paragraph boundary, but only once the
    // current block contains text. Blank-only runs stay attached to the next
    // block instead of being dropped, which would break the round-trip invariant.
    if (codeBlockFence === null && isBlank && hasText) {
      rawBlocks.push(lines.slice(start, i + 1).join('\n'));
      start = i + 1;
      hasText = false;
    }
  }

  if (start < lines.length) {
    const tail = lines.slice(start).join('\n');
    if (hasText) {
      rawBlocks.push(tail);
    } else if (rawBlocks.length > 0) {
      // Trailing blank lines: keep them on the last block so nothing is lost.
      rawBlocks[rawBlocks.length - 1] += `\n${tail}`;
    }
  }

  if (rawBlocks.length <= 1) return [section]; // can't split further

  // Merge consecutive paragraph blocks into chunks that stay under the threshold.
  // A single block larger than the threshold becomes its own chunk.
  const chunks = [];
  let current = null;
  let idx = 1;
  for (const block of rawBlocks) {
    if (current === null) {
      current = block;
      continue;
    }
    const candidate = `${current}\n${block}`;
    if (candidate.length > PARAGRAPH_FALLBACK_CHARS) {
      chunks.push({ id: `${section.id}-p${idx}`, content: current });
      idx++;
      current = block;
    } else {
      current = candidate;
    }
  }
  if (current !== null) chunks.push({ id: `${section.id}-p${idx}`, content: current });

  return chunks.length > 1 ? chunks : [section];
}
866980

867981
/** Append -2, -3 suffixes for duplicate section ids. */

0 commit comments

Comments
 (0)