1313 * ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --lang zh --api-specs --file adapty-api # single API spec
1414 * ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --incremental # all locales, changed files only (build pipeline)
1515 * ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --lang zh --incremental # single locale, changed files only
16+ * ANTHROPIC_API_KEY=sk-... node scripts/translate.mjs --incremental --only-files "src/content/docs/foo.mdx,src/data/sidebars/ios.json"
17+ * # incremental, but only check these paths (CI git-diff mode)
1618 */
1719
1820import fs from 'node:fs/promises' ;
@@ -70,6 +72,22 @@ const flagResume = resumeIdx !== -1;
7072// Explicit batch ID passed after --resume (may be absent — auto-read from file in main())
7173const resumeArgValue = flagResume ? args [ resumeIdx + 1 ] : null ;
7274
// --only-files "<path>,<path>,...": restrict an incremental run to an explicit
// list of repo-relative paths (CI git-diff mode).
// onlyFilePaths is null when the flag is absent, otherwise the trimmed,
// non-empty entries of the comma-separated value.
// A missing value (flag passed as the last argument, e.g. an unquoted empty
// shell variable) is treated as an empty list instead of throwing on
// `undefined.split(...)`.
const onlyFilesIdx = args.indexOf('--only-files');
const onlyFilePaths = onlyFilesIdx !== -1
  ? (args[onlyFilesIdx + 1] ?? '').split(',').map(s => s.trim()).filter(Boolean)
  : null;

// Derive per-category ID sets from --only-files (null = no filter).
// Each set holds file basenames without their extension.

// Articles: any .mdx path under src/content/docs/.
const onlyDocIds = onlyFilePaths
  ? new Set(
      onlyFilePaths
        .filter(p => p.startsWith('src/content/docs/') && p.endsWith('.mdx'))
        .map(p => path.basename(p, '.mdx')),
    )
  : null;

// Sidebars: any .json path under src/data/sidebars/.
const onlySidebarNames = onlyFilePaths
  ? new Set(
      onlyFilePaths
        .filter(p => p.startsWith('src/data/sidebars/') && p.endsWith('.json'))
        .map(p => path.basename(p, '.json')),
    )
  : null;

// API specs: only files directly inside src/api-reference/specs/ whose name
// has a single dot, so names with an extra dot segment such as
// "adapty-api.zh.yaml" never match.
const onlySpecIds = onlyFilePaths
  ? new Set(
      onlyFilePaths
        .filter(p => /^src\/api-reference\/specs\/[^./]+\.yaml$/.test(p))
        .map(p => path.basename(p, '.yaml')),
    )
  : null;
90+
7391// Targeted operations require an explicit --lang
7492if ( ( flagResume || fileId || fileIds || sidebarName || platform ) && ! lang ) {
7593 console . error ( '[translate] --lang <code> is required when using --resume, --file, --ids, --sidebar, or --platform' ) ;
@@ -161,11 +179,7 @@ async function loadGlossary(lang) {
161179 const dict = JSON . parse ( await fs . readFile ( dictPath , 'utf-8' ) ) ;
162180 const lines = Object . entries ( dict )
163181 . filter ( ( [ _ , translations ] ) => lang in translations )
164- . map ( ( [ en , translations ] ) => {
165- const tr = translations [ lang ] ;
166- const note = translations [ '_note' ] ? ` (${ translations [ '_note' ] } )` : '' ;
167- return `- ${ en } → ${ tr } ${ note } ` ;
168- } ) ;
182+ . map ( ( [ en , translations ] ) => `- ${ en } → ${ translations [ lang ] } ` ) ;
169183 if ( lines . length === 0 ) return '' ;
170184 return `\nGLOSSARY — use these exact translations for product-specific terms (do not improvise):\n${ lines . join ( '\n' ) } ` ;
171185 } catch {
@@ -209,6 +223,12 @@ async function main() {
209223 return ;
210224 }
211225
226+ // --only-files: fast exit if the diff contains nothing translatable
227+ if ( onlyFilePaths && onlyDocIds . size === 0 && onlySidebarNames . size === 0 && onlySpecIds . size === 0 ) {
228+ console . log ( '[translate] --only-files: no translatable files in diff — nothing to do.' ) ;
229+ return ;
230+ }
231+
212232 // Determine which languages to process
213233 const langs = lang ? [ lang ] : await discoverLocales ( ) ;
214234 if ( langs . length === 0 ) {
@@ -229,17 +249,17 @@ async function main() {
229249 if ( ! flagApiSpecs ) {
230250 // --sidebar targets a single sidebar only; skip article translation
231251 if ( ! sidebarName ) {
232- await translateForLang ( client , currentLang , localesDir , hashesDir , systemPrompt , tag ) ;
252+ await translateForLang ( client , currentLang , localesDir , hashesDir , systemPrompt , tag , onlyDocIds ) ;
233253 }
234254
235255 // Sidebars are not file/platform-specific; skip only for --file/--ids targeting
236256 if ( ! fileId && ! fileIds ) {
237- await translateSidebarsForLang ( client , currentLang , localesDir , hashesDir , targetLanguage , glossary , tag , sidebarName ) ;
257+ await translateSidebarsForLang ( client , currentLang , localesDir , hashesDir , targetLanguage , glossary , tag , sidebarName , onlySidebarNames ) ;
238258 }
239259 }
240260
241261 if ( flagApiSpecs || flagIncremental ) {
242- await translateApiSpecsForLang ( client , currentLang , localesDir , hashesDir , targetLanguage , glossary , tag ) ;
262+ await translateApiSpecsForLang ( client , currentLang , localesDir , hashesDir , targetLanguage , glossary , tag , onlySpecIds ) ;
243263 }
244264 }
245265}
@@ -248,11 +268,17 @@ async function main() {
248268// Per-language translation
249269// ---------------------------------------------------------------------------
250270
251- async function translateForLang ( client , lang , localesDir , hashesDir , systemPrompt , tag ) {
271+ async function translateForLang ( client , lang , localesDir , hashesDir , systemPrompt , tag , onlyDocIds = null ) {
252272 const allFiles = await collectMdxFiles ( DOCS_DIR ) ;
253273
274+ // Apply --only-files filter (git-diff mode): restrict to specific article IDs
275+ let files = onlyDocIds ? allFiles . filter ( f => onlyDocIds . has ( path . basename ( f , '.mdx' ) ) ) : allFiles ;
276+ if ( onlyDocIds && files . length === 0 ) {
277+ console . log ( `${ tag } No matching articles from --only-files — skipping docs.` ) ;
278+ return ;
279+ }
280+
254281 // Apply --file / --ids / --platform filters
255- let files = allFiles ;
256282 if ( fileId ) {
257283 files = allFiles . filter ( f => path . basename ( f , '.mdx' ) === fileId ) ;
258284 if ( files . length === 0 ) {
@@ -614,22 +640,31 @@ async function rebuildSidebarLabels(sidebarFiles, sidebarHashesDir, localesDir)
614640 await fs . writeFile ( path . join ( localesDir , '_sidebar-labels.json' ) , JSON . stringify ( merged , null , 2 ) , 'utf-8' ) ;
615641}
616642
617- async function translateSidebarsForLang ( client , lang , localesDir , hashesDir , targetLanguage , glossary , tag , sidebarName = null ) {
643+ async function translateSidebarsForLang ( client , lang , localesDir , hashesDir , targetLanguage , glossary , tag , sidebarName = null , onlySidebarNames = null ) {
618644 const sidebarHashesDir = path . join ( hashesDir , 'sidebars' ) ;
619645
620646 const entries = await fs . readdir ( SIDEBARS_DIR , { withFileTypes : true } ) ;
621- let sidebarFiles = entries
647+ const allSidebarFiles = entries
622648 . filter ( e => e . isFile ( ) && e . name . endsWith ( '.json' ) )
623649 . map ( e => path . join ( SIDEBARS_DIR , e . name ) ) ;
624650
651+ // sidebarFiles = the subset to translate; allSidebarFiles = always used for the final rebuild
652+ let sidebarFiles = allSidebarFiles ;
653+
625654 if ( sidebarName ) {
626- const match = sidebarFiles . find ( f => path . basename ( f , '.json' ) === sidebarName ) ;
655+ const match = allSidebarFiles . find ( f => path . basename ( f , '.json' ) === sidebarName ) ;
627656 if ( ! match ) {
628657 console . error ( `${ tag } No sidebar found with name: ${ sidebarName } ` ) ;
629- console . error ( ` Available: ${ sidebarFiles . map ( f => path . basename ( f , '.json' ) ) . join ( ', ' ) } ` ) ;
658+ console . error ( ` Available: ${ allSidebarFiles . map ( f => path . basename ( f , '.json' ) ) . join ( ', ' ) } ` ) ;
630659 process . exit ( 1 ) ;
631660 }
632661 sidebarFiles = [ match ] ;
662+ } else if ( onlySidebarNames ) {
663+ sidebarFiles = allSidebarFiles . filter ( f => onlySidebarNames . has ( path . basename ( f , '.json' ) ) ) ;
664+ if ( sidebarFiles . length === 0 ) {
665+ console . log ( `${ tag } No matching sidebars from --only-files — skipping sidebars.` ) ;
666+ return ;
667+ }
633668 }
634669
635670 const toTranslate = [ ] ;
@@ -655,7 +690,7 @@ async function translateSidebarsForLang(client, lang, localesDir, hashesDir, tar
655690 if ( toTranslate . length === 0 ) {
656691 console . log ( `${ tag } Sidebars: all up to date.` ) ;
657692 // Still rebuild _sidebar-labels.json in case it was deleted
658- await rebuildSidebarLabels ( sidebarFiles , sidebarHashesDir , localesDir ) ;
693+ await rebuildSidebarLabels ( allSidebarFiles , sidebarHashesDir , localesDir ) ;
659694 return ;
660695 }
661696
@@ -714,25 +749,34 @@ async function translateSidebarsForLang(client, lang, localesDir, hashesDir, tar
714749 }
715750
716751 // Rebuild the single _sidebar-labels.json from all cached sidebar translations
717- await rebuildSidebarLabels ( sidebarFiles , sidebarHashesDir , localesDir ) ;
752+ await rebuildSidebarLabels ( allSidebarFiles , sidebarHashesDir , localesDir ) ;
718753}
719754
720755// ---------------------------------------------------------------------------
721756// API spec translation
722757// ---------------------------------------------------------------------------
723758
724- async function translateApiSpecsForLang ( client , lang , localesDir , hashesDir , targetLanguage , glossary , tag ) {
759+ async function translateApiSpecsForLang ( client , lang , localesDir , hashesDir , targetLanguage , glossary , tag , onlySpecIds = null ) {
725760 const apiHashesDir = path . resolve ( hashesDir , 'api-specs' ) ;
726761 const systemPrompt = buildApiSpecSystemPrompt ( targetLanguage ) + glossary ;
727762
728763 // Collect English source specs only — exclude already-localized files.
729764 // English files have exactly one dot: "adapty-api.yaml" → ["adapty-api","yaml"] (length 2).
730765 // Localized files have two dots: "adapty-api.zh.yaml" → length 3.
731766 const entries = await fs . readdir ( API_SPECS_DIR , { withFileTypes : true } ) ;
732- const specFiles = entries
767+ let specFiles = entries
733768 . filter ( e => e . isFile ( ) && e . name . endsWith ( '.yaml' ) && e . name . split ( '.' ) . length === 2 )
734769 . map ( e => ( { name : e . name , full : path . join ( API_SPECS_DIR , e . name ) , basename : path . basename ( e . name , '.yaml' ) } ) ) ;
735770
771+ // Apply --only-files filter
772+ if ( onlySpecIds ) {
773+ specFiles = specFiles . filter ( s => onlySpecIds . has ( s . basename ) ) ;
774+ if ( specFiles . length === 0 ) {
775+ console . log ( `${ tag } No matching API specs from --only-files — skipping specs.` ) ;
776+ return ;
777+ }
778+ }
779+
736780 const toTranslate = [ ] ;
737781 for ( const spec of specFiles ) {
738782 const outputPath = path . join ( API_SPECS_DIR , `${ spec . basename } .${ lang } .yaml` ) ;
@@ -805,8 +849,13 @@ function slugify(text) {
805849 . replace ( / ^ - | - $ / g, '' ) ;
806850}
807851
852+ // Sections larger than this get split further by paragraph blocks.
853+ const PARAGRAPH_FALLBACK_CHARS = 3000 ;
854+
808855/**
809- * Split MDX content into H2-based sections.
856+ * Split MDX content into H2/H3-based sections.
857+ * Falls back to paragraph-level splitting for sections that exceed PARAGRAPH_FALLBACK_CHARS
858+ * (covers articles with no headings, or long preambles before the first heading).
810859 * Returns Array<{id: string, content: string}> where content pieces join('\n') === original.
811860 */
812861function splitIntoSections ( content ) {
@@ -828,7 +877,7 @@ function splitIntoSections(content) {
828877 continue ;
829878 } else if ( i === 0 ) {
830879 frontmatterDone = true ;
831- // fall through to H2 detection for line 0
880+ // fall through to heading detection for line 0
832881 } else if ( inFrontmatter ) {
833882 if ( line . trim ( ) === '---' ) {
834883 inFrontmatter = false ;
@@ -848,20 +897,85 @@ function splitIntoSections(content) {
848897 }
849898 }
850899
851- // H2 heading → start a new section
852- if ( codeBlockFence === null && / ^ # # / . test ( line ) ) {
900+ // H2 or H3 heading → start a new section
901+ if ( codeBlockFence === null && / ^ # { 2 , 3 } / . test ( line ) ) {
853902 sections . push ( { id : currentId , content : lines . slice ( sectionStart , i ) . join ( '\n' ) } ) ;
854903 sectionStart = i ;
904+ const level = line . startsWith ( '### ' ) ? 'h3' : 'h2' ;
855905 const headingText = line
856- . replace ( / ^ # # / , '' )
906+ . replace ( / ^ # { 2 , 3 } / , '' )
857907 . replace ( / \s * \{ # [ ^ } ] + \} \s * $ / , '' )
858908 . trim ( ) ;
859- currentId = 'h2-' + slugify ( headingText ) ;
909+ currentId = ` ${ level } -` + slugify ( headingText ) ;
860910 }
861911 }
862912
863913 sections . push ( { id : currentId , content : lines . slice ( sectionStart ) . join ( '\n' ) } ) ;
864- return sections ;
914+
915+ // Paragraph fallback: split large sections that have no sub-headings into
916+ // paragraph-sized chunks so we don't re-translate an entire H2+ block when
917+ // only one paragraph changed. Also handles heading-free articles.
918+ const result = [ ] ;
919+ for ( const section of sections ) {
920+ if ( section . content . length <= PARAGRAPH_FALLBACK_CHARS ) {
921+ result . push ( section ) ;
922+ } else {
923+ result . push ( ...splitByParagraphBlocks ( section ) ) ;
924+ }
925+ }
926+ return result ;
927+ }
928+
929+ /**
930+ * Split a section that is too large into paragraph-sized chunks separated by
931+ * blank lines (respecting code block boundaries). Each chunk gets a stable
932+ * positional ID: `<parentId>-p1`, `<parentId>-p2`, etc.
933+ * If the section cannot be split (e.g. one giant code block), returns it as-is.
934+ */
935+ function splitByParagraphBlocks ( section ) {
936+ const lines = section . content . split ( '\n' ) ;
937+ const rawBlocks = [ ] ;
938+ let start = 0 ;
939+ let codeBlockFence = null ;
940+
941+ for ( let i = 0 ; i < lines . length ; i ++ ) {
942+ const line = lines [ i ] ;
943+
944+ const fenceMatch = line . match ( / ^ ( ` { 3 , } | ~ { 3 , } ) / ) ;
945+ if ( fenceMatch ) {
946+ if ( codeBlockFence === null ) codeBlockFence = fenceMatch [ 1 ] [ 0 ] ;
947+ else if ( line [ 0 ] === codeBlockFence ) codeBlockFence = null ;
948+ }
949+
950+ // Blank line outside a code block = paragraph boundary
951+ if ( codeBlockFence === null && line . trim ( ) === '' && i > start ) {
952+ const block = lines . slice ( start , i + 1 ) . join ( '\n' ) ;
953+ if ( block . trim ( ) ) rawBlocks . push ( block ) ;
954+ start = i + 1 ;
955+ }
956+ }
957+ const tail = lines . slice ( start ) . join ( '\n' ) ;
958+ if ( tail . trim ( ) ) rawBlocks . push ( tail ) ;
959+
960+ if ( rawBlocks . length <= 1 ) return [ section ] ; // can't split further
961+
962+ // Merge consecutive paragraph blocks into chunks that stay under the threshold
963+ const chunks = [ ] ;
964+ let current = '' ;
965+ let idx = 1 ;
966+ for ( const block of rawBlocks ) {
967+ const candidate = current ? `${ current } \n${ block } ` : block ;
968+ if ( current && candidate . length > PARAGRAPH_FALLBACK_CHARS ) {
969+ chunks . push ( { id : `${ section . id } -p${ idx } ` , content : current } ) ;
970+ idx ++ ;
971+ current = block ;
972+ } else {
973+ current = candidate ;
974+ }
975+ }
976+ if ( current ) chunks . push ( { id : `${ section . id } -p${ idx } ` , content : current } ) ;
977+
978+ return chunks . length > 1 ? chunks : [ section ] ;
865979}
866980
867981/** Append -2, -3 suffixes for duplicate section ids. */
0 commit comments