@@ -27,6 +27,9 @@ def __init__(self, base_sha: str, head_sha: str, repo_root: Optional[str] = None
2727 self .source_language = self .config .get ('source_language' , 'en' )
2828 self .target_languages = self .config .get ('target_languages' , ['zh' , 'ja' ])
2929
30+ # Load and validate ignore files
31+ self .ignore_files = self ._load_ignore_files ()
32+
3033 def _load_config (self ) -> Dict :
3134 """Load translation configuration."""
3235 config_path = Path (__file__ ).parent / "config.json"
@@ -35,6 +38,55 @@ def _load_config(self) -> Dict:
3538 return json .load (f )
3639 return {}
3740
41+ def _load_ignore_files (self ) -> List [str ]:
42+ """Load and validate ignore_files configuration.
43+
44+ Validates that:
45+ - Each path starts with source language directory
46+ - No directory traversal (..)
47+ - Valid file extension (.md, .mdx)
48+
49+ Returns:
50+ List of validated ignore file paths
51+ """
52+ ignore_files = self .config .get ('ignore_files' , [])
53+ if not ignore_files :
54+ return []
55+
56+ validated = []
57+ source_dir = self .get_language_directory (self .source_language )
58+
59+ for path in ignore_files :
60+ # Must start with source language directory
61+ if not path .startswith (f"{ source_dir } /" ):
62+ print (f"Warning: Ignore path must start with '{ source_dir } /': { path } (skipping)" )
63+ continue
64+
65+ # No directory traversal
66+ if ".." in path :
67+ print (f"Warning: Invalid ignore path (contains '..'): { path } (skipping)" )
68+ continue
69+
70+ # Must have valid extension
71+ if not any (path .endswith (ext ) for ext in ['.md' , '.mdx' ]):
72+ print (f"Warning: Ignore path must end with .md or .mdx: { path } (skipping)" )
73+ continue
74+
75+ validated .append (path )
76+
77+ return validated
78+
79+ def _is_file_ignored (self , file_path : str ) -> bool :
80+ """Check if a file should be ignored from translation.
81+
82+ Args:
83+ file_path: Path to check (e.g., 'en/guides/some-file.md')
84+
85+ Returns:
86+ True if file is in ignore list, False otherwise
87+ """
88+ return file_path in self .ignore_files
89+
3890 def get_language_directory (self , lang_code : str ) -> str :
3991 """Get directory name for a language code from config."""
4092 if 'languages' in self .config and lang_code in self .config ['languages' ]:
@@ -184,16 +236,19 @@ def categorize_files(self, files: List[str]) -> Dict[str, List[str]]:
184236 if file == 'docs.json' :
185237 categories ['docs_json' ].append (file )
186238 elif file .startswith (f'{ source_dir } /' ):
187- if file .endswith (('.md' , '.mdx' )):
239+ # Check if file is in ignore list
240+ if self ._is_file_ignored (file ):
241+ categories ['other' ].append (file ) # Treat as 'other' so it's not processed
242+ elif file .endswith (('.md' , '.mdx' )):
188243 categories ['source' ].append (file )
189- elif self .is_openapi_file (file ): # NEW
244+ elif self .is_openapi_file (file ):
190245 categories ['source_openapi' ].append (file )
191246 else :
192247 categories ['other' ].append (file )
193248 elif any (file .startswith (f'{ target_dir } /' ) for target_dir in target_dirs ):
194249 if file .endswith (('.md' , '.mdx' )):
195250 categories ['translation' ].append (file )
196- elif self .is_openapi_file (file ): # NEW
251+ elif self .is_openapi_file (file ):
197252 categories ['translation_openapi' ].append (file )
198253 else :
199254 categories ['other' ].append (file )
@@ -462,6 +517,10 @@ def generate_sync_plan(self) -> Dict:
462517 docs_json_changed = True
463518 continue
464519
520+ # Skip ignored files
521+ if self .analyzer ._is_file_ignored (filepath ):
522+ continue
523+
465524 # Process source language markdown files
466525 if filepath .startswith ('en/' ) and filepath .endswith (('.md' , '.mdx' )):
467526 file_size = self .get_file_size (filepath )
0 commit comments