Skip to content

Commit 61466c3

Browse files
guchenhe and claude authored
support configuring ignored files in auto sync (#629)
* Add ignore_files config to exclude specific files from translation

  Adds ability to specify source language files that should not be translated:
  - New `ignore_files` array in config.json
  - Validation ensures paths start with source dir, have valid extension, no traversal
  - Filtering applied in PRAnalyzer.categorize_files() and SyncPlanGenerator.generate_sync_plan()

  🤖 Generated with [Claude Code](https://claude.com/claude-code)

  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* update config

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 4f36faa commit 61466c3

2 files changed

Lines changed: 66 additions & 3 deletions

File tree

tools/translate/config.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
"source_language": "en",
33
"target_languages": ["zh", "ja"],
44

5+
"ignore_files": [
6+
"en/self-host/configuration/environments.mdx"
7+
],
8+
59
"processing_limits": {
610
"max_files_per_run": 10,
711
"max_openapi_files_per_run": 5

tools/translate/pr_analyzer.py

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ def __init__(self, base_sha: str, head_sha: str, repo_root: Optional[str] = None
2727
self.source_language = self.config.get('source_language', 'en')
2828
self.target_languages = self.config.get('target_languages', ['zh', 'ja'])
2929

30+
# Load and validate ignore files
31+
self.ignore_files = self._load_ignore_files()
32+
3033
def _load_config(self) -> Dict:
3134
"""Load translation configuration."""
3235
config_path = Path(__file__).parent / "config.json"
@@ -35,6 +38,55 @@ def _load_config(self) -> Dict:
3538
return json.load(f)
3639
return {}
3740

41+
def _load_ignore_files(self) -> List[str]:
    """Load and validate the ``ignore_files`` list from the translation config.

    Each entry is accepted only if it:
    - is a string
    - starts with the source language directory followed by ``/``
    - does not contain ``..`` (no directory traversal)
    - ends with ``.md`` or ``.mdx``

    Entries that fail a check are reported with a printed warning and
    skipped, so one bad entry never prevents the rest from loading.

    Returns:
        List of validated ignore file paths in config order. Empty when the
        setting is missing, empty, or not a list.
    """
    ignore_files = self.config.get('ignore_files', [])
    if not ignore_files:
        return []

    # A bare string would otherwise be iterated character by character and
    # emit one warning per character; reject non-list values up front.
    if not isinstance(ignore_files, (list, tuple)):
        print(
            "Warning: 'ignore_files' must be a list of paths, "
            f"got {type(ignore_files).__name__} (ignoring)"
        )
        return []

    source_dir = self.get_language_directory(self.source_language)
    validated = []

    for path in ignore_files:
        # JSON may contain null/number entries; skip them like any other
        # invalid entry instead of crashing on a missing str method.
        if not isinstance(path, str):
            print(f"Warning: Ignore path must be a string: {path!r} (skipping)")
            continue

        # Must start with source language directory
        if not path.startswith(f"{source_dir}/"):
            print(f"Warning: Ignore path must start with '{source_dir}/': {path} (skipping)")
            continue

        # No directory traversal
        if ".." in path:
            print(f"Warning: Invalid ignore path (contains '..'): {path} (skipping)")
            continue

        # Must have valid extension
        if not path.endswith(('.md', '.mdx')):
            print(f"Warning: Ignore path must end with .md or .mdx: {path} (skipping)")
            continue

        validated.append(path)

    return validated
78+
79+
def _is_file_ignored(self, file_path: str) -> bool:
    """Report whether ``file_path`` is excluded from translation.

    The comparison is an exact match against the entries held in
    ``self.ignore_files``.

    Args:
        file_path: Path to check (e.g., 'en/guides/some-file.md')

    Returns:
        True if the path equals an entry in the ignore list, False otherwise
    """
    return any(ignored == file_path for ignored in self.ignore_files)
89+
3890
def get_language_directory(self, lang_code: str) -> str:
3991
"""Get directory name for a language code from config."""
4092
if 'languages' in self.config and lang_code in self.config['languages']:
@@ -184,16 +236,19 @@ def categorize_files(self, files: List[str]) -> Dict[str, List[str]]:
184236
if file == 'docs.json':
185237
categories['docs_json'].append(file)
186238
elif file.startswith(f'{source_dir}/'):
187-
if file.endswith(('.md', '.mdx')):
239+
# Check if file is in ignore list
240+
if self._is_file_ignored(file):
241+
categories['other'].append(file) # Treat as 'other' so it's not processed
242+
elif file.endswith(('.md', '.mdx')):
188243
categories['source'].append(file)
189-
elif self.is_openapi_file(file): # NEW
244+
elif self.is_openapi_file(file):
190245
categories['source_openapi'].append(file)
191246
else:
192247
categories['other'].append(file)
193248
elif any(file.startswith(f'{target_dir}/') for target_dir in target_dirs):
194249
if file.endswith(('.md', '.mdx')):
195250
categories['translation'].append(file)
196-
elif self.is_openapi_file(file): # NEW
251+
elif self.is_openapi_file(file):
197252
categories['translation_openapi'].append(file)
198253
else:
199254
categories['other'].append(file)
@@ -462,6 +517,10 @@ def generate_sync_plan(self) -> Dict:
462517
docs_json_changed = True
463518
continue
464519

520+
# Skip ignored files
521+
if self.analyzer._is_file_ignored(filepath):
522+
continue
523+
465524
# Process source language markdown files
466525
if filepath.startswith('en/') and filepath.endswith(('.md', '.mdx')):
467526
file_size = self.get_file_size(filepath)

0 commit comments

Comments
 (0)