77 - ' docs.json'
88 - ' en/**/*.md'
99 - ' en/**/*.mdx'
10+ - ' ja-jp/**/*.md'
11+ - ' ja-jp/**/*.mdx'
12+ - ' zh-hans/**/*.md'
13+ - ' zh-hans/**/*.mdx'
1014
1115permissions :
1216 contents : read
@@ -26,10 +30,10 @@ jobs:
2630 with :
2731 python-version : ' 3.9'
2832
29- - name : Analyze documentation changes
30- id : analyze
33+ - name : Categorize and validate PR changes
34+ id : categorize
3135 run : |
32- echo "Analyzing documentation changes..."
36+ echo "Categorizing PR changes..."
3337
3438 # Get base and head commits
3539 BASE_SHA="${{ github.event.pull_request.base.sha }}"
@@ -38,16 +42,52 @@ jobs:
3842 echo "Base SHA: $BASE_SHA"
3943 echo "Head SHA: $HEAD_SHA"
4044
41- # Detect changed files
42- CHANGED_FILES=$(git diff --name-only $BASE_SHA $HEAD_SHA | grep -E '^(docs\.json|en/.*\.(md|mdx))$' || true)
# Run PR analyzer
cd tools/translate
ANALYSIS_OUTPUT=/tmp/pr_analysis_output.txt

# The analyzer is invoked directly as the `if` condition. GitHub Actions runs
# bash steps with `-e`, so a bare failing command would abort the step before
# a later `$?` test could run and the failure branch below would never execute.
if python pr_analyzer.py "$BASE_SHA" "$HEAD_SHA" > "$ANALYSIS_OUTPUT" 2>&1; then
  # Successful analysis: load the analyzer output as shell assignments.
  # pr_type and should_skip are read from it below.
  # NOTE(review): stderr is captured into the same file that is sourced, so
  # any warning text would be executed as shell - confirm the analyzer's
  # output contract and consider keeping stderr separate.
  source "$ANALYSIS_OUTPUT"
  echo "PR categorization successful"
  echo "PR Type: $pr_type"
  echo "Should Skip: $should_skip"

  # Set GitHub outputs
  echo "pr_type=$pr_type" >> "$GITHUB_OUTPUT"
  echo "should_skip=$should_skip" >> "$GITHUB_OUTPUT"

  if [ "$should_skip" = "true" ]; then
    if [ "$pr_type" = "translation" ]; then
      echo "✅ Translation-only PR detected. Skipping automation (direct review process)."
    elif [ "$pr_type" = "none" ]; then
      echo "✅ No relevant documentation changes detected. Skipping workflow."
    fi
    exit 0
  fi
else
  # Analysis failed - likely mixed PR
  echo "PR categorization failed - likely mixed content PR"

  # Extract the text after "error_message=". `|| true` keeps a no-match grep
  # from aborting the step when pipefail is active; the default is applied
  # separately because the pipeline's status is that of `cut`, which succeeds
  # even when grep matched nothing.
  ERROR_MESSAGE=$(grep "error_message=" "$ANALYSIS_OUTPUT" | cut -d'=' -f2- || true)
  ERROR_MESSAGE="${ERROR_MESSAGE:-Mixed content PR detected}"

  # Use a randomized heredoc delimiter so message content can never terminate
  # the multiline output value early.
  DELIMITER="ghadelimiter_${RANDOM}${RANDOM}"
  {
    echo "error=mixed_pr"
    echo "error_message<<$DELIMITER"
    echo "$ERROR_MESSAGE"
    echo "$DELIMITER"
  } >> "$GITHUB_OUTPUT"
  exit 1
fi
79+
80+ - name : Analyze English changes for translation
81+ if : steps.categorize.outputs.pr_type == 'english'
82+ id : analyze
83+ run : |
84+ echo "Analyzing English changes for automatic translation..."
4985
50- echo "has_changes=true" >> $GITHUB_OUTPUT
86+ BASE_SHA="${{ github.event.pull_request.base.sha }}"
87+ HEAD_SHA="${{ github.event.pull_request.head.sha }}"
88+
89+ # Get all changed files (not just English ones for file analysis)
90+ CHANGED_FILES=$(git diff --name-only $BASE_SHA $HEAD_SHA)
5191
5292 # Count changes for security limits
5393 FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
@@ -72,17 +112,18 @@ jobs:
72112 "file_count": $FILE_COUNT,
73113 "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
74114 "repository": "${{ github.repository }}",
75- "ref": "${{ github.ref }}"
115+ "ref": "${{ github.ref }}",
116+ "pr_type": "english"
76117 }
77118 EOF
78119
79120 # Save changed files list
80121 echo "$CHANGED_FILES" > /tmp/changed_files.txt
81122
82- # Analyze file types and sizes
123+ # Analyze file types and sizes for English files that need translation
83124 > /tmp/file_analysis.txt
84125 while IFS= read -r file; do
85- if [ -f "$file" ]; then
126+ if [[ "$file" =~ ^en/.*\.(md|mdx)$ ]] && [ -f "$file" ]; then
86127 SIZE=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0")
87128 echo "$file|$SIZE" >> /tmp/file_analysis.txt
88129
@@ -96,96 +137,73 @@ jobs:
96137 fi
97138 done <<< "$CHANGED_FILES"
98139
99- echo "Analysis complete"
100-
101- - name : Check for docs.json structure changes
102- if : steps.analyze.outputs.has_changes == 'true'
103- run : |
104- # Check if docs.json was modified
105- if git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | grep -q '^docs\.json$'; then
106- echo "docs.json structure changes detected"
# Check for docs.json changes
if grep -q '^docs\.json$' <<< "$CHANGED_FILES"; then
  echo "true" > /tmp/docs_json_changed.txt

  # Use PR analyzer's docs.json analysis.
  # - The subshell confines `cd tools/translate` to this invocation so the
  #   rest of the step keeps its original working directory.
  # - The SHAs are handed to Python through the environment and the heredoc
  #   delimiter is quoted, so the shell never expands anything inside the
  #   Python source.
  (
    cd tools/translate
    BASE_SHA="$BASE_SHA" HEAD_SHA="$HEAD_SHA" python3 - <<'EOF'
import json
import os
import sys

# pr_analyzer.py lives in the current directory (tools/translate)
sys.path.append('.')
from pr_analyzer import PRAnalyzer

analyzer = PRAnalyzer(os.environ["BASE_SHA"], os.environ["HEAD_SHA"])
docs_changes = analyzer.analyze_docs_json_changes()

# Map the analyzer's result onto the structure_changes.json schema that the
# later summary step loads.
structure_changes = {
    "structure_changed": docs_changes["any_docs_json_changes"],
    "navigation_modified": docs_changes["english_section"],
    "languages_affected": ["zh-hans", "ja-jp"] if docs_changes["english_section"] else []
}

with open("/tmp/structure_changes.json", "w") as f:
    json.dump(structure_changes, f, indent=2)
EOF
  )
else
  # No docs.json change: still write both marker files so later steps can
  # read them unconditionally.
  echo "false" > /tmp/docs_json_changed.txt
  echo '{"structure_changed": false, "navigation_modified": false, "languages_affected": []}' > /tmp/structure_changes.json
fi

echo "has_changes=true" >> "$GITHUB_OUTPUT"
echo "Analysis complete"
159171
- name: Validate file paths
  if: steps.analyze.outputs.has_changes == 'true'
  run: |
    echo "Validating English file paths for translation..."

    # Security: every entry of /tmp/file_analysis.txt ("path|size" per line)
    # must pass all three checks below; the first violation fails the step.
    while IFS='|' read -r file size; do
      # Ignore blank records
      [ -n "$file" ] || continue

      # Reject directory traversal attempts
      if [[ "$file" == *../* ]]; then
        echo "Error: Invalid file path detected: $file"
        exit 1
      fi

      # Only Markdown / MDX files are translated
      if [[ ! "$file" =~ \.(md|mdx)$ ]]; then
        echo "Error: Invalid file type for translation: $file"
        exit 1
      fi

      # Only files under en/ need translation
      if [[ "$file" != en/* ]]; then
        echo "Error: Non-English file in translation list: $file"
        exit 1
      fi
    done < /tmp/file_analysis.txt

    echo "All English file paths validated for translation"
185201
186- - name : Create analysis summary
202+ - name : Create analysis summary
187203 if : steps.analyze.outputs.has_changes == 'true'
188204 run : |
205+ echo "Creating analysis summary for English changes..."
206+
189207 # Create a comprehensive analysis summary
190208 python3 - <<'EOF'
191209 import json
@@ -195,7 +213,7 @@ jobs:
195213 with open("/tmp/analysis.json") as f:
196214 analysis = json.load(f)
197215
198- # Load file analysis
216+ # Load file analysis (English files to translate)
199217 files_to_sync = []
200218 with open("/tmp/file_analysis.txt") as f:
201219 for line in f:
@@ -204,14 +222,25 @@ jobs:
204222 files_to_sync.append({
205223 "path": file_path,
206224 "size": int(size),
207- "type": "mdx" if file_path.endswith(".mdx") else "md" if file_path.endswith(".md") else "json"
225+ "type": "mdx" if file_path.endswith(".mdx") else "md"
208226 })
209227
210- # Load structure changes if exists
211- structure_changes = {}
212- if os.path.exists("/tmp/structure_changes.json"):
213- with open("/tmp/structure_changes.json") as f:
214- structure_changes = json.load(f)
# Add docs.json if it changed (flag file holds the literal "true" or "false")
with open("/tmp/docs_json_changed.txt") as f:
    docs_json_changed = f.read().strip() == "true"

if docs_json_changed:
    # Resolve docs.json against the checkout root. A `cd` performed in an
    # earlier workflow step does not carry over to this one, so a path such
    # as "../../docs.json" would point outside the repository here.
    docs_json_path = os.path.join(os.environ.get("GITHUB_WORKSPACE", "."), "docs.json")
    # Fall back to size 0 if the file is absent at the checked-out revision
    # instead of crashing the summary.
    docs_json_size = os.path.getsize(docs_json_path) if os.path.isfile(docs_json_path) else 0
    files_to_sync.append({
        "path": "docs.json",
        "size": docs_json_size,
        "type": "json"
    })

# Load structure changes
with open("/tmp/structure_changes.json") as f:
    structure_changes = json.load(f)
215244
216245 # Create sync plan
217246 sync_plan = {
@@ -226,7 +255,7 @@ jobs:
226255 with open("/tmp/sync_plan.json", "w") as f:
227256 json.dump(sync_plan, f, indent=2)
228257
229- print(f"Sync plan created: {len(files_to_sync)} files to sync ")
258+ print(f"English sync plan created: {len(files_to_sync)} files to translate ")
230259 if structure_changes.get("structure_changed"):
231260 print("Documentation structure changes detected")
232261 EOF
@@ -245,3 +274,25 @@ jobs:
245274 /tmp/structure_changes.json
246275 retention-days : 1
247276
# Posts the categorization error as a PR comment when the categorize step
# failed with error == 'mixed_pr'. Best effort: continue-on-error plus the
# try/catch below keep a failed comment from adding another failure.
# NOTE(review): creating a comment needs write access to issues/pull requests
# for the workflow token - confirm the job's permissions block grants it.
- name: Report mixed PR error
  if: failure() && steps.categorize.outputs.error == 'mixed_pr'
  uses: actions/github-script@v7
  continue-on-error: true
  env:
    # Passed through the environment rather than interpolated into the
    # script text: a backtick or "${" in the message would otherwise be
    # parsed as JavaScript.
    ERROR_MESSAGE: ${{ steps.categorize.outputs.error_message }}
  with:
    script: |
      const errorMessage = process.env.ERROR_MESSAGE || '';

      try {
        await github.rest.issues.createComment({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: context.issue.number,
          body: errorMessage
        });
        console.log('Posted mixed PR error message to PR');
      } catch (error) {
        console.log('Could not comment on PR:', error.message);
        console.log('Error message would have been:');
        console.log(errorMessage);
      }

0 commit comments