Skip to content

Commit 69e330a

Browse files
committed
improve wf
1 parent a954d21 commit 69e330a

4 files changed

Lines changed: 979 additions & 120 deletions

File tree

.github/workflows/sync_docs_analyze.yml

Lines changed: 144 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ on:
77
- 'docs.json'
88
- 'en/**/*.md'
99
- 'en/**/*.mdx'
10+
- 'ja-jp/**/*.md'
11+
- 'ja-jp/**/*.mdx'
12+
- 'zh-hans/**/*.md'
13+
- 'zh-hans/**/*.mdx'
1014

1115
permissions:
1216
contents: read
@@ -26,10 +30,10 @@ jobs:
2630
with:
2731
python-version: '3.9'
2832

29-
- name: Analyze documentation changes
30-
id: analyze
33+
- name: Categorize and validate PR changes
34+
id: categorize
3135
run: |
32-
echo "Analyzing documentation changes..."
36+
echo "Categorizing PR changes..."
3337
3438
# Get base and head commits
3539
BASE_SHA="${{ github.event.pull_request.base.sha }}"
@@ -38,16 +42,52 @@ jobs:
3842
echo "Base SHA: $BASE_SHA"
3943
echo "Head SHA: $HEAD_SHA"
4044
41-
# Detect changed files
42-
CHANGED_FILES=$(git diff --name-only $BASE_SHA $HEAD_SHA | grep -E '^(docs\.json|en/.*\.(md|mdx))$' || true)
45+
# Run PR analyzer
46+
cd tools/translate
47+
python pr_analyzer.py "$BASE_SHA" "$HEAD_SHA" > /tmp/pr_analysis_output.txt 2>&1
4348
44-
if [ -z "$CHANGED_FILES" ]; then
45-
echo "No documentation changes detected"
46-
echo "has_changes=false" >> $GITHUB_OUTPUT
47-
exit 0
49+
# Parse analyzer output
50+
if [ $? -eq 0 ]; then
51+
# Successful analysis
52+
source /tmp/pr_analysis_output.txt
53+
echo "PR categorization successful"
54+
echo "PR Type: $pr_type"
55+
echo "Should Skip: $should_skip"
56+
57+
# Set GitHub outputs
58+
echo "pr_type=$pr_type" >> $GITHUB_OUTPUT
59+
echo "should_skip=$should_skip" >> $GITHUB_OUTPUT
60+
61+
if [ "$should_skip" = "true" ]; then
62+
if [ "$pr_type" = "translation" ]; then
63+
echo "✅ Translation-only PR detected. Skipping automation (direct review process)."
64+
elif [ "$pr_type" = "none" ]; then
65+
echo "✅ No relevant documentation changes detected. Skipping workflow."
66+
fi
67+
exit 0
68+
fi
69+
else
70+
# Analysis failed - likely mixed PR
71+
echo "PR categorization failed - likely mixed content PR"
72+
ERROR_MESSAGE=$(cat /tmp/pr_analysis_output.txt | grep "error_message=" | cut -d'=' -f2- || echo "Mixed content PR detected")
73+
echo "error=mixed_pr" >> $GITHUB_OUTPUT
74+
echo "error_message<<EOF" >> $GITHUB_OUTPUT
75+
echo "$ERROR_MESSAGE" >> $GITHUB_OUTPUT
76+
echo "EOF" >> $GITHUB_OUTPUT
77+
exit 1
4878
fi
79+
80+
- name: Analyze English changes for translation
81+
if: steps.categorize.outputs.pr_type == 'english'
82+
id: analyze
83+
run: |
84+
echo "Analyzing English changes for automatic translation..."
4985
50-
echo "has_changes=true" >> $GITHUB_OUTPUT
86+
BASE_SHA="${{ github.event.pull_request.base.sha }}"
87+
HEAD_SHA="${{ github.event.pull_request.head.sha }}"
88+
89+
# Get all changed files (not just English ones for file analysis)
90+
CHANGED_FILES=$(git diff --name-only $BASE_SHA $HEAD_SHA)
5191
5292
# Count changes for security limits
5393
FILE_COUNT=$(echo "$CHANGED_FILES" | wc -l)
@@ -72,17 +112,18 @@ jobs:
72112
"file_count": $FILE_COUNT,
73113
"timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
74114
"repository": "${{ github.repository }}",
75-
"ref": "${{ github.ref }}"
115+
"ref": "${{ github.ref }}",
116+
"pr_type": "english"
76117
}
77118
EOF
78119
79120
# Save changed files list
80121
echo "$CHANGED_FILES" > /tmp/changed_files.txt
81122
82-
# Analyze file types and sizes
123+
# Analyze file types and sizes for English files that need translation
83124
> /tmp/file_analysis.txt
84125
while IFS= read -r file; do
85-
if [ -f "$file" ]; then
126+
if [[ "$file" =~ ^en/.*\.(md|mdx)$ ]] && [ -f "$file" ]; then
86127
SIZE=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0")
87128
echo "$file|$SIZE" >> /tmp/file_analysis.txt
88129
@@ -96,96 +137,73 @@ jobs:
96137
fi
97138
done <<< "$CHANGED_FILES"
98139
99-
echo "Analysis complete"
100-
101-
- name: Check for docs.json structure changes
102-
if: steps.analyze.outputs.has_changes == 'true'
103-
run: |
104-
# Check if docs.json was modified
105-
if git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | grep -q '^docs\.json$'; then
106-
echo "docs.json structure changes detected"
140+
# Check for docs.json changes
141+
if echo "$CHANGED_FILES" | grep -q '^docs\.json$'; then
107142
echo "true" > /tmp/docs_json_changed.txt
108143
109-
# Extract English documentation structure changes
110-
python3 - <<'EOF'
111-
import json
112-
import subprocess
113-
114-
def get_docs_structure(sha):
115-
try:
116-
result = subprocess.run(
117-
["git", "show", f"{sha}:docs.json"],
118-
capture_output=True,
119-
text=True,
120-
check=True
121-
)
122-
return json.loads(result.stdout)
123-
except:
124-
return None
125-
126-
base_sha = "${{ github.event.pull_request.base.sha }}"
127-
head_sha = "${{ github.event.pull_request.head.sha }}"
128-
129-
base_docs = get_docs_structure(base_sha)
130-
head_docs = get_docs_structure(head_sha)
131-
132-
changes = {
133-
"structure_changed": base_docs != head_docs if base_docs and head_docs else False,
134-
"navigation_modified": False,
135-
"languages_affected": []
136-
}
144+
# Use PR analyzer's docs.json analysis
145+
cd tools/translate
146+
python3 - <<EOF
147+
import sys
148+
sys.path.append('.')
149+
from pr_analyzer import PRAnalyzer
137150
138-
if base_docs and head_docs:
139-
# Check navigation changes
140-
base_nav = base_docs.get("navigation", {})
141-
head_nav = head_docs.get("navigation", {})
142-
143-
if base_nav != head_nav:
144-
changes["navigation_modified"] = True
145-
146-
# Identify affected languages
147-
for lang_data in head_nav.get("languages", []):
148-
if lang_data.get("language") == "en":
149-
changes["languages_affected"] = ["zh-Hans", "jp"]
150-
break
151+
analyzer = PRAnalyzer("$BASE_SHA", "$HEAD_SHA")
152+
docs_changes = analyzer.analyze_docs_json_changes()
151153
154+
structure_changes = {
155+
"structure_changed": docs_changes["any_docs_json_changes"],
156+
"navigation_modified": docs_changes["english_section"],
157+
"languages_affected": ["zh-hans", "ja-jp"] if docs_changes["english_section"] else []
158+
}
159+
160+
import json
152161
with open("/tmp/structure_changes.json", "w") as f:
153-
json.dump(changes, f, indent=2)
162+
json.dump(structure_changes, f, indent=2)
154163
EOF
155164
else
156-
echo "No docs.json changes"
157165
echo "false" > /tmp/docs_json_changed.txt
166+
echo '{"structure_changed": false, "navigation_modified": false, "languages_affected": []}' > /tmp/structure_changes.json
158167
fi
168+
169+
echo "has_changes=true" >> $GITHUB_OUTPUT
170+
echo "Analysis complete"
159171
160172
- name: Validate file paths
161173
if: steps.analyze.outputs.has_changes == 'true'
162174
run: |
163-
# Security: Validate all file paths
164-
while IFS= read -r file; do
165-
# Check for directory traversal attempts
166-
if echo "$file" | grep -q '\.\./'; then
167-
echo "Error: Invalid file path detected: $file"
168-
exit 1
169-
fi
170-
171-
# Check file extension
172-
if ! echo "$file" | grep -qE '\.(md|mdx|json)$'; then
173-
echo "Error: Invalid file type: $file"
174-
exit 1
175-
fi
176-
177-
# Check path starts with allowed directories
178-
if ! echo "$file" | grep -qE '^(en/|docs\.json$)'; then
179-
echo "Error: File outside allowed directories: $file"
180-
exit 1
175+
echo "Validating English file paths for translation..."
176+
177+
# Security: Validate English files that will be translated
178+
while IFS='|' read -r file size; do
179+
if [ -n "$file" ]; then
180+
# Check for directory traversal attempts
181+
if echo "$file" | grep -q '\.\./'; then
182+
echo "Error: Invalid file path detected: $file"
183+
exit 1
184+
fi
185+
186+
# Check file extension for English files
187+
if ! echo "$file" | grep -qE '\.(md|mdx)$'; then
188+
echo "Error: Invalid file type for translation: $file"
189+
exit 1
190+
fi
191+
192+
# Check path starts with en/ (only English files need translation)
193+
if ! echo "$file" | grep -qE '^en/'; then
194+
echo "Error: Non-English file in translation list: $file"
195+
exit 1
196+
fi
181197
fi
182-
done < /tmp/changed_files.txt
198+
done < /tmp/file_analysis.txt
183199
184-
echo "All file paths validated"
200+
echo "All English file paths validated for translation"
185201
186-
- name: Create analysis summary
202+
- name: Create analysis summary
187203
if: steps.analyze.outputs.has_changes == 'true'
188204
run: |
205+
echo "Creating analysis summary for English changes..."
206+
189207
# Create a comprehensive analysis summary
190208
python3 - <<'EOF'
191209
import json
@@ -195,7 +213,7 @@ jobs:
195213
with open("/tmp/analysis.json") as f:
196214
analysis = json.load(f)
197215
198-
# Load file analysis
216+
# Load file analysis (English files to translate)
199217
files_to_sync = []
200218
with open("/tmp/file_analysis.txt") as f:
201219
for line in f:
@@ -204,14 +222,25 @@ jobs:
204222
files_to_sync.append({
205223
"path": file_path,
206224
"size": int(size),
207-
"type": "mdx" if file_path.endswith(".mdx") else "md" if file_path.endswith(".md") else "json"
225+
"type": "mdx" if file_path.endswith(".mdx") else "md"
208226
})
209227
210-
# Load structure changes if exists
211-
structure_changes = {}
212-
if os.path.exists("/tmp/structure_changes.json"):
213-
with open("/tmp/structure_changes.json") as f:
214-
structure_changes = json.load(f)
228+
# Add docs.json if it changed
229+
with open("/tmp/docs_json_changed.txt") as f:
230+
docs_json_changed = f.read().strip() == "true"
231+
232+
if docs_json_changed:
233+
# Get docs.json size
234+
docs_json_size = os.path.getsize("../../docs.json")
235+
files_to_sync.append({
236+
"path": "docs.json",
237+
"size": docs_json_size,
238+
"type": "json"
239+
})
240+
241+
# Load structure changes
242+
with open("/tmp/structure_changes.json") as f:
243+
structure_changes = json.load(f)
215244
216245
# Create sync plan
217246
sync_plan = {
@@ -226,7 +255,7 @@ jobs:
226255
with open("/tmp/sync_plan.json", "w") as f:
227256
json.dump(sync_plan, f, indent=2)
228257
229-
print(f"Sync plan created: {len(files_to_sync)} files to sync")
258+
print(f"English sync plan created: {len(files_to_sync)} files to translate")
230259
if structure_changes.get("structure_changed"):
231260
print("Documentation structure changes detected")
232261
EOF
@@ -245,3 +274,25 @@ jobs:
245274
/tmp/structure_changes.json
246275
retention-days: 1
247276

277+
- name: Report mixed PR error
278+
if: failure() && steps.categorize.outputs.error == 'mixed_pr'
279+
uses: actions/github-script@v7
280+
continue-on-error: true
281+
with:
282+
script: |
283+
const errorMessage = `${{ steps.categorize.outputs.error_message }}`;
284+
285+
try {
286+
await github.rest.issues.createComment({
287+
owner: context.repo.owner,
288+
repo: context.repo.repo,
289+
issue_number: context.issue.number,
290+
body: errorMessage
291+
});
292+
console.log('Posted mixed PR error message to PR');
293+
} catch (error) {
294+
console.log('Could not comment on PR:', error.message);
295+
console.log('Error message would have been:');
296+
console.log(errorMessage);
297+
}
298+

0 commit comments

Comments
 (0)