11name : "Bot: AI Review"
2+ run-name : "AI Review: ${{ github.event.workflow_run.head_branch || github.event.pull_request.head.ref }}"
23
34on :
45 workflow_run :
5- workflows : ["Generate Plot Previews "]
6+ workflows : ["Gen: Preview Images"]
67 types : [completed]
78
89jobs :
@@ -40,11 +41,16 @@ jobs:
4041 fi
4142
4243 PR_NUMBER=$(jq -r '.pr_number' plot_metadata.json)
43- ISSUE_NUMBER=$(jq -r '.issue_number' plot_metadata.json)
44+ ISSUE_NUMBER=$(jq -r '.issue_number // empty' plot_metadata.json)
45+ SUB_ISSUE_NUMBER=$(jq -r '.sub_issue_number // empty' plot_metadata.json)
4446 TIMESTAMP=$(jq -r '.timestamp' plot_metadata.json)
47+ LIBRARY=$(jq -r '.library // empty' plot_metadata.json)
48+
4549 echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
4650 echo "issue_number=$ISSUE_NUMBER" >> $GITHUB_OUTPUT
51+ echo "sub_issue_number=$SUB_ISSUE_NUMBER" >> $GITHUB_OUTPUT
4752 echo "timestamp=$TIMESTAMP" >> $GITHUB_OUTPUT
53+ echo "library=$LIBRARY" >> $GITHUB_OUTPUT
4854 echo "skip=false" >> $GITHUB_OUTPUT
4955
5056 - name : Get PR details
@@ -53,21 +59,38 @@ jobs:
5359 env :
5460 GH_TOKEN : ${{ secrets.GITHUB_TOKEN }}
5561 run : |
56- PR_DATA=$(gh pr view ${{ steps.metadata.outputs.pr_number }} --json headRefName,number)
62+ PR_DATA=$(gh pr view ${{ steps.metadata.outputs.pr_number }} --json headRefName,number,body )
5763 HEAD_REF=$(echo "$PR_DATA" | jq -r '.headRefName')
64+ PR_BODY=$(echo "$PR_DATA" | jq -r '.body')
5865
59- # Check if auto/ branch
66+ # Check if auto/ branch (now includes library: auto/scatter-basic/matplotlib)
6067 if [[ ! "$HEAD_REF" =~ ^auto/ ]]; then
6168 echo "Not an auto/ branch, skipping AI review"
6269 echo "skip=true" >> $GITHUB_OUTPUT
6370 exit 0
6471 fi
6572
66- # Extract spec-id from branch name (auto/scatter-basic-005)
67- SPEC_ID=$(echo "$HEAD_REF" | sed 's|auto/||')
73+ # Extract spec-id and library from branch name
74+ # Format: auto/{spec-id}/{library}
75+ SPEC_ID=$(echo "$HEAD_REF" | cut -d'/' -f2)
76+ LIBRARY=$(echo "$HEAD_REF" | cut -d'/' -f3)
77+
78+ # If library not in branch, try from metadata
79+ if [ -z "$LIBRARY" ]; then
80+ LIBRARY="${{ steps.metadata.outputs.library }}"
81+ fi
82+
83+ # Extract sub-issue from PR body if not in metadata
84+ SUB_ISSUE="${{ steps.metadata.outputs.sub_issue_number }}"
85+ if [ -z "$SUB_ISSUE" ]; then
86+ SUB_ISSUE=$(echo "$PR_BODY" | grep -oP 'Sub-Issue: #\K\d+' | head -1 || echo "")
87+ fi
88+
6889 echo "spec_id=$SPEC_ID" >> $GITHUB_OUTPUT
90+ echo "library=$LIBRARY" >> $GITHUB_OUTPUT
91+ echo "sub_issue=$SUB_ISSUE" >> $GITHUB_OUTPUT
6992 echo "skip=false" >> $GITHUB_OUTPUT
70- echo "Spec ID: $SPEC_ID"
93+ echo "Spec ID: $SPEC_ID, Library: $LIBRARY, Sub-Issue: #$SUB_ISSUE "
7194
7295 - name : Setup Google Cloud authentication
7396 if : steps.pr.outputs.skip != 'true'
@@ -85,15 +108,15 @@ jobs:
85108 if : steps.pr.outputs.skip != 'true' && steps.gcs_auth.outcome == 'success'
86109 run : |
87110 SPEC_ID="${{ steps.pr.outputs.spec_id }}"
111+ LIBRARY="${{ steps.pr.outputs.library }}"
88112 TIMESTAMP="${{ steps.metadata.outputs.timestamp }}"
89113
90114 mkdir -p plot_images
91115
92- # Download plot images for this spec from all libraries
93- gsutil -m cp -r "gs://${{ secrets.GCS_BUCKET }}/plots/${SPEC_ID}/**/*v${TIMESTAMP}*.png" plot_images/ 2>/dev/null || echo "No plot images found"
116+ # Download plot images for this specific library
117+ gsutil -m cp -r "gs://${{ secrets.GCS_BUCKET }}/plots/${SPEC_ID}/${LIBRARY}/**/*v${TIMESTAMP}*.png" plot_images/ 2>/dev/null || echo "No plot images found"
94118
95- # List downloaded images
96- echo "📊 Downloaded plot images:"
119+ echo "Downloaded plot images:"
97120 find plot_images -name "*.png" -type f 2>/dev/null || echo "No images found"
98121
99122 - name : Check attempt count
@@ -114,6 +137,15 @@ jobs:
114137 echo "count=0" >> $GITHUB_OUTPUT
115138 fi
116139
140+ - name : Update sub-issue label to reviewing
141+ if : steps.pr.outputs.skip != 'true' && steps.pr.outputs.sub_issue != ''
142+ env :
143+ GH_TOKEN : ${{ secrets.GITHUB_TOKEN }}
144+ run : |
145+ gh issue edit ${{ steps.pr.outputs.sub_issue }} \
146+ --remove-label "testing" \
147+ --add-label "reviewing" 2>/dev/null || true
148+
117149 - name : React with eyes emoji
118150 if : steps.pr.outputs.skip != 'true' && steps.attempts.outputs.count != '3'
119151 env :
@@ -128,96 +160,116 @@ jobs:
128160 with :
129161 script : |
130162 const specId = '${{ steps.pr.outputs.spec_id }}';
163+ const library = '${{ steps.pr.outputs.library }}';
131164 const attempt = parseInt('${{ steps.attempts.outputs.count }}') + 1;
132165 const prNumber = ${{ steps.metadata.outputs.pr_number }};
133- const issueNumber = '${{ steps.metadata.outputs.issue_number }}';
166+ const subIssueNumber = '${{ steps.pr.outputs.sub_issue }}';
167+ const mainIssueNumber = '${{ steps.metadata.outputs.issue_number }}';
134168
135169 await github.rest.issues.createComment({
136170 owner: context.repo.owner,
137171 repo: context.repo.repo,
138172 issue_number: prNumber,
139173 body: `@claude
140174
141- ## 🎯 Task: AI Quality Review (Attempt ${attempt}/3)
175+ ## Task: AI Quality Review for **${library}** (Attempt ${attempt}/3)
142176
143- Tests passed and preview images are ready. Evaluate if the implementation matches the specification.
177+ Tests passed and preview images are ready. Evaluate if the **${library}** implementation matches the specification.
144178
145179 ### Your Task
146180
147181 1. **Read the spec file**: \`specs/${specId}.md\`
148182 - Note all quality criteria listed
149183 - Understand the expected visual output
150184
151- 2. **Read the implementation files**:
152- - \`plots/matplotlib/*/${specId}/default.py\`
153- - \`plots/seaborn/*/${specId}/default.py\`
185+ 2. **Read the ${library} implementation**:
186+ - \`plots/${library}/*/${specId}/default.py\`
187+
188+ 3. **Read library-specific rules**:
189+ - \`prompts/library/${library}.md\`
154190
155- 3 . **View the plot images** in \`plot_images/\` directory
191+ 4. **View the plot images** in \`plot_images/\` directory
156192 - Use your vision capabilities to analyze each image
157193 - Compare with the spec requirements
158194
159- 4 . **Evaluate against spec requirements ** and create a checklist
195+ 5. **Evaluate against quality criteria** from \`prompts/quality-criteria.md\`
160196
161- 5 . **IMPORTANT: Post your verdict to Issue #${issueNumber }** (NOT the PR!) using this format:
197+ 6. **Post your verdict to Sub-Issue #${subIssueNumber}** using this EXACT format:
162198
163199 \`\`\`markdown
164- ## 🤖 AI Review (PR #${prNumber})
200+ ## AI Review - Attempt ${attempt}/3
165201
166- ### Status: ✅ Approved / ❌ Rejected
202+ ### Quality Evaluation
203+ | Evaluator | Score | Verdict |
204+ |-----------|-------|---------|
205+ | Claude | XX/100 | approve/reject |
167206
168- ### Evaluation
169- | Criterion | Score |
170- |-----------|-------|
171- | Code Quality | X/10 |
172- | Spec Conformity | X/10 |
173- | Visual Quality | X/10 |
207+ ### Criteria Checklist
208+ - [x] VQ-001: Axes labeled correctly
209+ - [x] VQ-002: Grid is subtle
210+ - [ ] VQ-003: Elements clear ← Issue here
211+ - [x] CQ-001: Type hints present
212+ ...
174213
175- ### Spec Requirements
176- - [x] Requirement 1 from spec
177- - [x] Requirement 2 from spec
178- - [ ] Requirement 3 ← Issue here
214+ ### Issues Found
215+ 1. **VQ-003 FAILED**: Legend overlaps with data points
216+ 2. **CQ-002 PARTIAL**: Docstring missing return type
179217
180- ### Improvement Suggestions ( for future updates)
181- - [ ] Suggestion 1 (even if approved, note areas for improvement)
182- - [ ] Suggestion 2
218+ ### AI Feedback for Next Attempt
219+ > Move legend outside plot area with \\\`bbox_to_anchor=(1.05, 1)\\\`
220+ > Add return type to docstring
183221
184- ### Critical Issues (only if rejected)
185- - Issue 1 that must be fixed
186- - Issue 2 that must be fixed
222+ ### Verdict: APPROVED / REJECTED
187223 \`\`\`
188224
189- 6 . **Take action based on result**:
190- - **✅ Approved ** (score >= 7/10 on all criteria ):
225+ 7. **Take action based on result**:
226+ - **APPROVED** (score >= 85):
191227 - Run: \`gh pr edit ${prNumber} --add-label ai-approved\`
192- - Post review to Issue #${issueNumber} (include suggestions for future updates)
193- - **❌ Rejected ** (any score < 7/10 ):
228+ - Run: \`gh issue edit ${subIssueNumber} --remove-label reviewing --add-label ai-approved\`
229+ - **REJECTED** (score < 85):
194230 - Run: \`gh pr edit ${prNumber} --add-label ai-rejected\`
195- - Post review to Issue #${issueNumber} (include specific fixes needed)
231+ - Run: \`gh issue edit ${subIssueNumber} --remove-label reviewing --add-label ai-rejected\`
196232
197- **Remember:** The Issue is the permanent knowledge base. Include all feedback there, even if approved!`
233+ **IMPORTANT:**
234+ - This is a **${library}-only** review - focus only on this library
235+ - Post feedback to **Sub-Issue #${subIssueNumber}**, NOT the main issue
236+ - Include the generated code in your review comment for documentation`
198237 });
199238
200239 - name : Mark as failed after 3 attempts
201240 if : steps.pr.outputs.skip != 'true' && steps.attempts.outputs.count == '3'
202241 env :
203242 GH_TOKEN : ${{ secrets.GITHUB_TOKEN }}
204243 run : |
205- ISSUE_NUM ="${{ steps.metadata .outputs.issue_number }}"
244+ SUB_ISSUE="${{ steps.pr.outputs.sub_issue }}"
206245 PR_NUM="${{ steps.metadata.outputs.pr_number }}"
246+ LIBRARY="${{ steps.pr.outputs.library }}"
247+ SPEC_ID="${{ steps.pr.outputs.spec_id }}"
207248
249+ # Update PR labels
208250 gh pr edit "$PR_NUM" --add-label "ai-rejected"
209251
210- # Post to Issue
211- gh issue comment "$ISSUE_NUM" --body "## 🔍 Quality Check
252+ # Update sub-issue
253+ if [ -n "$SUB_ISSUE" ]; then
254+ gh issue edit "$SUB_ISSUE" \
255+ --remove-label "reviewing,ai-rejected" \
256+ --add-label "not-feasible"
257+
258+ # Post final status to sub-issue
259+ gh issue comment "$SUB_ISSUE" --body "## AI Review - Final Status
212260
213- ### Status: ❌ Rejected
261+ ### Status: Not Feasible
214262
215- AI Review failed after 3 attempts. Manual review required .
263+ AI Review failed after **3 attempts**. This ${LIBRARY} implementation for \`${SPEC_ID}\` could not meet quality standards.
216264
217- **PR:** #$PR_NUM
265+ **Options:**
266+ 1. Manual review and fix
267+ 2. Wait for improved AI capabilities
268+ 3. Mark this library as unsupported for this plot type
218269
219- ---
220- 🤖 *Automated quality check*"
270+ ---
271+ :robot: *Automated quality check*"
272+ fi
221273
222274 # Post to PR
223- gh pr comment "$PR_NUM" --body "❌ AI Review failed after 3 attempts. Manual review required ."
275+ gh pr comment "$PR_NUM" --body "AI Review failed after 3 attempts. See sub-issue #$SUB_ISSUE for details."
0 commit comments