-
Notifications
You must be signed in to change notification settings - Fork 1
313 lines (258 loc) · 12.2 KB
/
impl-review.yml
File metadata and controls
313 lines (258 loc) · 12.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# AI quality review for implementation PRs.
# Triggered by impl-generate.yml after PR creation.
name: 'Impl: Review'
run-name: 'Review: PR #${{ inputs.pr_number }}'

on:
  workflow_dispatch:
    inputs:
      pr_number:
        description: "PR number to review"
        type: string
        required: true
jobs:
  # Single job: gather PR context, run the AI review, record the score and
  # hand off to the merge or repair workflow.
  review:
    runs-on: ubuntu-latest
    permissions:
      actions: write        # dispatching impl-merge.yml / impl-repair.yml via `gh workflow run`
      contents: write       # Needed for pushing quality score to PR branch
      id-token: write       # NOTE(review): presumably for OIDC in the auth / AI actions - confirm
      issues: write         # reactions, labels and comments on the PR / parent issue
      pull-requests: write  # labels and comments on the PR
    steps:
# Full-history clone of the triggering ref; the PR head itself is checked
# out by a later step once its SHA is known.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    fetch-depth: 0
# Resolves everything later steps need to know about the PR and exposes it
# as step outputs: pr_number, specification_id, library, branch, head_sha,
# issue_number (empty when the PR body has no parent-issue reference).
- name: Extract PR info
  id: pr
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUMBER: ${{ inputs.pr_number }}
  run: |
    # One API call; the individual fields are picked out with jq below.
    pr_json=$(gh pr view "$PR_NUMBER" --json headRefName,headRefOid,body)
    branch=$(jq -r '.headRefName' <<< "$pr_json")
    sha=$(jq -r '.headRefOid' <<< "$pr_json")
    description=$(jq -r '.body' <<< "$pr_json")

    # Branch layout: implementation/{spec-id}/{library}
    spec_id=$(cut -d'/' -f2 <<< "$branch")
    library=$(cut -d'/' -f3 <<< "$branch")

    # First "**Parent Issue:** #NNN" reference in the PR body, if any.
    issue=$(grep -oP '\*\*Parent Issue:\*\* #\K\d+' <<< "$description" | head -1 || echo "")

    {
      echo "pr_number=$PR_NUMBER"
      echo "specification_id=$spec_id"
      echo "library=$library"
      echo "branch=$branch"
      echo "head_sha=$sha"
      echo "issue_number=$issue"
    } >> "$GITHUB_OUTPUT"

    echo "::notice::Reviewing PR #$PR_NUMBER for $library implementation of $spec_id (branch: $branch)"
# Switches the workspace to the exact commit recorded by the "Extract PR
# info" step (detached HEAD), so the review sees the PR's code.
- name: Checkout PR code
  env:
    # Passed through the environment rather than interpolated into the
    # script text, matching how the other steps consume step outputs.
    HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
  run: |
    # With an empty SHA, `git fetch origin` / `git checkout` would succeed
    # without switching commits and the wrong code would be reviewed.
    if [ -z "$HEAD_SHA" ]; then
      echo "::error::PR head SHA is empty; cannot check out PR code"
      exit 1
    fi
    git fetch origin "$HEAD_SHA"
    git checkout "$HEAD_SHA"
# Derives the review attempt from the PR's ai-attempt-N labels.
# Outputs:
#   count   - highest N in 1..3 with an ai-attempt-N label, 0 when none
#   display - attempt number shown to the reviewer: count + 1, capped at 3
- name: Check attempt count
  id: attempts
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUMBER: ${{ steps.pr.outputs.pr_number }}
  run: |
    # A failed label lookup is treated the same as "no labels".
    labels=$(gh pr view "$PR_NUMBER" --json labels -q '.labels[].name' 2>/dev/null || echo "")

    # Probe from the highest attempt downwards; the first hit wins.
    count=0
    for n in 3 2 1; do
      if grep -q "ai-attempt-$n" <<< "$labels"; then
        count=$n
        break
      fi
    done

    if [ "$count" -ge 3 ]; then
      display=3
    else
      display=$((count + 1))
    fi

    {
      echo "count=$count"
      echo "display=$display"
    } >> "$GITHUB_OUTPUT"
# Best-effort: a failure here does not fail the job. The gcloud setup and
# image download steps are gated on steps.gcs.outcome instead.
- name: Setup GCS authentication
  id: gcs
  uses: google-github-actions/auth@v2
  continue-on-error: true
  with:
    credentials_json: ${{ secrets.GCS_CREDENTIALS }}
# Only runs when GCS authentication actually succeeded.
- name: Setup gcloud CLI
  uses: google-github-actions/setup-gcloud@v2
  if: steps.gcs.outcome == 'success'
# Copies the staged plot images for this spec/library into plot_images/ so
# the review step can look at them. Missing images are not an error.
- name: Download plot images from staging
  if: steps.gcs.outcome == 'success'
  env:
    LIBRARY: ${{ steps.pr.outputs.library }}
    SPEC_ID: ${{ steps.pr.outputs.specification_id }}
  run: |
    dest="plot_images"
    src="gs://pyplots-images/staging/${SPEC_ID}/${LIBRARY}/*"

    mkdir -p "$dest"
    if ! gsutil -m cp "$src" "$dest/" 2>/dev/null; then
      echo "No images found"
    fi
    # Log what was downloaded.
    if ! ls -la "$dest/" 2>/dev/null; then
      echo "Empty"
    fi
# Adds an "eyes" reaction to the PR to signal that a review is in progress.
# Skipped once the attempt limit has been reached.
- name: React with eyes emoji
  if: steps.attempts.outputs.count != '3'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUMBER: ${{ steps.pr.outputs.pr_number }}
    REPO: ${{ github.repository }}
  run: |
    gh api "repos/${REPO}/issues/${PR_NUMBER}/reactions" -f content=eyes
# Runs the AI reviewer against the PR. The prompt asks it to post a verdict
# comment and write the numeric score to quality_score.txt; labels are
# applied by later workflow steps, not by the reviewer.
- name: Run AI Quality Review
  id: review
  if: steps.attempts.outputs.count != '3'
  uses: anthropics/claude-code-action@v1
  timeout-minutes: 30
  # A failed or timed-out review must not abort the job. With
  # continue-on-error the failure stays visible to later steps through
  # steps.review.outcome (steps.review.conclusion is reported as success).
  continue-on-error: true
  with:
    claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
    claude_args: "--model opus"
    prompt: |
      ## Task: AI Quality Review for **${{ steps.pr.outputs.library }}** (Attempt ${{ steps.attempts.outputs.display }}/3)
      Review the implementation and evaluate if it meets quality standards.
      ### Your Task
      1. **Read the specification**: `plots/${{ steps.pr.outputs.specification_id }}/specification.md`
      2. **Read the implementation**:
      `plots/${{ steps.pr.outputs.specification_id }}/implementations/${{ steps.pr.outputs.library }}.py`
      3. **Read library rules**: `prompts/library/${{ steps.pr.outputs.library }}.md`
      4. **View plot images** in `plot_images/` directory
      - Use vision to analyze each image
      - Compare with spec requirements
      5. **Evaluate against quality criteria** from `prompts/quality-criteria.md`
      6. **Post verdict as PR comment** on PR #${{ steps.pr.outputs.pr_number }}:
      ```markdown
      ## AI Review - Attempt ${{ steps.attempts.outputs.display }}/3
      ### Quality Score: XX/100
      ### Criteria Checklist
      - [x] VQ-001: Axes labeled correctly
      - [x] VQ-002: Grid is subtle
      - [ ] VQ-003: Elements clear
      ...
      ### Issues Found
      1. **Issue**: Description
      2. **Issue**: Description
      ### AI Feedback
      > Specific suggestions for improvement
      ### Verdict: APPROVED / REJECTED
      ```
      7. **Save score to file**:
      ```bash
      echo "XX" > quality_score.txt
      ```
      8. **DO NOT add ai-approved or ai-rejected labels** - the workflow will add them after updating metadata.
# Publishes the review score as the `score` step output (integer 0-100).
# '0' doubles as "no usable score": the labelling, metadata and verdict
# steps all skip when the score is '0'.
- name: Extract quality score
  id: score
  # The review step sets continue-on-error, so its `conclusion` is always
  # 'success'; `outcome` holds the real result before that is applied.
  if: steps.attempts.outputs.count != '3' && steps.review.outcome == 'success'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUM: ${{ steps.pr.outputs.pr_number }}
  run: |
    if [ -f "quality_score.txt" ]; then
      # Preferred source: the file the review prompt asks the reviewer to write.
      SCORE=$(tr -d '[:space:]' < quality_score.txt)
    else
      # Fallback: scrape "Score: NN" from the most recent PR comment.
      # A missing match simply leaves SCORE empty; it is normalised below.
      SCORE=$(gh pr view "$PR_NUM" --json comments -q '.comments[-1].body' | grep -oP 'Score: \K\d+' | head -1 || true)
    fi

    # Accept only an integer in 0..100. Anything else (empty, a literal
    # "XX" placeholder, out of range) becomes 0 so that no later step acts
    # on a bogus value. 10# forces base 10 so "085" is read as 85.
    if [[ "$SCORE" =~ ^[0-9]{1,3}$ ]] && [ "$((10#$SCORE))" -le 100 ]; then
      SCORE=$((10#$SCORE))
    else
      echo "::warning::Could not determine a valid quality score (got '${SCORE}'); using 0"
      SCORE=0
    fi

    echo "score=$SCORE" >> "$GITHUB_OUTPUT"
# Tags the PR with a `quality:<score>` label, creating the label on demand.
- name: Add quality score label
  # `outcome` (not `conclusion`) is checked because the review step sets
  # continue-on-error, which makes its conclusion 'success' even on failure.
  if: steps.attempts.outputs.count != '3' && steps.review.outcome == 'success' && steps.score.outputs.score != '0'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUM: ${{ steps.pr.outputs.pr_number }}
    SCORE: ${{ steps.score.outputs.score }}
  run: |
    LABEL="quality:${SCORE}"
    # Best-effort create: errors (e.g. the label already exists) are ignored.
    gh label create "$LABEL" --color "0e8a16" --description "Quality score ${SCORE}/100" 2>/dev/null || true
    gh pr edit "$PR_NUM" --add-label "$LABEL"
# Writes the score into plots/<spec>/metadata/<library>.yaml on the PR
# branch and pushes the change. Does nothing when the metadata file is
# missing or the score is already up to date.
- name: Update quality score in metadata
  # `outcome` (not `conclusion`) is checked because the review step sets
  # continue-on-error, which makes its conclusion 'success' even on failure.
  if: steps.attempts.outputs.count != '3' && steps.review.outcome == 'success' && steps.score.outputs.score != '0'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    SPEC_ID: ${{ steps.pr.outputs.specification_id }}
    LIBRARY: ${{ steps.pr.outputs.library }}
    SCORE: ${{ steps.score.outputs.score }}
    BRANCH: ${{ steps.pr.outputs.branch }}
  run: |
    METADATA_FILE="plots/${SPEC_ID}/metadata/${LIBRARY}.yaml"
    # Configure git auth and checkout the PR branch
    # (the workspace is on a detached commit after "Checkout PR code").
    git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${{ github.repository }}.git"
    git fetch origin "$BRANCH"
    git checkout -B "$BRANCH" "origin/$BRANCH"
    # Update quality_score in metadata file
    if [ -f "$METADATA_FILE" ]; then
      # Handles both an unset score (null) and a previously recorded number.
      sed -i "s/quality_score: null.*/quality_score: ${SCORE}/" "$METADATA_FILE"
      sed -i "s/quality_score: [0-9]\+.*/quality_score: ${SCORE}/" "$METADATA_FILE"
      git config user.name "github-actions[bot]"
      git config user.email "github-actions[bot]@users.noreply.github.com"
      git add "$METADATA_FILE"
      # Commit and push only when the file actually changed.
      if ! git diff --cached --quiet; then
        git commit -m "chore(${LIBRARY}): set quality score ${SCORE} for ${SPEC_ID}"
        git push origin "$BRANCH"
        echo "::notice::Quality score ${SCORE} committed to ${BRANCH}"
      fi
    fi
# Flags the PR when the AI review step itself failed or timed out, so a
# human can re-run the workflow or review manually.
- name: Handle review failure
  # Must test `outcome`: the review step sets continue-on-error, so its
  # `conclusion` is always 'success' and a check against
  # conclusion == 'failure' would never fire.
  if: steps.attempts.outputs.count != '3' && steps.review.outcome == 'failure'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUM: ${{ steps.pr.outputs.pr_number }}
  run: |
    # Best-effort create so adding the label below cannot fail merely
    # because the label has not been defined in the repository yet.
    gh label create "ai-review-failed" --color "d93f0b" --description "AI review action failed or timed out" 2>/dev/null || true
    gh pr edit "$PR_NUM" --add-label "ai-review-failed"
    gh pr comment "$PR_NUM" --body "## :warning: AI Review Failed
    The AI review action failed or timed out.
    **Options:**
    1. Re-run the workflow manually
    2. Request manual human review
    ---
    :robot: *[impl-review](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})*"
# Turns the score into a verdict label (threshold: 85) and dispatches the
# follow-up workflow: impl-merge.yml when approved, impl-repair.yml when
# rejected. A rejected PR is also tagged with ai-attempt-<ATTEMPT>.
- name: Add verdict label and take action
  # `outcome` (not `conclusion`) is checked because the review step sets
  # continue-on-error, which makes its conclusion 'success' even on failure.
  if: steps.attempts.outputs.count != '3' && steps.review.outcome == 'success' && steps.score.outputs.score != '0'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUM: ${{ steps.pr.outputs.pr_number }}
    SPEC_ID: ${{ steps.pr.outputs.specification_id }}
    LIBRARY: ${{ steps.pr.outputs.library }}
    SCORE: ${{ steps.score.outputs.score }}
    ATTEMPT: ${{ steps.attempts.outputs.display }}
  run: |
    # Add ai-approved or ai-rejected label based on score
    if [ "$SCORE" -ge 85 ]; then
      gh pr edit "$PR_NUM" --add-label "ai-approved"
      echo "::notice::Added ai-approved label (score $SCORE >= 85)"
    else
      gh pr edit "$PR_NUM" --add-label "ai-rejected"
      echo "::notice::Added ai-rejected label (score $SCORE < 85)"
    fi
    # Now check labels and take action.
    # The labels are re-read from the PR; ai-approved takes precedence when
    # both verdict labels are present.
    HAS_APPROVED=$(gh pr view "$PR_NUM" --json labels -q '[.labels[].name] | any(. == "ai-approved")' || echo "false")
    HAS_REJECTED=$(gh pr view "$PR_NUM" --json labels -q '[.labels[].name] | any(. == "ai-rejected")' || echo "false")
    if [[ "$HAS_APPROVED" == "true" ]]; then
      echo "Triggering impl-merge.yml for approved PR"
      gh workflow run impl-merge.yml -f pr_number="$PR_NUM"
      echo "::notice::PR approved. Triggered impl-merge.yml"
    elif [[ "$HAS_REJECTED" == "true" ]]; then
      echo "Triggering impl-repair.yml for rejected PR"
      # Record this attempt on the PR (best-effort) before handing off.
      gh pr edit "$PR_NUM" --add-label "ai-attempt-${ATTEMPT}" 2>/dev/null || true
      gh workflow run impl-repair.yml \
        -f pr_number="$PR_NUM" \
        -f specification_id="$SPEC_ID" \
        -f library="$LIBRARY" \
        -f attempt="$ATTEMPT"
    fi
# Terminal path: once the PR carries ai-attempt-3 no further review is run.
# The PR is labelled not-feasible and commented on; when a parent issue
# number was found in the PR body, that issue is labelled and notified too.
- name: Mark as not-feasible after 3 attempts
  if: steps.attempts.outputs.count == '3'
  env:
    GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    PR_NUM: ${{ steps.pr.outputs.pr_number }}
    SPEC_ID: ${{ steps.pr.outputs.specification_id }}
    LIBRARY: ${{ steps.pr.outputs.library }}
    ISSUE_NUMBER: ${{ steps.pr.outputs.issue_number }}
  run: |
    pr_comment="## AI Review - Final Status
    ### Status: Not Feasible
    AI Review failed after **3 attempts**. This ${LIBRARY} implementation could not meet quality standards.
    **Options:**
    1. Manual review and fix
    2. Mark this library as unsupported for this plot type
    ---
    :robot: *[impl-review](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})*"
    issue_comment="**${LIBRARY}** implementation failed after 3 AI review attempts. See PR #${PR_NUM}."

    gh pr edit "$PR_NUM" --add-label "not-feasible"
    gh pr comment "$PR_NUM" --body "$pr_comment"

    # Nothing more to do when the PR body named no parent issue.
    if [ -z "$ISSUE_NUMBER" ]; then
      exit 0
    fi

    # Labelling the issue is best-effort; the comment is not.
    gh issue edit "$ISSUE_NUMBER" --add-label "impl:${LIBRARY}:failed" 2>/dev/null || true
    gh issue comment "$ISSUE_NUMBER" --body "$issue_comment"