Skip to content

Commit cf1fb42

Browse files
committed
fix: improve robustness and error handling in automation workflows
Addresses critical review feedback with 5 key improvements:

1. GCS Optional: Make GCS upload optional with proper error handling
   - Add continue-on-error and conditional checks
   - Workflows work without GCS credentials
2. Spec ID Extraction: Case-insensitive, flexible digit count
   - Support 3-4 digits (001-9999)
   - Convert to lowercase automatically
   - Validate spec file existence
3. Code Syntax Validation: AST-based validation before saving
   - New extract_and_validate_code() function
   - Catches syntax errors early with retry
   - Clear error messages
4. Path Parsing: Regex-based validation with error handling
   - Replace fragile cut-based parsing
   - Validate format: plots/{lib}/{type}/{spec}/{variant}.py
   - Skip invalid files with warning
5. API Error Handling: Retry with exponential backoff
   - Retry on RateLimitError and APIConnectionError
   - 3 retries with 2s, 4s, 8s delays
   - Don't retry on other API errors

Ready for initial testing without GCS setup.
1 parent 33138b2 commit cf1fb42

File tree

3 files changed

+135
-25
lines changed

3 files changed

+135
-25
lines changed

.github/workflows/spec-to-code.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,26 @@ jobs:
2020
ISSUE_TITLE: ${{ github.event.issue.title }}
2121
run: |
2222
# Try to extract spec ID from title (format: "scatter-basic-001: Title" or just "scatter-basic-001")
23-
SPEC_ID=$(echo "$ISSUE_TITLE" | grep -oP '^[a-z]+-[a-z]+-\d{3}' || echo "")
23+
# Case-insensitive, allows 3-4 digits, converts to lowercase
24+
SPEC_ID=$(echo "$ISSUE_TITLE" | grep -oiP '^[a-z]+-[a-z]+-\d{3,4}' | tr '[:upper:]' '[:lower:]' || echo "")
2425
2526
if [ -z "$SPEC_ID" ]; then
2627
# Try to find spec ID in body (look for markdown heading like "# scatter-basic-001:")
27-
SPEC_ID=$(echo "$ISSUE_BODY" | grep -oP '^#\s*\K[a-z]+-[a-z]+-\d{3}' || echo "")
28+
SPEC_ID=$(echo "$ISSUE_BODY" | grep -oiP '^#\s*\K[a-z]+-[a-z]+-\d{3,4}' | tr '[:upper:]' '[:lower:]' || echo "")
2829
fi
2930
3031
if [ -z "$SPEC_ID" ]; then
3132
echo "❌ Could not extract spec ID from issue"
33+
echo "Expected format: {type}-{variant}-{001-9999}"
3234
exit 1
3335
fi
3436
37+
# Validate spec file exists
38+
if [ ! -f "specs/${SPEC_ID}.md" ]; then
39+
echo "⚠️ Warning: Spec file specs/${SPEC_ID}.md does not exist"
40+
echo "Please ensure the spec file is created before code generation"
41+
fi
42+
3543
echo "spec_id=$SPEC_ID" >> $GITHUB_OUTPUT
3644
echo "✅ Extracted spec ID: $SPEC_ID"
3745

.github/workflows/test-and-preview.yml

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,16 @@ jobs:
100100
101101
# Extract spec_id, library, variant from path
102102
# Format: plots/{library}/{plot_type}/{spec_id}/{variant}.py
103-
LIBRARY=$(echo "$file" | cut -d'/' -f2)
104-
SPEC_ID=$(echo "$file" | cut -d'/' -f4)
105-
VARIANT=$(basename "$file" .py)
103+
if [[ $file =~ ^plots/([^/]+)/([^/]+)/([^/]+)/([^/]+)\.py$ ]]; then
104+
LIBRARY="${BASH_REMATCH[1]}"
105+
PLOT_TYPE="${BASH_REMATCH[2]}"
106+
SPEC_ID="${BASH_REMATCH[3]}"
107+
VARIANT="${BASH_REMATCH[4]}"
108+
else
109+
echo "⚠️ Invalid file path format: $file"
110+
echo "Expected: plots/{library}/{plot_type}/{spec_id}/{variant}.py"
111+
continue
112+
fi
106113
107114
# Run the plot script to generate image
108115
OUTPUT_FILE="preview_outputs/${SPEC_ID}_${LIBRARY}_${VARIANT}.png"
@@ -127,20 +134,23 @@ jobs:
127134
128135
- name: Setup Google Cloud authentication
129136
if: steps.changed_plots.outputs.changed_files != ''
137+
id: gcs_auth
138+
continue-on-error: true
130139
uses: google-github-actions/auth@v2
131140
with:
132141
credentials_json: ${{ secrets.GCS_CREDENTIALS }}
133142

134143
- name: Upload previews to GCS
135-
if: steps.changed_plots.outputs.changed_files != ''
144+
if: steps.changed_plots.outputs.changed_files != '' && steps.gcs_auth.outcome == 'success'
145+
continue-on-error: true
136146
uses: google-github-actions/upload-cloud-storage@v2
137147
with:
138148
path: preview_outputs
139149
destination: ${{ secrets.GCS_BUCKET }}/previews/pr-${{ github.event.pull_request.number }}
140150
process_gcloudignore: false
141151

142152
- name: Generate preview URLs
143-
if: steps.changed_plots.outputs.changed_files != ''
153+
if: steps.changed_plots.outputs.changed_files != '' && steps.gcs_auth.outcome == 'success'
144154
id: preview_urls
145155
run: |
146156
BUCKET="${{ secrets.GCS_BUCKET }}"
@@ -165,7 +175,7 @@ jobs:
165175
echo -e "$PREVIEW_TABLE" > preview_table.md
166176
167177
- name: Comment on PR with previews
168-
if: steps.changed_plots.outputs.changed_files != ''
178+
if: steps.changed_plots.outputs.changed_files != '' && steps.gcs_auth.outcome == 'success'
169179
uses: actions/github-script@v7
170180
with:
171181
script: |

automation/generators/plot_generator.py

Lines changed: 109 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,102 @@
55
Generates plot implementations from specifications using Claude with versioned rules.
66
"""
77

8+
import ast
89
import os
910
import sys
11+
import time
1012
from pathlib import Path
11-
from typing import Literal
13+
from typing import Literal, Callable, TypeVar
1214
import anthropic
15+
from anthropic import APIError, RateLimitError, APIConnectionError
1316

1417

1518
LibraryType = Literal["matplotlib", "seaborn", "plotly", "bokeh", "altair"]
1619

1720

21+
def extract_and_validate_code(response_text: str) -> str:
    """
    Extract Python code from Claude response and validate syntax.

    If the response contains a Markdown code fence, the contents of the
    first ```python block are used (or, when no such block exists, the
    contents of the first fence of any kind). A language tag following the
    opening fence (e.g. ``py`` or the ``3`` of ``python3``) is discarded so
    it cannot leak into the extracted code. Responses without a fence are
    treated as raw code.

    Args:
        response_text: Raw response from Claude API

    Returns:
        Validated Python code

    Raises:
        ValueError: If code cannot be extracted or has syntax errors
    """
    fence = "```"
    code = response_text.strip()

    if fence in code:
        # Prefer an explicitly tagged python block; otherwise take the first fence.
        opener = "```python" if "```python" in code else fence
        fenced = code.split(opener, 1)[1].split(fence, 1)[0]

        # Whatever sits between the opening fence and the first newline is the
        # fence's info string, not code. Only drop it when it looks like a bare
        # language tag, so single-line snippets such as ```x = 1``` survive.
        info, newline, remainder = fenced.partition("\n")
        tag = info.strip()
        if newline and all(ch.isalnum() or ch in "_+-.#" for ch in tag):
            fenced = remainder

        code = fenced.strip()

    if not code:
        raise ValueError("No code could be extracted from response")

    # Validate Python syntax before the code is saved anywhere
    try:
        ast.parse(code)
    except SyntaxError as e:
        # Chain the original error so the traceback keeps line/offset details
        raise ValueError(f"Generated code has syntax errors: {e}") from e

    return code
52+
53+
54+
T = TypeVar('T')


def retry_with_backoff(
    func: Callable[[], T],
    max_retries: int = 3,
    initial_delay: float = 2.0,
    backoff_factor: float = 2.0
) -> T:
    """
    Retry a function with exponential backoff.

    Only RateLimitError and APIConnectionError trigger a retry; any other
    APIError is reported and re-raised immediately. Exceptions that are not
    API errors propagate untouched.

    Args:
        func: Zero-argument function to call (and re-call on retryable errors)
        max_retries: Maximum number of retry attempts after the first call.
            Negative values are treated as 0, so func is always called at
            least once.
        initial_delay: Delay in seconds before the first retry
        backoff_factor: Multiplier applied to the delay after each retry

    Returns:
        Result from successful function call

    Raises:
        RateLimitError, APIConnectionError: If the error persists after all
            retries are used up
        APIError: Immediately, for any other API error
    """
    # Clamp so a negative value cannot skip the call entirely
    retries_allowed = max(max_retries, 0)
    delay = initial_delay
    attempt = 0

    while True:
        try:
            return func()
        # Retryable errors are listed before the generic APIError handler so
        # they are matched first.
        except (RateLimitError, APIConnectionError) as e:
            if attempt >= retries_allowed:
                print(f"❌ Max retries ({retries_allowed}) exceeded")
                raise
            print(f"⚠️ API error: {type(e).__name__}. Retrying in {delay}s... (attempt {attempt + 1}/{retries_allowed})")
            time.sleep(delay)
            delay *= backoff_factor
            attempt += 1
        except APIError as e:
            # For other API errors, don't retry
            print(f"❌ API error: {e}")
            raise
102+
103+
18104
def load_spec(spec_id: str) -> str:
19105
"""Load specification from specs/ directory"""
20106
spec_path = Path(f"specs/{spec_id}.md")
@@ -165,20 +251,24 @@ def generate_code(
165251
166252
Generate the improved implementation:"""
167253

168-
# Call Claude
169-
response = client.messages.create(
170-
model="claude-sonnet-4-20250514",
171-
max_tokens=4000,
172-
messages=[{"role": "user", "content": prompt}]
254+
# Call Claude with retry logic
255+
response = retry_with_backoff(
256+
lambda: client.messages.create(
257+
model="claude-sonnet-4-20250514",
258+
max_tokens=4000,
259+
messages=[{"role": "user", "content": prompt}]
260+
)
173261
)
174262

175-
code = response.content[0].text
176-
177-
# Extract code if wrapped in markdown
178-
if "```python" in code:
179-
code = code.split("```python")[1].split("```")[0].strip()
180-
elif "```" in code:
181-
code = code.split("```")[1].split("```")[0].strip()
263+
# Extract and validate code
264+
try:
265+
code = extract_and_validate_code(response.content[0].text)
266+
except ValueError as e:
267+
print(f"❌ Code extraction/validation failed: {e}")
268+
if attempt < max_attempts:
269+
print(f"🔄 Retrying... ({attempt + 1}/{max_attempts})")
270+
continue
271+
raise
182272

183273
# Self-review
184274
print(f"🔍 Running self-review...")
@@ -215,10 +305,12 @@ def generate_code(
215305
[specific actionable items]
216306
"""
217307

218-
review_response = client.messages.create(
219-
model="claude-sonnet-4-20250514",
220-
max_tokens=2000,
221-
messages=[{"role": "user", "content": review_prompt}]
308+
review_response = retry_with_backoff(
309+
lambda: client.messages.create(
310+
model="claude-sonnet-4-20250514",
311+
max_tokens=2000,
312+
messages=[{"role": "user", "content": review_prompt}]
313+
)
222314
)
223315

224316
review_feedback = review_response.content[0].text

0 commit comments

Comments
 (0)