fix: improve robustness and error handling in automation workflows

claude · claude · commit cf1fb42f5236 · 2025-11-24T12:39:00.000Z
Addresses critical review feedback with 5 key improvements:

1. GCS Optional: Make GCS upload optional with proper error handling
   - Add continue-on-error and conditional checks
   - Workflows work without GCS credentials

2. Spec ID Extraction: Case-insensitive, flexible digit count
   - Support 3-4 digits (001-9999)
   - Convert to lowercase automatically
   - Validate spec file existence

3. Code Syntax Validation: AST-based validation before saving
   - New extract_and_validate_code() function
   - Catches syntax errors early with retry
   - Clear error messages

4. Path Parsing: Regex-based validation with error handling
   - Replace fragile cut-based parsing
   - Validate format: plots/{lib}/{type}/{spec}/{variant}.py
   - Skip invalid files with warning

5. API Error Handling: Retry with exponential backoff
   - Retry on RateLimitError and APIConnectionError
   - 3 retries with 2s, 4s, 8s delays
   - Don't retry on other API errors

Ready for initial testing without GCS setup.
diff --git a/.github/workflows/spec-to-code.yml b/.github/workflows/spec-to-code.yml
@@ -20,18 +20,26 @@ jobs:
           ISSUE_TITLE: ${{ github.event.issue.title }}
         run: |
           # Try to extract spec ID from title (format: "scatter-basic-001: Title" or just "scatter-basic-001")
-          SPEC_ID=$(echo "$ISSUE_TITLE" | grep -oP '^[a-z]+-[a-z]+-\d{3}' || echo "")
+          # Case-insensitive, allows 3-4 digits, converts to lowercase
+          SPEC_ID=$(echo "$ISSUE_TITLE" | grep -oiP '^[a-z]+-[a-z]+-\d{3,4}' | tr '[:upper:]' '[:lower:]' || echo "")
 
           if [ -z "$SPEC_ID" ]; then
             # Try to find spec ID in body (look for markdown heading like "# scatter-basic-001:")
-            SPEC_ID=$(echo "$ISSUE_BODY" | grep -oP '^#\s*\K[a-z]+-[a-z]+-\d{3}' || echo "")
+            SPEC_ID=$(echo "$ISSUE_BODY" | grep -oiP '^#\s*\K[a-z]+-[a-z]+-\d{3,4}' | tr '[:upper:]' '[:lower:]' || echo "")
           fi
 
           if [ -z "$SPEC_ID" ]; then
             echo "❌ Could not extract spec ID from issue"
+            echo "Expected format: {type}-{variant}-{001-9999}"
             exit 1
           fi
 
+          # Validate spec file exists
+          if [ ! -f "specs/${SPEC_ID}.md" ]; then
+            echo "⚠️  Warning: Spec file specs/${SPEC_ID}.md does not exist"
+            echo "Please ensure the spec file is created before code generation"
+          fi
+
           echo "spec_id=$SPEC_ID" >> $GITHUB_OUTPUT
           echo "✅ Extracted spec ID: $SPEC_ID"
 
diff --git a/.github/workflows/test-and-preview.yml b/.github/workflows/test-and-preview.yml
@@ -100,9 +100,16 @@ jobs:
 
             # Extract spec_id, library, variant from path
             # Format: plots/{library}/{plot_type}/{spec_id}/{variant}.py
-            LIBRARY=$(echo "$file" | cut -d'/' -f2)
-            SPEC_ID=$(echo "$file" | cut -d'/' -f4)
-            VARIANT=$(basename "$file" .py)
+            if [[ $file =~ ^plots/([^/]+)/([^/]+)/([^/]+)/([^/]+)\.py$ ]]; then
+              LIBRARY="${BASH_REMATCH[1]}"
+              PLOT_TYPE="${BASH_REMATCH[2]}"
+              SPEC_ID="${BASH_REMATCH[3]}"
+              VARIANT="${BASH_REMATCH[4]}"
+            else
+              echo "⚠️  Invalid file path format: $file"
+              echo "Expected: plots/{library}/{plot_type}/{spec_id}/{variant}.py"
+              continue
+            fi
 
             # Run the plot script to generate image
             OUTPUT_FILE="preview_outputs/${SPEC_ID}_${LIBRARY}_${VARIANT}.png"
@@ -127,20 +134,23 @@ jobs:
 
       - name: Setup Google Cloud authentication
         if: steps.changed_plots.outputs.changed_files != ''
+        id: gcs_auth
+        continue-on-error: true
         uses: google-github-actions/auth@v2
         with:
           credentials_json: ${{ secrets.GCS_CREDENTIALS }}
 
       - name: Upload previews to GCS
-        if: steps.changed_plots.outputs.changed_files != ''
+        if: steps.changed_plots.outputs.changed_files != '' && steps.gcs_auth.outcome == 'success'
+        continue-on-error: true
         uses: google-github-actions/upload-cloud-storage@v2
         with:
           path: preview_outputs
           destination: ${{ secrets.GCS_BUCKET }}/previews/pr-${{ github.event.pull_request.number }}
           process_gcloudignore: false
 
       - name: Generate preview URLs
-        if: steps.changed_plots.outputs.changed_files != ''
+        if: steps.changed_plots.outputs.changed_files != '' && steps.gcs_auth.outcome == 'success'
         id: preview_urls
         run: |
           BUCKET="${{ secrets.GCS_BUCKET }}"
@@ -165,7 +175,7 @@ jobs:
           echo -e "$PREVIEW_TABLE" > preview_table.md
 
       - name: Comment on PR with previews
-        if: steps.changed_plots.outputs.changed_files != ''
+        if: steps.changed_plots.outputs.changed_files != '' && steps.gcs_auth.outcome == 'success'
         uses: actions/github-script@v7
         with:
           script: |
diff --git a/automation/generators/plot_generator.py b/automation/generators/plot_generator.py
@@ -5,16 +5,102 @@
 Generates plot implementations from specifications using Claude with versioned rules.
 """
 
+import ast
 import os
 import sys
+import time
 from pathlib import Path
-from typing import Literal
+from typing import Literal, Callable, TypeVar
 import anthropic
+from anthropic import APIError, RateLimitError, APIConnectionError
 
 
 LibraryType = Literal["matplotlib", "seaborn", "plotly", "bokeh", "altair"]
 
 
+def extract_and_validate_code(response_text: str) -> str:
+    """
+    Extract Python code from Claude response and validate syntax.
+
+    Args:
+        response_text: Raw response from Claude API
+
+    Returns:
+        Validated Python code
+
+    Raises:
+        ValueError: If code cannot be extracted or has syntax errors
+    """
+    code = response_text.strip()
+
+    # Extract code if wrapped in markdown
+    if "```python" in code:
+        code = code.split("```python")[1].split("```")[0].strip()
+    elif "```" in code:
+        code = code.split("```")[1].split("```")[0].strip()
+
+    if not code:
+        raise ValueError("No code could be extracted from response")
+
+    # Validate Python syntax
+    try:
+        ast.parse(code)
+    except SyntaxError as e:
+        raise ValueError(f"Generated code has syntax errors: {e}")
+
+    return code
+
+
+T = TypeVar('T')
+
+
+def retry_with_backoff(
+    func: Callable[[], T],
+    max_retries: int = 3,
+    initial_delay: float = 2.0,
+    backoff_factor: float = 2.0
+) -> T:
+    """
+    Retry a function with exponential backoff.
+
+    Args:
+        func: Function to retry
+        max_retries: Maximum number of retry attempts
+        initial_delay: Initial delay in seconds
+        backoff_factor: Multiplier for delay after each retry
+
+    Returns:
+        Result from successful function call
+
+    Raises:
+        Last exception if all retries fail
+    """
+    delay = initial_delay
+    last_exception = None
+
+    for attempt in range(max_retries + 1):
+        try:
+            return func()
+        except (RateLimitError, APIConnectionError) as e:
+            last_exception = e
+            if attempt < max_retries:
+                print(f"⚠️  API error: {type(e).__name__}. Retrying in {delay}s... (attempt {attempt + 1}/{max_retries})")
+                time.sleep(delay)
+                delay *= backoff_factor
+            else:
+                print(f"❌ Max retries ({max_retries}) exceeded")
+                raise
+        except APIError as e:
+            # For other API errors, don't retry
+            print(f"❌ API error: {e}")
+            raise
+
+    # Should never reach here, but for type checker
+    if last_exception:
+        raise last_exception
+    raise RuntimeError("Unexpected retry loop exit")
+
+
 def load_spec(spec_id: str) -> str:
     """Load specification from specs/ directory"""
     spec_path = Path(f"specs/{spec_id}.md")
@@ -165,20 +251,24 @@ def generate_code(
 
 Generate the improved implementation:"""
 
-        # Call Claude
-        response = client.messages.create(
-            model="claude-sonnet-4-20250514",
-            max_tokens=4000,
-            messages=[{"role": "user", "content": prompt}]
+        # Call Claude with retry logic
+        response = retry_with_backoff(
+            lambda: client.messages.create(
+                model="claude-sonnet-4-20250514",
+                max_tokens=4000,
+                messages=[{"role": "user", "content": prompt}]
+            )
         )
 
-        code = response.content[0].text
-
-        # Extract code if wrapped in markdown
-        if "```python" in code:
-            code = code.split("```python")[1].split("```")[0].strip()
-        elif "```" in code:
-            code = code.split("```")[1].split("```")[0].strip()
+        # Extract and validate code
+        try:
+            code = extract_and_validate_code(response.content[0].text)
+        except ValueError as e:
+            print(f"❌ Code extraction/validation failed: {e}")
+            if attempt < max_attempts:
+                print(f"🔄 Retrying... ({attempt + 1}/{max_attempts})")
+                continue
+            raise
 
         # Self-review
         print(f"🔍 Running self-review...")
@@ -215,10 +305,12 @@ def generate_code(
 [specific actionable items]
 """
 
-        review_response = client.messages.create(
-            model="claude-sonnet-4-20250514",
-            max_tokens=2000,
-            messages=[{"role": "user", "content": review_prompt}]
+        review_response = retry_with_backoff(
+            lambda: client.messages.create(
+                model="claude-sonnet-4-20250514",
+                max_tokens=2000,
+                messages=[{"role": "user", "content": review_prompt}]
+            )
         )
 
         review_feedback = review_response.content[0].text