diff --git a/.github/workflows/impl-generate.yml b/.github/workflows/impl-generate.yml index 7660797923..14d0b2aab9 100644 --- a/.github/workflows/impl-generate.yml +++ b/.github/workflows/impl-generate.yml @@ -196,6 +196,71 @@ jobs: - name: Run Claude Code to generate implementation id: claude + continue-on-error: true + timeout-minutes: 60 + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + claude_args: "--model opus" + prompt: | + ## Task: Generate ${{ steps.inputs.outputs.library }} Implementation + + You are generating the **${{ steps.inputs.outputs.library }}** implementation for **${{ steps.inputs.outputs.specification_id }}**. + + ### Step 1: Read required files + 1. `prompts/plot-generator.md` - Base generation rules + 2. `prompts/default-style-guide.md` - Visual style requirements + 3. `prompts/quality-criteria.md` - Quality requirements + 4. `prompts/library/${{ steps.inputs.outputs.library }}.md` - Library-specific rules + 5. `plots/${{ steps.inputs.outputs.specification_id }}/specification.md` - The specification + + ### Step 2: Generate implementation + Create: `plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py` + + The script MUST: + - Save as `plot.png` in the current directory + - For interactive libraries (plotly, bokeh, altair, highcharts, pygal, letsplot): also save `plot.html` + + ### Step 3: Test and fix (up to 3 attempts) + Run the implementation: + ```bash + source .venv/bin/activate + cd plots/${{ steps.inputs.outputs.specification_id }}/implementations + MPLBACKEND=Agg python ${{ steps.inputs.outputs.library }}.py + ``` + + If it fails, fix and try again (max 3 attempts). + + ### Step 4: Visual self-check + Look at the generated `plot.png`: + - Does it match the specification? + - Are axes labeled correctly? + - Is the visualization clear? 
+ + ### Step 5: Format the code + ```bash + source .venv/bin/activate + ruff format plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py + ruff check --fix plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py + ``` + + ### Step 6: Commit + ```bash + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add plots/${{ steps.inputs.outputs.specification_id }}/implementations/${{ steps.inputs.outputs.library }}.py + git commit -m "feat(${{ steps.inputs.outputs.library }}): implement ${{ steps.inputs.outputs.specification_id }}" + git push -u origin implementation/${{ steps.inputs.outputs.specification_id }}/${{ steps.inputs.outputs.library }} + ``` + + ### Report result + Print exactly one line: + - `GENERATION_SUCCESS` - if everything worked + - `GENERATION_FAILED: <reason>` - if it failed + + - name: Retry Claude (on failure) + if: steps.claude.outcome == 'failure' + id: claude_retry timeout-minutes: 60 uses: anthropics/claude-code-action@v1 with: diff --git a/.github/workflows/impl-merge.yml b/.github/workflows/impl-merge.yml index 0b88c40cc2..bc507011a8 100644 --- a/.github/workflows/impl-merge.yml +++ b/.github/workflows/impl-merge.yml @@ -232,6 +232,54 @@ jobs: gh issue comment "$ISSUE" --body "$BODY" + - name: Close issue if all libraries done + if: steps.check.outputs.should_run == 'true' && steps.issue.outputs.number != '' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + ISSUE: ${{ steps.issue.outputs.number }} + SPEC_ID: ${{ steps.extract.outputs.specification_id }} + run: | + # All 9 supported libraries + LIBRARIES="matplotlib seaborn plotly bokeh altair plotnine pygal highcharts letsplot" + + # Get current labels on the issue + LABELS=$(gh issue view "$ISSUE" --json labels -q '.labels[].name' 2>/dev/null || echo "") + + # Count done implementations + DONE_COUNT=0 + for lib in $LIBRARIES; do + if 
echo "$LABELS" | grep -q "^impl:${lib}:done$"; then + DONE_COUNT=$((DONE_COUNT + 1)) + fi + done + + echo "::notice::Libraries done: $DONE_COUNT/9" + + # Close issue if all 9 libraries are done + if [ "$DONE_COUNT" -eq 9 ]; then + gh issue comment "$ISSUE" --body "## :tada: All Implementations Complete! + + All 9 library implementations for \`${SPEC_ID}\` have been successfully merged. + + | Library | Status | + |---------|--------| + | matplotlib | :white_check_mark: | + | seaborn | :white_check_mark: | + | plotly | :white_check_mark: | + | bokeh | :white_check_mark: | + | altair | :white_check_mark: | + | plotnine | :white_check_mark: | + | pygal | :white_check_mark: | + | highcharts | :white_check_mark: | + | letsplot | :white_check_mark: | + + --- + :robot: *[impl-merge](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})*" + + gh issue close "$ISSUE" + echo "::notice::Closed issue #$ISSUE - all implementations complete" + fi + - name: Trigger database sync if: steps.check.outputs.should_run == 'true' env: diff --git a/.github/workflows/impl-repair.yml b/.github/workflows/impl-repair.yml index 87da57e386..b563397f22 100644 --- a/.github/workflows/impl-repair.yml +++ b/.github/workflows/impl-repair.yml @@ -110,6 +110,68 @@ jobs: - name: Run Claude Code to repair implementation id: claude + continue-on-error: true + timeout-minutes: 45 + uses: anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + claude_args: "--model opus" + prompt: | + ## Task: Repair ${{ inputs.library }} Implementation for ${{ inputs.specification_id }} + + This is **repair attempt ${{ inputs.attempt }}/3**. The previous implementation was rejected. + + ### Step 1: Read the AI review feedback + Read `/tmp/ai_feedback.md` to understand what needs to be fixed. + + ### Step 2: Read reference files + 1. `prompts/library/${{ inputs.library }}.md` - Library-specific rules + 2. 
`plots/${{ inputs.specification_id }}/specification.md` - The specification + 3. `prompts/quality-criteria.md` - Quality requirements + + ### Step 3: Read current implementation + `plots/${{ inputs.specification_id }}/implementations/${{ inputs.library }}.py` + + ### Step 4: Fix the issues + Based on the AI feedback, fix: + - Visual quality issues + - Code quality issues + - Spec compliance issues + + ### Step 5: Test the fix + ```bash + source .venv/bin/activate + cd plots/${{ inputs.specification_id }}/implementations + MPLBACKEND=Agg python ${{ inputs.library }}.py + ``` + + ### Step 6: Visual self-check + View `plot.png` and verify fixes are correct. + + ### Step 7: Format the code + ```bash + source .venv/bin/activate + ruff format plots/${{ inputs.specification_id }}/implementations/${{ inputs.library }}.py + ruff check --fix plots/${{ inputs.specification_id }}/implementations/${{ inputs.library }}.py + ``` + + ### Step 8: Commit and push + ```bash + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add plots/${{ inputs.specification_id }}/implementations/${{ inputs.library }}.py + git commit -m "fix(${{ inputs.library }}): address review feedback for ${{ inputs.specification_id }} + + Attempt ${{ inputs.attempt }}/3 - fixes based on AI review" + git push origin ${{ env.branch }} + ``` + + ### Report result + Print: `REPAIR_SUCCESS` or `REPAIR_FAILED: <reason>` + + - name: Retry Claude (on failure) + if: steps.claude.outcome == 'failure' + id: claude_retry timeout-minutes: 45 uses: anthropics/claude-code-action@v1 with: diff --git a/.github/workflows/spec-create.yml b/.github/workflows/spec-create.yml index 68d794fdc8..45eeff025e 100644 --- a/.github/workflows/spec-create.yml +++ b/.github/workflows/spec-create.yml @@ -67,6 +67,102 @@ jobs: - name: Process with Claude + if: steps.check.outputs.should_run == 'true' + id: process + continue-on-error: true + timeout-minutes: 30 + uses: 
anthropics/claude-code-action@v1 + with: + claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + claude_args: "--model opus" + prompt: | + ## Task: Create New Specification + + You are creating a new plot specification. + + ### Issue Details + - **Title:** ${{ github.event.issue.title }} + - **Number:** #${{ github.event.issue.number }} + - **Author:** ${{ github.event.issue.user.login }} + - **Body:** + ``` + ${{ github.event.issue.body }} + ``` + + --- + + ## Instructions + + 1. **Read the rules:** `prompts/spec-id-generator.md` + + 2. **Check for duplicates:** + - List all existing specs: `ls plots/` + - Read existing specification files if titles seem similar + - If duplicate found: Post comment explaining which spec matches, then STOP + + 3. **Generate specification-id:** + - Format: `{type}-{variant}` or `{type}-{variant}-{modifier}` + - Examples: `scatter-basic`, `bar-grouped-horizontal`, `heatmap-correlation` + - All lowercase, hyphens only + + 4. **Create specification branch:** + ```bash + git checkout -b "specification/{specification-id}" + ``` + + 5. **Post analysis comment:** + Post a SHORT comment (max 3-4 sentences) to the issue using `gh issue comment`: + - Is this a valid/useful plot type? + - Does it already exist? (check `ls plots/`) + - Any concerns? + + 6. 
**Create specification files:** + - Read template: `prompts/templates/specification.md` + - Read metadata template: `prompts/templates/specification.yaml` + - Create directory: `plots/{specification-id}/` + - Create: `plots/{specification-id}/specification.md` (follow template structure) + - Create: `plots/{specification-id}/specification.yaml` with: + - `specification_id`: the generated id + - `title`: a proper title + - `created`: Use `$(date -u +"%Y-%m-%dT%H:%M:%SZ")` for current timestamp + - `issue`: ${{ github.event.issue.number }} + - `suggested`: ${{ github.event.issue.user.login }} + - `tags`: appropriate tags for this plot type + - Create empty folder: `plots/{specification-id}/implementations/` + - Create empty folder: `plots/{specification-id}/metadata/` + + 7. **Commit and push:** + ```bash + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add plots/{specification-id}/ + git commit -m "spec: add {specification-id} specification + + Created from issue #${{ github.event.issue.number }}" + git push -u origin "specification/{specification-id}" + ``` + + 8. **Update issue title:** + ```bash + gh issue edit ${{ github.event.issue.number }} --title "[{specification-id}] {original title}" + ``` + + 9. 
**Output for workflow:** + After completing, print these lines exactly: + ``` + SPECIFICATION_ID={specification-id} + BRANCH=specification/{specification-id} + ``` + + --- + + ## Important Rules + - Do NOT create a PR (the workflow does that) + - Do NOT add labels + - Do NOT close the issue + - STOP after pushing the branch + + - name: Retry Claude (on failure) + if: steps.check.outputs.should_run == 'true' && steps.process.outcome == 'failure' + id: process_retry timeout-minutes: 30 uses: anthropics/claude-code-action@v1 with: @@ -194,7 +290,7 @@ jobs: --body "$(cat <= 85) | Workflow automatically | +| `ai-approved` | AI quality check passed (score >= 90) | Workflow automatically | | `rejected` | Human rejected | Maintainer manually | -| `ai-rejected` | AI quality check failed (score < 85) | Workflow automatically | +| `ai-rejected` | AI quality check failed (score < 90) | Workflow automatically | ### Quality Score Labels @@ -760,5 +760,5 @@ pytest --pdb # Debug on failure - **Spec improvements over code fixes**: If a plot has issues, improve the spec, not the code - **Your data first**: Examples work with real user data, not fake data - **Community-driven**: Anyone can propose plots via GitHub Issues -- **Multi-LLM quality**: Claude + Gemini + GPT ensure quality (score ≥85 required) +- **Multi-LLM quality**: Claude + Gemini + GPT ensure quality (score ≥90 required) - **Full transparency**: All feedback documented in GitHub Issues, not hidden in repo files diff --git a/README.md b/README.md index ffc69fd383..e1a113b4e3 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ plots/scatter-basic/ **Issue-based workflow**: GitHub Issues as state machine for plot lifecycle. Status tracked via live-updating table (no sub-issues). Each library generates in parallel, creating PRs to a feature branch. -**AI quality review**: Claude evaluates generated plots (score ≥ 85 required). Automatic feedback loops (max 3 attempts per library). 
Quality scores flow via PR labels → per-library metadata files. +**AI quality review**: Claude evaluates generated plots (score ≥ 90 required). Automatic feedback loops (max 3 attempts per library). Quality scores flow via PR labels → per-library metadata files. See [docs/architecture/](docs/architecture/) for details. @@ -158,7 +158,7 @@ We welcome contributions! **All code is AI-generated** - you propose ideas, AI i 2. AI generates spec, creates feature branch 3. Maintainer reviews and adds `approved` label 4. 9 library implementations generate in parallel (tracked via live status table) -5. AI quality review per library (score ≥ 85 required) +5. AI quality review per library (score ≥ 90 required) 6. Auto-merge to feature branch, then to main **Important**: Don't submit code directly! If a plot has quality issues, it means the spec needs improvement, not the diff --git a/docs/concepts/claude-skill-plot-generation.md b/docs/concepts/claude-skill-plot-generation.md index 3e0a9ddd66..55326bcad3 100644 --- a/docs/concepts/claude-skill-plot-generation.md +++ b/docs/concepts/claude-skill-plot-generation.md @@ -354,7 +354,7 @@ def generate_with_feedback_loop( library: str, rules: Rules, max_attempts: int = 3, - pass_threshold: int = 85 + pass_threshold: int = 90 ) -> GenerationResult: """ Main generation loop with self-correction @@ -471,7 +471,7 @@ workflow: action: Self-evaluate code against quality criteria loop: max_iterations: ${max_attempts} - continue_if: quality_score < 85 + continue_if: quality_score < 90 next_step: optimize - step: optimize diff --git a/docs/workflow.md b/docs/workflow.md index e0f147d6fb..7c5852466c 100644 --- a/docs/workflow.md +++ b/docs/workflow.md @@ -155,8 +155,8 @@ PR created → **`impl-review.yml`** runs: ``` impl-review.yml - ├─ Score ≥85 → [ai-approved] → triggers impl-merge.yml - └─ Score <85 → [ai-rejected] → triggers impl-repair.yml + ├─ Score ≥90 → [ai-approved] → triggers impl-merge.yml + └─ Score <90 → [ai-rejected] → triggers 
impl-repair.yml ``` ### Flow 6: Repair Loop (max 3 attempts) @@ -215,8 +215,8 @@ graph LR A[Issue + generate:matplotlib] --> B[impl-generate.yml] B --> C[PR created] C --> D[impl-review.yml] - D -->|Score ≥85| E[ai-approved] - D -->|Score <85| F[ai-rejected] + D -->|Score ≥90| E[ai-approved] + D -->|Score <90| F[ai-rejected] F -->|Attempt <3| G[impl-repair.yml] G --> D F -->|Attempt =3| H[not-feasible] @@ -245,7 +245,7 @@ graph LR **PR Labels:** | Label | Meaning | |-------|---------| -| `ai-approved` | Passed review (score ≥85) | +| `ai-approved` | Passed review (score ≥90) | | `ai-rejected` | Failed review, will retry | | `ai-attempt-1/2/3` | Retry counter | | `not-feasible` | 3x failed, library cannot implement | @@ -283,8 +283,8 @@ graph TD E -->|Creates PR| F[Implementation PR] F --> G{Flow 5: impl-review.yml} - G -->|Score ≥85| H[ai-approved] - G -->|Score <85| I[ai-rejected] + G -->|Score ≥90| H[ai-approved] + G -->|Score <90| I[ai-rejected] I --> J{Attempts < 3?} J -->|Yes| K[Flow 6: impl-repair.yml] diff --git a/prompts/quality-evaluator.md b/prompts/quality-evaluator.md index b39b518367..7b977db5b4 100644 --- a/prompts/quality-evaluator.md +++ b/prompts/quality-evaluator.md @@ -139,9 +139,9 @@ Simply add points for each passed criterion. | Score | Recommendation | |-------|----------------| -| >= 85 | `approve` | -| 75-84 | `request_changes` | -| < 75 | `reject` | +| >= 90 | `approve` | +| 80-89 | `request_changes` | +| < 80 | `reject` | ## Rules