-
Notifications
You must be signed in to change notification settings - Fork 0
297 lines (266 loc) · 11.9 KB
/
daily-regen.yml
File metadata and controls
297 lines (266 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
name: "Scheduled: Regen oldest specs"
# NOTE: on `schedule` events `github.event.inputs` is empty, so both fields
# below fall through to their defaults ('1' / 'haiku') in the run name.
run-name: "Scheduled regen (${{ github.event.inputs.specification_id || github.event.inputs.count || '1' }} / ${{ github.event.inputs.model || 'haiku' }})"
# Picks the N oldest specs (by most-recent implementation `updated` timestamp)
# and re-dispatches `bulk-generate.yml` for each. Default N=1 per cron tick.
#
# Schedule: every 2h, skipping the 20:00–24:00 Berlin (CEST) evening window.
# Berlin CEST run hours: 00, 02, 04, 06, 08, 10, 12, 14, 16, 18 → UTC 22, 00,
# 02, 04, 06, 08, 10, 12, 14, 16. The 20:00 and 22:00 Berlin slots (UTC 18, 20)
# are intentionally skipped so runs never start during the user's evening.
#
# bulk-generate is serialised via its own concurrency group. Default model is
# Haiku, so a single spec completes well within the 2h slot, leaving the user
# window clean. The model can be overridden per manual run.
#
# Triggers:
# - schedule: 10× daily (UTC, every 2h except 18:00 and 20:00 UTC) → Haiku
# - workflow_dispatch: manual, with inputs for spec id, model, count, dry-run
on:
  schedule:
    # 10 ticks/day: every 2h UTC, omitting 18:00 and 20:00 UTC (the Berlin
    # evening window described in the header comment).
    - cron: '0 0,2,4,6,8,10,12,14,16,22 * * *'
  workflow_dispatch:
    inputs:
      specification_id:
        description: "Specific spec id to regen (leave empty to pick the oldest)"
        required: false
        default: ''
      model:
        description: "Claude model to use across all generate / review / repair steps"
        required: false
        type: choice
        default: 'haiku'
        options:
          - haiku
          - sonnet
          - opus
      count:
        # Workflow inputs arrive as strings; the pick script int()-converts.
        description: "How many of the oldest specs to regen (ignored when specification_id is set, default 1)"
        required: false
        default: '1'
      min_age_hours:
        description: "Skip specs regen'd within this many hours (default 20)"
        required: false
        default: '20'
      dry_run:
        # `type: boolean` → `inputs.dry_run` is a real boolean in expressions,
        # so `!inputs.dry_run` below behaves as expected (and is true on
        # schedule events, where the inputs context is empty).
        description: "Just print picks, do not trigger bulk-generate"
        type: boolean
        default: false
# Workflow-level default; the preflight-dispatch job widens its own
# permissions (jobs may override the workflow-level grant).
permissions:
  contents: read
  actions: write # dispatch bulk-generate.yml via `gh workflow run`
# One regen pipeline at a time: a tick that fires while a previous run is
# still active queues behind it rather than cancelling it.
concurrency:
  group: daily-regen
  cancel-in-progress: false
jobs:
  # pick: choose which spec(s) to regenerate this tick — either the manual
  # override, or the N specs whose newest metadata `updated` stamp is oldest.
  pick:
    runs-on: ubuntu-latest
    outputs:
      # Space-separated spec ids (human-readable summary).
      specs: ${{ steps.pick.outputs.specs }}
      # JSON array of spec ids — consumed by the preflight matrix via fromJson.
      specs_json: ${{ steps.pick.outputs.specs_json }}
      # Number of picks as a string; '0' gates the downstream job off.
      count: ${{ steps.pick.outputs.count }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.13'
      - name: Install PyYAML
        run: pip install pyyaml
      - name: Pick oldest spec(s) (or use override)
        id: pick
        env:
          # `inputs` context is empty on schedule events → `||` fallbacks apply.
          COUNT: ${{ inputs.count || '1' }}
          MIN_AGE_HOURS: ${{ inputs.min_age_hours || '20' }}
          SPEC_OVERRIDE: ${{ inputs.specification_id }}
        run: |
          python3 <<'PY'
          import json
          import os
          import sys
          from datetime import datetime, timedelta, timezone
          from pathlib import Path

          import yaml

          COUNT = int(os.environ["COUNT"])
          MIN_AGE = timedelta(hours=int(os.environ["MIN_AGE_HOURS"]))
          OVERRIDE = (os.environ.get("SPEC_OVERRIDE") or "").strip()
          NOW = datetime.now(timezone.utc)
          specs_dir = Path("plots")

          # Manual override: caller specified a spec id — validate and use it.
          if OVERRIDE:
              if not (specs_dir / OVERRIDE / "specification.md").is_file():
                  print(f"::error::Spec '{OVERRIDE}' not found at plots/{OVERRIDE}/specification.md")
                  sys.exit(1)
              picks = [OVERRIDE]
              print(f"::notice::Using override spec: {OVERRIDE}")
              github_output = os.environ["GITHUB_OUTPUT"]
              with open(github_output, "a", encoding="utf-8") as f:
                  f.write(f"specs={OVERRIDE}\n")
                  f.write(f"specs_json={json.dumps(picks)}\n")
                  f.write(f"count=1\n")
              sys.exit(0)

          # Scheduled path: scan plots/*/metadata/python/*.yaml and rank each
          # spec by the newest `updated` (or `created`) stamp it carries.
          candidates: list[tuple[datetime, str]] = []
          for spec_dir in sorted(specs_dir.iterdir()):
              if not spec_dir.is_dir() or spec_dir.name.startswith("."):
                  continue
              meta_dir = spec_dir / "metadata" / "python"
              if not meta_dir.is_dir():
                  continue
              # Track the lexicographically-largest timestamp string across all
              # metadata files (ISO-8601 strings sort chronologically).
              latest_updated: str | None = None
              for yaml_file in meta_dir.glob("*.yaml"):
                  try:
                      data = yaml.safe_load(yaml_file.read_text(encoding="utf-8")) or {}
                  except Exception:
                      # Unparseable metadata file: ignore it, keep scanning.
                      continue
                  updated = data.get("updated") or data.get("created")
                  if not updated:
                      continue
                  s = str(updated)
                  if latest_updated is None or s > latest_updated:
                      latest_updated = s
              if not latest_updated:
                  # No metadata yet → treat as ancient, candidate for regen
                  candidates.append((datetime.min.replace(tzinfo=timezone.utc), spec_dir.name))
                  continue
              try:
                  dt = datetime.fromisoformat(latest_updated.replace("Z", "+00:00"))
                  if dt.tzinfo is None:
                      dt = dt.replace(tzinfo=timezone.utc)
              except Exception:
                  # Unparseable timestamp → skip the spec rather than guess.
                  continue
              if NOW - dt < MIN_AGE:
                  continue  # too fresh to re-roll
              candidates.append((dt, spec_dir.name))

          candidates.sort()  # oldest first
          picks = [name for _, name in candidates[:COUNT]]
          print(f"::notice::Eligible specs: {len(candidates)} picked: {picks}")
          for dt, name in candidates[:COUNT]:
              print(f" - {name:40s} latest_updated={dt.isoformat()}")
          github_output = os.environ["GITHUB_OUTPUT"]
          with open(github_output, "a", encoding="utf-8") as f:
              f.write(f"specs={' '.join(picks)}\n")
              f.write(f"specs_json={json.dumps(picks)}\n")
              f.write(f"count={len(picks)}\n")
          PY
  # ============================================================================
  # Pre-flight: per spec, run autonomous spec polish + cross-library similarity
  # audit, then dispatch bulk-generate with the resulting change_requests.
  #
  # Each matrix entry is one spec from the pick job. We do polish + audit +
  # dispatch in the same job so we don't have to aggregate matrix outputs back
  # into a separate dispatch job (which is awkward in GitHub Actions).
  #
  # The two pre-flight LLM steps are HARDCODED to Haiku regardless of
  # `inputs.model` — they're narrow, cheap audits. The user-selected model is
  # passed through to bulk-generate (and from there to impl-generate / review /
  # repair) unchanged.
  # ============================================================================
  preflight-dispatch:
    needs: pick
    # Skip the whole job when pick found nothing eligible. Outputs are
    # strings, hence the string comparison against '0'.
    if: ${{ needs.pick.outputs.count != '0' }}
    runs-on: ubuntu-latest
    permissions:
      contents: write # spec polish: branch + commit
      pull-requests: write # spec polish: open PR + add label
      actions: write # dispatch bulk-generate
      id-token: write
    strategy:
      matrix:
        spec_id: ${{ fromJson(needs.pick.outputs.specs_json) }}
      fail-fast: false
      max-parallel: 1 # serialize so polish PRs and dispatches don't race
    # Note on dry_run: the JOB always runs when there's a spec to process, so
    # operators can exercise skip-gate + similarity-audit + collect on demand.
    # Side-effect steps (polish, dispatch) are individually gated on
    # `!inputs.dry_run` below.
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          # Full history — the polish step creates branches/commits.
          fetch-depth: 0
- name: Skip-gate — open PRs touching this spec?
id: gate
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SPEC_ID: ${{ matrix.spec_id }}
run: |
# If any PR is open that touches plots/{spec}/, skip the polish step
# to avoid racing against human edits or stacking auto-polish PRs.
# Similarity audit still runs — it's read-only.
OPEN=$(gh pr list \
--repo "${{ github.repository }}" \
--search "plots/${SPEC_ID}/ in:files is:open" \
--json number --jq 'length' 2>/dev/null || echo 0)
if [ "${OPEN:-0}" -gt 0 ]; then
echo "::notice::Open PR(s) touch plots/${SPEC_ID}/ — skipping spec polish"
echo "skip_polish=1" >> "$GITHUB_OUTPUT"
else
echo "skip_polish=0" >> "$GITHUB_OUTPUT"
fi
      - name: Spec polish (autonomous, opens PR — no auto-merge)
        # Gated: skip when an open PR already touches this spec (see gate
        # step) and on dry runs.
        if: ${{ steps.gate.outputs.skip_polish == '0' && !inputs.dry_run }}
        # Optional quality pass: a transient action failure here must not
        # block the main regeneration pipeline. Skip cleanly and continue.
        continue-on-error: true
        timeout-minutes: 15
        uses: anthropics/claude-code-action@2cc1ac1331eac7a6a96d716dd204dd2888d0fcd2 # v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          # Intentionally hardcoded to Haiku (cheap audit) — see job header.
          claude_args: '--model haiku'
          allowed_bots: '*'
          prompt: |
            Read `prompts/workflow-prompts/spec-polish-claude.md` and follow those instructions.
            Variables for this run:
            - SPEC_ID: ${{ matrix.spec_id }}
      - name: Cross-library similarity audit
        # Read-only audit; if it fails, fall back to empty change_requests
        # rather than aborting the dispatch.
        # NOTE(review): the collect step below reads /tmp/change-requests.json,
        # so the audit prompt is presumably expected to write that file —
        # confirm against prompts/workflow-prompts/impl-similarity-claude.md.
        continue-on-error: true
        timeout-minutes: 15
        uses: anthropics/claude-code-action@2cc1ac1331eac7a6a96d716dd204dd2888d0fcd2 # v1
        with:
          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
          # Intentionally hardcoded to Haiku (cheap audit) — see job header.
          claude_args: '--model haiku'
          allowed_bots: '*'
          prompt: |
            Read `prompts/workflow-prompts/impl-similarity-claude.md` and follow those instructions.
            Variables for this run:
            - SPEC_ID: ${{ matrix.spec_id }}
- name: Collect change_requests
id: collect
run: |
# Default to empty object if the audit never wrote a file (e.g.
# fewer than 2 metadata files exist).
if [ -f /tmp/change-requests.json ]; then
CR=$(cat /tmp/change-requests.json)
# Validate it's a JSON object; fall back to empty otherwise.
if ! echo "$CR" | jq -e 'type == "object"' >/dev/null 2>&1; then
echo "::warning::/tmp/change-requests.json is not a valid JSON object; using {} (got: ${CR})"
CR='{}'
fi
else
CR='{}'
fi
# Compact + escape newlines so it survives as a single GitHub Actions output line.
CR_COMPACT=$(echo "$CR" | jq -c '.')
echo "change_requests=${CR_COMPACT}" >> "$GITHUB_OUTPUT"
flagged=$(echo "$CR_COMPACT" | jq 'length')
echo "::notice::change_requests for ${{ matrix.spec_id }}: ${flagged} lib(s) flagged — ${CR_COMPACT}"
      - name: Dispatch bulk-generate with change_requests
        # Side-effect step: suppressed on dry runs (true on schedule events,
        # where the inputs context is empty → !inputs.dry_run is true).
        if: ${{ !inputs.dry_run }}
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SPEC_ID: ${{ matrix.spec_id }}
          # Empty on schedule events → defaults to haiku.
          MODEL: ${{ inputs.model || 'haiku' }}
          CHANGE_REQUESTS: ${{ steps.collect.outputs.change_requests }}
        run: |
          echo "::notice::Dispatching bulk-generate for ${SPEC_ID} (all 9 libs, model=${MODEL})"
          gh workflow run bulk-generate.yml \
            --repo "${{ github.repository }}" \
            -f specification_id="${SPEC_ID}" \
            -f library=all \
            -f model="${MODEL}" \
            -f change_requests="${CHANGE_REQUESTS}"
          # Small pause so GitHub's webhook processing has a moment before
          # the next matrix entry's dispatch (matrix is serialized via
          # max-parallel: 1, so this is between specs).
          sleep 5