-
Notifications
You must be signed in to change notification settings - Fork 170
743 lines (670 loc) · 31.9 KB
/
agentic-ci-daily.yml
File metadata and controls
743 lines (670 loc) · 31.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Entry point for the agentic audit workflow: runs on a weekday schedule and
# can also be dispatched manually with an optional suite override.
name: 'Agentic CI: Daily Audit'

on:
  schedule:
    # Weekdays at 08:00 UTC.
    - cron: '0 8 * * 1-5'
  workflow_dispatch:
    inputs:
      suite:
        description: 'Override which suite to run (docs-and-references, dependencies, structure, code-quality, test-health, all)'
        required: false
        default: ''

# Token scopes granted to the jobs below.
permissions:
  contents: write
  pull-requests: write

# A newer run never cancels one that is already in progress.
concurrency:
  group: agentic-ci-daily
  cancel-in-progress: false

jobs:
determine-suite:
  # Decides which audit suite(s) to run and publishes them as a JSON array in
  # the `suites` output. A manual override wins; otherwise the suite is chosen
  # from the UTC day of the week. An empty array means "nothing to run".
  runs-on: ubuntu-latest
  outputs:
    suites: ${{ steps.pick.outputs.suites }}
  steps:
    - name: Pick suite(s) for today
      id: pick
      env:
        # The dispatch input is handed over through the environment rather
        # than expanded inline in the script, so its content is never parsed
        # as shell code.
        OVERRIDE: ${{ github.event.inputs.suite }}
      run: |
        if [ "$OVERRIDE" = "all" ]; then
          echo 'suites=["docs-and-references","dependencies","structure","code-quality","test-health"]' >> "$GITHUB_OUTPUT"
          echo "Running all suites"
          exit 0
        fi
        if [ -n "$OVERRIDE" ]; then
          # The value is embedded in a JSON array and later used as a recipe
          # directory name, so only a conservative character set is accepted.
          case "$OVERRIDE" in
            *[!A-Za-z0-9_-]*)
              echo "::error::Invalid suite override: only letters, digits, '-' and '_' are allowed."
              exit 1
              ;;
          esac
          echo "suites=[\"${OVERRIDE}\"]" >> "$GITHUB_OUTPUT"
          echo "Running override suite: ${OVERRIDE}"
          exit 0
        fi
        # Day-of-week rotation: 1=Mon .. 5=Fri
        DOW=$(date -u +%u)
        case "$DOW" in
          1) SUITE="docs-and-references" ;;
          2) SUITE="dependencies" ;;
          3) SUITE="structure" ;;
          4) SUITE="code-quality" ;;
          5) SUITE="test-health" ;;
          *) echo "suites=[]" >> "$GITHUB_OUTPUT"; echo "Weekend - no suite"; exit 0 ;;
        esac
        echo "suites=[\"${SUITE}\"]" >> "$GITHUB_OUTPUT"
        echo "Running ${DOW}/weekday suite: ${SUITE}"
audit:
  # One matrix leg per suite selected by determine-suite; the job is skipped
  # entirely when that job emitted an empty list.
  needs: determine-suite
  if: needs.determine-suite.outputs.suites != '[]'
  runs-on:
    - self-hosted
    - agentic-ci
  timeout-minutes: 40
  strategy:
    fail-fast: false
    matrix:
      suite: ${{ fromJSON(needs.determine-suite.outputs.suites) }}
  concurrency:
    # Deliberately not cancel-in-progress: if a run were cancelled after the
    # agent's git push but before gh pr create, the branch would be orphaned
    # with no attempted_fixes record. A queued duplicate run is the lesser
    # evil. See _fix-policy.md "Atomicity".
    group: agentic-ci-daily-${{ matrix.suite }}
    cancel-in-progress: false
  steps:
- name: Check required config
  # Stops the job early when the model repository variable is empty.
  env:
    AGENTIC_CI_MODEL: ${{ vars.AGENTIC_CI_MODEL }}
  run: |
    if [ -n "$AGENTIC_CI_MODEL" ]; then
      exit 0
    fi
    echo "::error::AGENTIC_CI_MODEL variable is not set. Configure it in repo settings."
    exit 1
- name: Checkout main
  # Checks out the main branch with complete history (fetch-depth: 0).
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6
  with:
    fetch-depth: 0
    ref: main
- name: Restore runner memory
  # Restores the per-suite state directory (and graphify output) from the
  # most recent cache whose key starts with the suite prefix.
  id: cache
  uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5
  with:
    path: |
      .agentic-ci-state
      graphify-out
    # The primary key is unique per run *attempt*. With only run_id in the
    # key, a re-run would score an exact hit on the cache saved by the
    # previous attempt, and actions/cache skips its post-job save on an exact
    # hit, so state written during the re-run would be silently dropped.
    key: agentic-ci-state-${{ matrix.suite }}-${{ github.run_id }}-${{ github.run_attempt }}
    restore-keys: |
      agentic-ci-state-${{ matrix.suite }}-
- name: Initialize runner memory
  # Seeds an empty state file when none was restored, then prints the state
  # that this run starts from.
  env:
    SUITE: ${{ matrix.suite }}
  run: |
    STATE_FILE=.agentic-ci-state/runner-state.json
    mkdir -p .agentic-ci-state
    [ -f "$STATE_FILE" ] || printf '{"suite":"%s","last_run":null,"known_issues":[],"baselines":{}}\n' "${SUITE}" > "$STATE_FILE"
    echo "Runner memory state:"
    cat "$STATE_FILE"
- name: Install dev environment
  # Builds the dev environment, puts its bin directory on PATH for later
  # steps, and prints the package versions on a best-effort basis.
  run: |
    make install-dev
    echo "${{ github.workspace }}/.venv/bin" >> "$GITHUB_PATH"
    # The version printout is informational only; a failure here is ignored.
    if ! .venv/bin/python - <<'PY' 2>/dev/null
    from data_designer.config._version import __version__ as config_version
    from data_designer.engine._version import __version__ as engine_version
    print(f' config: {config_version} engine: {engine_version}')
    PY
    then
      echo " (version check skipped)"
    fi
- name: Install graphify
  # Only the structure suite needs graphify; it gets its own virtualenv under
  # /tmp so it does not touch the project environment.
  if: matrix.suite == 'structure'
  run: |
    # Without pipefail the exit status of the pipeline is tail's, so a failed
    # pip install would pass silently and only surface later in the audit.
    set -o pipefail
    python -m venv /tmp/graphify-venv
    /tmp/graphify-venv/bin/python -m pip install graphifyy==0.4.23 --quiet 2>&1 | tail -3
- name: Configure git identity
  # Commits made later in this job are attributed to the github-actions bot.
  run: |
    git config user.name "github-actions[bot]"
    git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
- name: Pre-flight checks
  # Verifies that the claude CLI is installed and, when an API endpoint and
  # key are configured, that a minimal request to it succeeds.
  env:
    ANTHROPIC_BASE_URL: ${{ secrets.AGENTIC_CI_API_BASE_URL }}
    ANTHROPIC_API_KEY: ${{ secrets.AGENTIC_CI_API_KEY }}
    AGENTIC_CI_MODEL: ${{ vars.AGENTIC_CI_MODEL }}
  run: |
    if ! command -v claude &> /dev/null; then
      echo "::error::claude CLI not found in PATH"
      exit 1
    fi
    echo "Claude CLI version: $(claude --version 2>&1 || true)"
    if [ -n "$ANTHROPIC_BASE_URL" ] && [ -n "$ANTHROPIC_API_KEY" ]; then
      # A transport failure (timeout, DNS, refused connection) makes curl exit
      # non-zero. Tolerate that here so the script reaches the status check
      # and emits the error annotation instead of dying on the assignment.
      HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
        --max-time 10 \
        -X POST "${ANTHROPIC_BASE_URL}/v1/messages" \
        -H "Content-Type: application/json" \
        -H "x-api-key: ${ANTHROPIC_API_KEY}" \
        -H "anthropic-version: 2023-06-01" \
        -d "{\"model\":\"${AGENTIC_CI_MODEL}\",\"max_tokens\":5,\"messages\":[{\"role\":\"user\",\"content\":\"hi\"}]}") || true
      # Anything that is not a plain number is treated as "no response".
      case "$HTTP_CODE" in
        ''|*[!0-9]*) HTTP_CODE=000 ;;
      esac
      if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
        echo "::error::API pre-flight failed with HTTP ${HTTP_CODE}"
        exit 1
      fi
      echo "API pre-flight passed (HTTP ${HTTP_CODE})"
    fi
- name: Run audit recipe
  # Assembles the audit prompt for this suite and streams the claude CLI
  # output to both the job log and /tmp/claude-audit-log.txt.
  id: audit
  env:
    ANTHROPIC_BASE_URL: ${{ secrets.AGENTIC_CI_API_BASE_URL }}
    ANTHROPIC_API_KEY: ${{ secrets.AGENTIC_CI_API_KEY }}
    AGENTIC_CI_MODEL: ${{ vars.AGENTIC_CI_MODEL }}
    DISABLE_PROMPT_CACHING: "1"
    GH_TOKEN: ${{ github.token }}
    GITHUB_REPOSITORY: ${{ github.repository }}
    SUITE: ${{ matrix.suite }}
  run: |
    set -o pipefail
    RECIPES_ROOT=".agents/recipes"
    RECIPE_FILE="${RECIPES_ROOT}/${SUITE}/recipe.md"
    if [ ! -f "$RECIPE_FILE" ]; then
      echo "::error::Recipe not found: ${RECIPE_FILE}"
      exit 1
    fi
    # Prompt layout: phase directive, then _runner.md, then _fix-policy.md,
    # then the recipe body with its YAML frontmatter removed.
    PHASE_DIRECTIVE=$(cat "${RECIPES_ROOT}/_phase-audit.md")
    RUNNER_CTX=$(cat "${RECIPES_ROOT}/_runner.md")
    FIX_POLICY=$(cat "${RECIPES_ROOT}/_fix-policy.md")
    RECIPE_BODY=$(sed '1,/^---$/{ /^---$/,/^---$/d }' "$RECIPE_FILE")
    TODAY=$(date -u +%Y-%m-%d)
    # Fill in the {{suite}}, {{date}} and {{memory_path}} placeholders.
    PROMPT=$(
      printf '%s\n\n%s\n\n%s\n\n%s\n' "$PHASE_DIRECTIVE" "$RUNNER_CTX" "$FIX_POLICY" "$RECIPE_BODY" \
        | sed \
            -e "s|{{suite}}|${SUITE}|g" \
            -e "s|{{date}}|${TODAY}|g" \
            -e "s|{{memory_path}}|.agentic-ci-state|g"
    )
    stdbuf -oL -eL claude \
      --model "$AGENTIC_CI_MODEL" \
      -p "$PROMPT" \
      --max-turns 50 \
      --output-format stream-json \
      --verbose \
      2>&1 | tee /tmp/claude-audit-log.txt
- name: Check fix backlog
  # Publishes the number of fix_backlog entries in the state file as the
  # `size` output; an unreadable state file counts as zero.
  id: backlog
  if: steps.audit.outcome == 'success' && matrix.suite != 'test-health'
  run: |
    STATE_FILE=.agentic-ci-state/runner-state.json
    BACKLOG_SIZE=$(jq '.fix_backlog // [] | length' "$STATE_FILE" 2>/dev/null || echo 0)
    printf 'size=%s\n' "${BACKLOG_SIZE}" >> "$GITHUB_OUTPUT"
    printf 'fix_backlog has %s entries\n' "${BACKLOG_SIZE}"
- name: Snapshot pre-fix attempted_fixes
  # Records an (id, number-of-attempts) pair for every attempted_fixes entry
  # before the fix step runs. The post-fix gates compare against this to find
  # the entries that grew during *this* run, rather than picking up the last
  # globally-open entry, which could be a stale orphan from a crashed run.
  id: snapshot
  if: steps.audit.outcome == 'success' && steps.backlog.outcome == 'success' && matrix.suite != 'test-health' && fromJSON(steps.backlog.outputs.size || '0') > 0
  run: |
    SNAPSHOT_FILE=/tmp/prior-attempted-fixes.json
    jq -c '.attempted_fixes // [] | map({id, n: (.attempts | length)})' \
      .agentic-ci-state/runner-state.json > "$SNAPSHOT_FILE"
    echo "Snapshot: $(cat "$SNAPSHOT_FILE")"
- name: Run fix recipe
  id: fix
  # A custom if: replaces the implicit success() check, so the snapshot
  # outcome has to be tested explicitly. Otherwise a failed snapshot (corrupt
  # runner-state, disk error) would leave /tmp/prior-attempted-fixes.json
  # missing, the scope gate's jq --slurpfile would short-circuit, and the
  # gate would exit 0, silently approving the agent's PR.
  if: steps.audit.outcome == 'success' && steps.backlog.outcome == 'success' && steps.snapshot.outcome == 'success' && matrix.suite != 'test-health' && fromJSON(steps.backlog.outputs.size || '0') > 0
  env:
    ANTHROPIC_BASE_URL: ${{ secrets.AGENTIC_CI_API_BASE_URL }}
    ANTHROPIC_API_KEY: ${{ secrets.AGENTIC_CI_API_KEY }}
    AGENTIC_CI_MODEL: ${{ vars.AGENTIC_CI_MODEL }}
    DISABLE_PROMPT_CACHING: "1"
    GH_TOKEN: ${{ github.token }}
    GITHUB_REPOSITORY: ${{ github.repository }}
    SUITE: ${{ matrix.suite }}
  run: |
    set -o pipefail
    RECIPES_ROOT=".agents/recipes"
    RECIPE_FILE="${RECIPES_ROOT}/${SUITE}/recipe.md"
    # Prompt layout: phase directive, then _runner.md, then _fix-policy.md,
    # then the recipe body with its YAML frontmatter removed.
    PHASE_DIRECTIVE=$(cat "${RECIPES_ROOT}/_phase-fix.md")
    RUNNER_CTX=$(cat "${RECIPES_ROOT}/_runner.md")
    FIX_POLICY=$(cat "${RECIPES_ROOT}/_fix-policy.md")
    RECIPE_BODY=$(sed '1,/^---$/{ /^---$/,/^---$/d }' "$RECIPE_FILE")
    TODAY=$(date -u +%Y-%m-%d)
    # Fill in the {{suite}}, {{date}} and {{memory_path}} placeholders.
    PROMPT=$(
      printf '%s\n\n%s\n\n%s\n\n%s\n' "$PHASE_DIRECTIVE" "$RUNNER_CTX" "$FIX_POLICY" "$RECIPE_BODY" \
        | sed \
            -e "s|{{suite}}|${SUITE}|g" \
            -e "s|{{date}}|${TODAY}|g" \
            -e "s|{{memory_path}}|.agentic-ci-state|g"
    )
    stdbuf -oL -eL claude \
      --model "$AGENTIC_CI_MODEL" \
      -p "$PROMPT" \
      --max-turns 50 \
      --output-format stream-json \
      --verbose \
      2>&1 | tee /tmp/claude-fix-log.txt
- name: Validate fix scope (allowlist + LOC + file cap)
  # Workflow-level enforcement of the localized-fix bar from
  # _fix-policy.md. Recipe instructions alone cannot bind the agent;
  # this gate re-derives the diff and closes the PR if the agent
  # escaped the allowlist or the LOC/file caps. The docs-and-references
  # suite additionally gets AST-based docstring-only enforcement on
  # .py edits (no non-docstring/non-comment lines may change).
  #
  # The step itself always exits 0; a rejection is reported through the
  # `rejected` output so that later steps can persist state before failing.
  id: scope_gate
  # Run even if the fix step failed after opening a PR. The snapshot
  # is the only hard precondition for identifying newly-open attempts.
  if: always() && steps.snapshot.outcome == 'success'
  env:
    SUITE: ${{ matrix.suite }}
    GH_TOKEN: ${{ github.token }}
  run: |
    set -o pipefail
    # Identify every attempted_fixes entry that grew during *this* run
    # (vs the pre-fix snapshot), not just the last globally-open entry.
    OPEN_ENTRIES=$(jq -c --slurpfile prior /tmp/prior-attempted-fixes.json '
      (($prior[0] // []) | map({key: .id, value: .n}) | from_entries) as $p
      | [
          .attempted_fixes // []
          | .[]
          | select(
              ((.attempts | last | .outcome) == "open")
              and ((.attempts | length) > ($p[.id] // 0))
            )
        ]
    ' .agentic-ci-state/runner-state.json)
    OPEN_COUNT=$(echo "$OPEN_ENTRIES" | jq 'length')
    if [ "$OPEN_COUNT" -eq 0 ]; then
      echo "No new open attempted_fix recorded by this run; nothing to validate."
      exit 0
    fi
    echo "Validating ${OPEN_COUNT} new open attempted_fix entries."
    REJECTED=0
    while IFS= read -r OPEN; do
      BRANCH=$(echo "$OPEN" | jq -r '.attempts | last | .branch // empty')
      PR_NUMBER=$(echo "$OPEN" | jq -r '.attempts | last | .pr_number // empty')
      FINDING_ID=$(echo "$OPEN" | jq -r '.id')
      DIFF_REF=""
      # REASONS accumulates "\n"-escaped bullet lines; it is expanded with
      # printf '%b' when reported. Empty means the entry passed.
      REASONS=""
      if [ -z "$BRANCH" ] && [ -n "$PR_NUMBER" ] && [ "$PR_NUMBER" != "null" ]; then
        BRANCH=$(gh pr view "$PR_NUMBER" --json headRefName -q .headRefName 2>/dev/null || true)
        if [ -n "$BRANCH" ]; then
          echo "::warning::Open attempt had no branch; recovered $BRANCH from PR #$PR_NUMBER."
        fi
      fi
      if [ -z "$BRANCH" ]; then
        REASONS="${REASONS}- open attempt has no branch and could not be recovered from PR ${PR_NUMBER:-unknown}\n"
      fi
      # Diff against the actual pushed branch (origin/$BRANCH), not
      # local HEAD — HEAD may not match what was pushed if the agent
      # left the working tree in an unexpected state.
      if [ -n "$BRANCH" ]; then
        if git fetch --depth=50 origin "$BRANCH" 2>/dev/null; then
          DIFF_REF="FETCH_HEAD"
        else
          REASONS="${REASONS}- origin/$BRANCH was not fetchable for scope validation\n"
        fi
      fi
      # Per-suite allowlist of paths the fix is permitted to touch.
      case "$SUITE" in
        docs-and-references)
          ALLOW='^(architecture/|docs/|README\.md$|CONTRIBUTING\.md$|DEVELOPMENT\.md$|STYLEGUIDE\.md$|packages/[^/]+/src/.*\.py$)'
          ;;
        dependencies)
          ALLOW='^packages/[^/]+/pyproject\.toml$'
          ;;
        structure|code-quality)
          ALLOW='^packages/[^/]+/src/.*\.py$'
          ;;
        *)
          echo "::error::No allowlist defined for suite: $SUITE"
          exit 1
          ;;
      esac
      if [ -n "$DIFF_REF" ]; then
        mapfile -t FILE_ARR < <(git diff --name-only "origin/main...$DIFF_REF")
        FILE_COUNT=${#FILE_ARR[@]}
        if [ "$FILE_COUNT" -gt 0 ]; then
          BAD=$(printf '%s\n' "${FILE_ARR[@]}" | grep -vE "$ALLOW" | grep -v '^$' || true)
        else
          BAD=""
        fi
        # Insertions plus deletions, taken from the --shortstat summary line.
        LOC_DELTA=$(git diff --shortstat "origin/main...$DIFF_REF" \
          | awk '{ a=0; d=0; for (i=1;i<=NF;i++) { if ($i ~ /insertion/) a=$(i-1); if ($i ~ /deletion/) d=$(i-1) } print a+d }')
        : "${LOC_DELTA:=0}"
      else
        FILE_ARR=()
        FILE_COUNT=0
        BAD=""
        LOC_DELTA=0
      fi
      if [ -n "$BAD" ]; then
        REASONS="${REASONS}- files outside allowlist:\n$(echo "$BAD" | sed 's/^/ - /')\n"
      fi
      if [ "$FILE_COUNT" -gt 3 ]; then
        REASONS="${REASONS}- file count ($FILE_COUNT) exceeds 3-file cap\n"
      fi
      if [ "$LOC_DELTA" -gt 50 ]; then
        REASONS="${REASONS}- LOC delta ($LOC_DELTA) exceeds 50-line cap\n"
      fi
      # Docs suite: AST-enforce the docstring-only caveat on .py edits.
      if [ "$SUITE" = "docs-and-references" ] && [ "$FILE_COUNT" -gt 0 ]; then
        PY_FILES=$(printf '%s\n' "${FILE_ARR[@]}" | grep -E '^packages/[^/]+/src/.*\.py$' || true)
        if [ -n "$PY_FILES" ]; then
          # The checker prints one violation per line and exits non-zero when
          # it found any. Its exit status is kept so that a checker that
          # crashed (non-zero status, nothing on stdout) is told apart from a
          # clean pass instead of being read as "no violations".
          CHECK_STATUS=0
          NON_DOCSTRING=$(PY_FILES="$PY_FILES" DIFF_REF="$DIFF_REF" python3 - <<'PY'
    import ast
    import os
    import re
    import subprocess
    import sys

    files = [p for p in os.environ['PY_FILES'].splitlines() if p]
    diff_ref = os.environ['DIFF_REF']
    violations = []
    hunk_re = re.compile(r'@@ -(?P<old>\d+)(?:,\d+)? \+(?P<new>\d+)(?:,\d+)? @@')

    try:
        base_ref = subprocess.check_output(
            ['git', 'merge-base', 'origin/main', diff_ref], text=True
        ).strip()
    except subprocess.CalledProcessError:
        violations.append(f'could not compute merge base for origin/main...{diff_ref}')
        base_ref = 'origin/main'


    def collect_docstring_lines(ref, path, *, missing_ok=False):
        """Return the set of line numbers covered by docstrings in path at ref.

        Returns an empty set when the file is absent and missing_ok is true.
        Returns None, after recording a violation, when the file is absent
        (and missing_ok is false) or cannot be parsed.
        """
        try:
            content = subprocess.check_output(
                ['git', 'show', f'{ref}:{path}'], text=True, errors='replace'
            )
        except subprocess.CalledProcessError:
            if missing_ok:
                return set()
            violations.append(f'{path}: file missing at {ref}')
            return None
        try:
            tree = ast.parse(content)
        except (SyntaxError, ValueError, RecursionError) as e:
            # ValueError covers sources containing null bytes; RecursionError
            # covers pathologically nested code. Both count as unparseable.
            violations.append(f'{path}: parse error at {ref} ({e})')
            return None
        lines = set()
        for node in ast.walk(tree):
            if isinstance(node, (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
                body = getattr(node, 'body', None) or []
                if (body and isinstance(body[0], ast.Expr)
                        and isinstance(body[0].value, ast.Constant)
                        and isinstance(body[0].value.value, str)):
                    start, end = body[0].lineno, body[0].end_lineno
                    if start and end:
                        lines.update(range(start, end + 1))
        return lines


    for path in files:
        old_docstring_lines = collect_docstring_lines(base_ref, path, missing_ok=True)
        new_docstring_lines = collect_docstring_lines(diff_ref, path)
        if new_docstring_lines is None or old_docstring_lines is None:
            # collect_docstring_lines has already recorded the violation.
            continue
        try:
            hunks = subprocess.check_output(
                ['git', 'diff', '-U0', f'origin/main...{diff_ref}', '--', path],
                text=True, errors='replace',
            )
        except subprocess.CalledProcessError:
            violations.append(f'{path}: could not compute diff')
            continue
        # Line cursors into the old and new file; None until the first hunk
        # header, which is what skips the "--- a/..." / "+++ b/..." file
        # header lines. Inside a hunk every '+' / '-' line is real content,
        # including lines whose own text begins with "++" or "--".
        old_cur = None
        new_cur = None
        for line in hunks.splitlines():
            if line.startswith('@@'):
                match = hunk_re.match(line)
                if match:
                    old_cur = int(match.group('old'))
                    new_cur = int(match.group('new'))
                else:
                    old_cur = None
                    new_cur = None
                continue
            if old_cur is None or new_cur is None:
                continue
            if line.startswith('+'):
                stripped = line[1:].strip()
                ln = new_cur
                new_cur += 1
                # Blank lines and comment lines are always allowed.
                if not stripped or stripped.startswith('#'):
                    continue
                if ln not in new_docstring_lines:
                    violations.append(f'{path}:{ln} added outside docstring')
            elif line.startswith('-'):
                stripped = line[1:].strip()
                ln = old_cur
                old_cur += 1
                if not stripped or stripped.startswith('#'):
                    continue
                if ln not in old_docstring_lines:
                    violations.append(f'{path}:{ln} removed outside docstring')
            elif line.startswith(' '):
                old_cur += 1
                new_cur += 1

    if violations:
        print('\n'.join(violations))
        sys.exit(1)
    PY
          ) || CHECK_STATUS=$?
          if [ -n "$NON_DOCSTRING" ]; then
            REASONS="${REASONS}- non-docstring/non-comment .py edits in docs suite:\n$(echo "$NON_DOCSTRING" | sed 's/^/ - /')\n"
          elif [ "$CHECK_STATUS" -ne 0 ]; then
            # Fail closed: the checker died without reporting anything.
            REASONS="${REASONS}- docstring-only checker exited with status ${CHECK_STATUS} without a report; cannot verify .py edits\n"
          fi
        fi
      fi
      if [ -z "$REASONS" ]; then
        echo "Scope gate passed for ${FINDING_ID}: ${FILE_COUNT} file(s), ${LOC_DELTA} LOC, all within allowlist."
        continue
      fi
      REJECTED=1
      echo "::error::Scope gate violation for ${FINDING_ID}"
      printf '%b' "$REASONS"
      # Clean up the remote side: close the PR (which also deletes the
      # branch), or delete the bare branch when no PR number was recorded.
      if [ -n "$PR_NUMBER" ] && [ "$PR_NUMBER" != "null" ]; then
        MSG=$(printf 'Closed by workflow scope gate. The pushed diff violated the localized-fix bar (see `.agents/recipes/_fix-policy.md`):\n\n%b\nThe `attempted_fixes` entry has been flipped to `abandoned`.' "$REASONS")
        gh pr close "$PR_NUMBER" --comment "$MSG" --delete-branch || \
          echo "::warning::gh pr close failed; branch may need manual cleanup"
      elif [ -n "$BRANCH" ]; then
        git push origin --delete "$BRANCH" || \
          echo "::warning::Could not delete remote branch $BRANCH"
      else
        echo "::warning::No PR number or branch available for cleanup"
      fi
      # Flip the entry's last attempt from "open" to "abandoned" and mark it
      # as a gate violation; the file is replaced atomically.
      FINDING_ID="$FINDING_ID" python3 - <<'PY'
    import json, os
    finding_id = os.environ['FINDING_ID']
    path = '.agentic-ci-state/runner-state.json'
    with open(path) as f:
        state = json.load(f)
    for entry in state.get('attempted_fixes', []):
        if entry.get('id') != finding_id:
            continue
        attempts = entry.get('attempts') or []
        if attempts and attempts[-1].get('outcome') == 'open':
            attempts[-1]['outcome'] = 'abandoned'
            attempts[-1]['gate_violation'] = True
    tmp = path + '.tmp'
    with open(tmp, 'w') as f:
        json.dump(state, f, indent=2)
    os.replace(tmp, path)
    PY
    done < <(echo "$OPEN_ENTRIES" | jq -c '.[]')
    if [ "$REJECTED" -eq 1 ]; then
      echo "rejected=true" >> "$GITHUB_OUTPUT"
    fi
    exit 0
- name: Verify dependencies lockfile
  # Dependencies suite only: re-run make install-dev against the
  # agent's pyproject.toml changes. This catches the failure mode
  # where the per-package test target passed against the *old*
  # lockfile but the proposed dep does not actually resolve.
  #
  # The step itself always exits 0; a rejection is reported through the
  # `rejected` output so that later steps can persist state before failing.
  id: lockfile_gate
  if: always() && matrix.suite == 'dependencies' && steps.snapshot.outcome == 'success' && steps.scope_gate.outcome == 'success'
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    set -o pipefail
    # Same snapshot-based selector as the scope gate: target every
    # entry whose attempts grew during this run.
    OPEN_ENTRIES=$(jq -c --slurpfile prior /tmp/prior-attempted-fixes.json '
      (($prior[0] // []) | map({key: .id, value: .n}) | from_entries) as $p
      | [
          .attempted_fixes // []
          | .[]
          | select(
              ((.attempts | last | .outcome) == "open")
              and ((.attempts | length) > ($p[.id] // 0))
            )
        ]
    ' .agentic-ci-state/runner-state.json)
    OPEN_COUNT=$(echo "$OPEN_ENTRIES" | jq 'length')
    if [ "$OPEN_COUNT" -eq 0 ]; then
      echo "No new open attempted_fix; skipping lockfile verification."
      exit 0
    fi
    echo "Verifying dependencies lockfile for ${OPEN_COUNT} new open attempted_fix entries."
    REJECTED=0
    BASE_REF=$(git rev-parse HEAD)
    # One log for the whole step. It is truncated once here and appended to
    # per entry, so a failing entry's output is not overwritten by a later
    # entry and output left in /tmp by an earlier run is not carried over.
    INSTALL_LOG=/tmp/install-dev-verify.log
    : > "$INSTALL_LOG"

    # abandon_open_attempt FINDING_ID MARKER
    # Flips the last attempt of the given finding from "open" to "abandoned"
    # and sets MARKER to true on it; the state file is replaced atomically.
    abandon_open_attempt() {
      local finding_id="$1"
      local marker="$2"
      FINDING_ID="$finding_id" MARKER="$marker" python3 - <<'PY'
    import json, os
    finding_id = os.environ['FINDING_ID']
    marker = os.environ['MARKER']
    path = '.agentic-ci-state/runner-state.json'
    with open(path) as f:
        state = json.load(f)
    for entry in state.get('attempted_fixes', []):
        if entry.get('id') != finding_id:
            continue
        attempts = entry.get('attempts') or []
        if attempts and attempts[-1].get('outcome') == 'open':
            attempts[-1]['outcome'] = 'abandoned'
            attempts[-1][marker] = True
    tmp = path + '.tmp'
    with open(tmp, 'w') as f:
        json.dump(state, f, indent=2)
    os.replace(tmp, path)
    PY
    }

    while IFS= read -r OPEN; do
      PR_NUMBER=$(echo "$OPEN" | jq -r '.attempts | last | .pr_number // empty')
      BRANCH=$(echo "$OPEN" | jq -r '.attempts | last | .branch // empty')
      FINDING_ID=$(echo "$OPEN" | jq -r '.id')
      CHECKOUT_REASON=""
      # Verify against the actually-pushed branch, not local HEAD.
      if [ -z "$BRANCH" ] && [ -n "$PR_NUMBER" ] && [ "$PR_NUMBER" != "null" ]; then
        BRANCH=$(gh pr view "$PR_NUMBER" --json headRefName -q .headRefName 2>/dev/null || true)
        if [ -n "$BRANCH" ]; then
          echo "::warning::Open attempt had no branch; recovered $BRANCH from PR #$PR_NUMBER."
        fi
      fi
      # Start every entry from the commit the step began on.
      git checkout --force --detach "$BASE_REF" >/dev/null 2>&1 || true
      if [ -z "$BRANCH" ]; then
        CHECKOUT_REASON="open attempt has no branch and could not be recovered from PR ${PR_NUMBER:-unknown}"
      elif ! git fetch --depth=50 origin "$BRANCH" 2>/dev/null; then
        CHECKOUT_REASON="origin/$BRANCH was not fetchable for lockfile verification"
      elif ! git checkout --force --detach FETCH_HEAD 2>/dev/null; then
        CHECKOUT_REASON="origin/$BRANCH could not be checked out for lockfile verification"
      fi
      if [ -n "$CHECKOUT_REASON" ]; then
        REJECTED=1
        echo "::error::${CHECKOUT_REASON}"
        if [ -n "$PR_NUMBER" ] && [ "$PR_NUMBER" != "null" ]; then
          MSG="Closed by workflow lockfile verification. ${CHECKOUT_REASON}; refusing to validate the previous working tree."
          gh pr close "$PR_NUMBER" --comment "$MSG" --delete-branch || \
            echo "::warning::gh pr close failed"
        elif [ -n "$BRANCH" ]; then
          git push origin --delete "$BRANCH" || true
        else
          echo "::warning::No PR number or branch available for cleanup"
        fi
        abandon_open_attempt "$FINDING_ID" "lockfile_checkout_failed"
        continue
      fi
      # pipefail (set above) makes the pipeline report make's status, not
      # tee's. The header line labels this entry's section of the shared log.
      if { echo "=== install-dev verification for ${FINDING_ID} ==="; make install-dev; } 2>&1 | tee -a "$INSTALL_LOG"; then
        echo "Lockfile resolves cleanly for ${FINDING_ID}."
        continue
      fi
      REJECTED=1
      echo "::error::make install-dev failed against the agent's pyproject changes for ${FINDING_ID}"
      if [ -n "$PR_NUMBER" ] && [ "$PR_NUMBER" != "null" ]; then
        MSG="Closed by workflow lockfile verification. \`make install-dev\` failed against the agent's \`pyproject.toml\` changes — the dependency edit does not resolve cleanly. See \`/tmp/install-dev-verify.log\` in the workflow artifact."
        gh pr close "$PR_NUMBER" --comment "$MSG" --delete-branch || \
          echo "::warning::gh pr close failed"
      elif [ -n "$BRANCH" ]; then
        git push origin --delete "$BRANCH" || true
      else
        echo "::warning::No PR number or branch available for cleanup"
      fi
      abandon_open_attempt "$FINDING_ID" "lockfile_verification_failed"
    done < <(echo "$OPEN_ENTRIES" | jq -c '.[]')
    # Do not leave the workspace on an agent branch for the remaining steps.
    git checkout --force --detach "$BASE_REF" >/dev/null 2>&1 || \
      echo "::warning::Could not restore the working tree to ${BASE_REF}"
    if [ "$REJECTED" -eq 1 ]; then
      echo "rejected=true" >> "$GITHUB_OUTPUT"
    fi
    exit 0
- name: Update runner memory
  # Normalises the state file before it is cached or uploaded: an unreadable
  # or malformed file is reset, `suite` is refreshed, and `last_run` is
  # stamped only when the audit step succeeded.
  if: always()
  env:
    SUITE: ${{ matrix.suite }}
    AUDIT_OUTCOME: ${{ steps.audit.outcome }}
  run: |
    # Always validate state before cache save/post-job handling.
    python3 - <<'PY'
    import datetime
    import json
    import os

    state_dir = '.agentic-ci-state'
    path = os.path.join(state_dir, 'runner-state.json')
    suite = os.environ['SUITE']

    try:
        with open(path) as f:
            state = json.load(f)
        # Valid JSON that is not an object (a list, a string, ...) cannot be
        # updated below, so it is treated like a corrupt file.
        if not isinstance(state, dict):
            raise ValueError(f'expected a JSON object, got {type(state).__name__}')
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError; FileNotFoundError is an OSError.
        print(f'::warning::runner-state.json is invalid ({e}), resetting')
        state = {'suite': suite, 'known_issues': [], 'baselines': {}}

    # Only stamp last_run if the audit actually succeeded.
    # Fix phase manages its own state via attempted_fixes; its outcome
    # does not gate last_run.
    if os.environ.get('AUDIT_OUTCOME') == 'success':
        state['last_run'] = datetime.datetime.now(datetime.timezone.utc).isoformat()
    state['suite'] = suite

    # This step runs under always(), so the job may have failed before the
    # state directory was ever created.
    os.makedirs(state_dir, exist_ok=True)
    # Write to a temp file and rename, so a crash mid-write cannot leave a
    # truncated state file behind.
    tmp = path + '.tmp'
    with open(tmp, 'w') as f:
        json.dump(state, f, indent=2)
    os.replace(tmp, path)
    PY
- name: Upload agent log
  # Uploaded unconditionally. With autonomous PR generation the failure mode
  # that matters most is "the workflow succeeded but the PR was wrong", and
  # the full event stream is the only way to investigate that days later.
  if: always()
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
  with:
    name: claude-audit-log-${{ matrix.suite }}-${{ github.run_id }}-${{ github.run_attempt }}
    # Paths that do not exist are skipped (see if-no-files-found below).
    path: |
      /tmp/claude-audit-log.txt
      /tmp/claude-fix-log.txt
      /tmp/audit-${{ matrix.suite }}.md
      /tmp/pr-body-${{ matrix.suite }}.md
      /tmp/install-dev-verify.log
      .agentic-ci-state/runner-state.json
    if-no-files-found: ignore
    retention-days: 14
- name: Write job summary
  # Renders the step outcomes, any gate rejections and the audit report (when
  # one was written) into the job summary page.
  if: always()
  env:
    SUITE: ${{ matrix.suite }}
    AUDIT_OUTCOME: ${{ steps.audit.outcome }}
    FIX_OUTCOME: ${{ steps.fix.outcome }}
    BACKLOG_SIZE: ${{ steps.backlog.outputs.size }}
    SCOPE_REJECTED: ${{ steps.scope_gate.outputs.rejected }}
    LOCKFILE_REJECTED: ${{ steps.lockfile_gate.outputs.rejected }}
  run: |
    REPORT_FILE="/tmp/audit-${SUITE}.md"
    # Everything printed inside the braces is appended to the summary file.
    {
      echo "## Daily Audit: ${SUITE}"
      echo ""
      echo "- Audit: \`${AUDIT_OUTCOME:-unknown}\`"
      echo "- Fix backlog size: \`${BACKLOG_SIZE:-n/a}\`"
      echo "- Fix: \`${FIX_OUTCOME:-skipped}\`"
      if [ "${SCOPE_REJECTED}" = "true" ]; then
        echo "- Scope gate: \`rejected and closed PR\`"
      fi
      if [ "${LOCKFILE_REJECTED}" = "true" ]; then
        echo "- Lockfile gate: \`rejected and closed PR\`"
      fi
      echo ""
      if [ -s "$REPORT_FILE" ]; then
        cat "$REPORT_FILE"
      else
        echo "No report generated. See the \`claude-audit-log-*\` artifact on failures for the full event stream."
      fi
    } >> "$GITHUB_STEP_SUMMARY"
- name: Save rejected gate state
  # Explicit cache save for runs in which a post-fix gate rejected a PR; it
  # runs before the step that fails the job.
  if: always() && (steps.scope_gate.outputs.rejected == 'true' || steps.lockfile_gate.outputs.rejected == 'true')
  uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5
  with:
    key: agentic-ci-state-${{ matrix.suite }}-${{ github.run_id }}-${{ github.run_attempt }}-rejected
    path: |
      .agentic-ci-state
      graphify-out
- name: Fail rejected fix gates
  # Marks the job as failed when either post-fix gate reported a rejection.
  if: always() && (steps.scope_gate.outputs.rejected == 'true' || steps.lockfile_gate.outputs.rejected == 'true')
  run: |
    printf '%s\n' "::error::A post-fix gate rejected and closed the agent PR. Runner memory was saved before failing."
    exit 1