Skip to content

Commit ad481ee

Browse files
committed
chore: integrate agentic review pipeline
1 parent 9e01a39 commit ad481ee

20 files changed

+1351
-0
lines changed

.github/workflows/record-run.yml

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
name: Record pipeline run
2+
3+
on:
4+
pull_request:
5+
types: [closed]
6+
7+
jobs:
8+
record:
9+
if: github.event.pull_request.merged == true
10+
runs-on: ubuntu-latest
11+
permissions:
12+
actions: read
13+
contents: write
14+
15+
steps:
16+
- name: Checkout
17+
uses: actions/checkout@v4
18+
with:
19+
token: ${{ secrets.GITHUB_TOKEN }}
20+
21+
- name: Set up Python
22+
uses: actions/setup-python@v5
23+
with:
24+
python-version: "3.11"
25+
26+
- name: Locate and download pipeline artifacts
27+
env:
28+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
29+
REPOSITORY: ${{ github.repository }}
30+
HEAD_SHA: ${{ github.event.pull_request.head.sha }}
31+
shell: bash
32+
run: |
33+
set -euo pipefail
34+
python <<'PY'
35+
import json
36+
import os
37+
import urllib.request
38+
from pathlib import Path
39+
40+
token = os.environ["GH_TOKEN"]
41+
repository = os.environ["REPOSITORY"]
42+
head_sha = os.environ["HEAD_SHA"]
43+
44+
def request(path: str) -> dict:
45+
req = urllib.request.Request(
46+
f"https://api.github.com{path}",
47+
headers={
48+
"Accept": "application/vnd.github+json",
49+
"Authorization": f"Bearer {token}",
50+
"X-GitHub-Api-Version": "2022-11-28",
51+
},
52+
)
53+
with urllib.request.urlopen(req, timeout=60) as resp:
54+
return json.loads(resp.read().decode("utf-8"))
55+
56+
runs = request(f"/repos/{repository}/actions/runs?head_sha={head_sha}&per_page=30")
57+
workflow_runs = runs.get("workflow_runs", [])
58+
59+
target_run_id = None
60+
for run in workflow_runs:
61+
run_id = run.get("id")
62+
if not run_id:
63+
continue
64+
artifacts = request(f"/repos/{repository}/actions/runs/{run_id}/artifacts")
65+
names = {artifact.get("name", "") for artifact in artifacts.get("artifacts", [])}
66+
if "coordinator-report" in names:
67+
target_run_id = str(run_id)
68+
break
69+
70+
if not target_run_id:
71+
print("No prior workflow run for this PR head SHA published a coordinator-report artifact.")
72+
raise SystemExit(0)
73+
74+
Path(".pipeline_run_id").write_text(target_run_id, encoding="utf-8")
75+
PY
76+
77+
if [[ -f .pipeline_run_id ]]; then
78+
PIPELINE_RUN_ID="$(cat .pipeline_run_id)"
79+
gh run download "$PIPELINE_RUN_ID" -R "$REPOSITORY" -n coordinator-report -D .
80+
gh run download "$PIPELINE_RUN_ID" -R "$REPOSITORY" -n verify-feedback -D . || true
81+
fi
82+
83+
- name: Stop early when no coordinator report is available
84+
if: ${{ hashFiles('coordinator-report.md') == '' }}
85+
run: |
86+
echo "Skipping scorebook update: no real coordinator-report.md artifact was available for this merged PR."
87+
echo "This is expected until a manual or automated review executor uploads pipeline artifacts for the PR head SHA."
88+
89+
- name: Record run in scorebook
90+
if: ${{ hashFiles('coordinator-report.md') != '' }}
91+
shell: bash
92+
run: |
93+
set -euo pipefail
94+
args=(
95+
--pr "${{ github.event.pull_request.number }}"
96+
--coordinator-report coordinator-report.md
97+
)
98+
if [[ -f verify-feedback.json ]]; then
99+
args+=(--verify-feedback verify-feedback.json)
100+
fi
101+
python scripts/record_run.py "${args[@]}"
102+
103+
- name: Commit updated scorebook
104+
if: ${{ hashFiles('coordinator-report.md') != '' }}
105+
shell: bash
106+
run: |
107+
set -euo pipefail
108+
git config user.name "github-actions[bot]"
109+
git config user.email "github-actions[bot]@users.noreply.github.com"
110+
git add data/pipeline-runs.jsonl
111+
git diff --cached --quiet || git commit -m "chore: record pipeline scorebook entry"
112+
git push
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
name: Reflect and propose prompt mutations
2+
3+
on:
4+
schedule:
5+
- cron: "0 9 * * 1"
6+
workflow_dispatch:
7+
push:
8+
paths:
9+
- "data/pipeline-runs.jsonl"
10+
11+
jobs:
12+
reflect:
13+
runs-on: ubuntu-latest
14+
permissions:
15+
contents: write
16+
pull-requests: write
17+
models: read
18+
19+
steps:
20+
- name: Checkout
21+
uses: actions/checkout@v4
22+
with:
23+
token: ${{ secrets.GITHUB_TOKEN }}
24+
25+
- name: Set up Python
26+
uses: actions/setup-python@v5
27+
with:
28+
python-version: "3.11"
29+
30+
- name: Decide whether to run reflect loop
31+
id: gate
32+
shell: bash
33+
run: |
34+
set -euo pipefail
35+
lines=0
36+
if [[ -f data/pipeline-runs.jsonl ]]; then
37+
lines=$(grep -cve '^[[:space:]]*$' data/pipeline-runs.jsonl || true)
38+
fi
39+
40+
should_run=false
41+
if [[ "$lines" -gt 0 ]]; then
42+
if [[ "${{ github.event_name }}" == "schedule" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
43+
should_run=true
44+
elif (( lines % 50 == 0 )); then
45+
should_run=true
46+
fi
47+
fi
48+
49+
echo "line_count=$lines" >> "$GITHUB_OUTPUT"
50+
echo "should_run=$should_run" >> "$GITHUB_OUTPUT"
51+
52+
- name: Stop early when no scorebook window is ready
53+
if: steps.gate.outputs.should_run != 'true'
54+
run: |
55+
echo "Skipping reflect loop; non-empty scorebook line count is ${{ steps.gate.outputs.line_count }}."
56+
57+
- name: Run reflect agent via GitHub Models
58+
if: steps.gate.outputs.should_run == 'true'
59+
env:
60+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
61+
WINDOW_SIZE: "50"
62+
shell: bash
63+
run: |
64+
set -euo pipefail
65+
python scripts/run_reflect.py --window-size "$WINDOW_SIZE"
66+
67+
- name: Detect whether report has mutation proposals
68+
if: steps.gate.outputs.should_run == 'true'
69+
id: proposals
70+
shell: bash
71+
run: |
72+
set -euo pipefail
73+
python scripts/detect_reflect_proposals.py --report reflect-report.md
74+
75+
- name: Create mutation proposal PR
76+
if: steps.gate.outputs.should_run == 'true' && steps.proposals.outputs.has_proposals == 'true'
77+
uses: peter-evans/create-pull-request@v6
78+
with:
79+
branch: chore/reflect-proposals-${{ github.run_id }}
80+
title: "chore: reflect prompt mutation proposals"
81+
commit-message: "chore: add reflect mutation proposals"
82+
body-path: reflect-report.md
83+
add-paths: |
84+
reflect-report.md

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,12 @@ dist/
4747

4848
# Integration test files
4949
tmp/
50+
51+
# Agentic review pipeline local runtime artifacts
52+
coordinator-report.md
53+
remediation-report.md
54+
verification-report.md
55+
verification-report.json
56+
verify-feedback.json
57+
escalation-report.md
58+
escalation-report.json

README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ A Go SDK for the [IBM ContextForge MCP Gateway](https://github.com/IBM/mcp-conte
3333
- [Teams Service](#teams-service)
3434
- [Examples](#examples)
3535
- [Development](#development)
36+
- [Agentic Review Pipeline](#agentic-review-pipeline)
3637
- [Releasing](#releasing)
3738
- [Architecture](#architecture)
3839
- [Known Issues](#known-issues)
@@ -1042,6 +1043,46 @@ make ci
10421043
- `make release-prep VERSION=vX.Y.Z` - Prepare release with specific version
10431044
- `make release` - Full release preparation workflow
10441045

1046+
## Agentic Review Pipeline
1047+
1048+
This repository includes an upstream-shaped, CI-assisted adoption of the
1049+
Agentic Review Pipeline. It installs prompt assets under `agents/`, a manual
1050+
operator entrypoint at `commands/review_pr.md`, stdlib-only helper scripts under
1051+
`scripts/`, and two GitHub Actions workflows for scorebook recording and
1052+
reflect-loop proposal PRs.
1053+
1054+
### Manual entrypoint
1055+
1056+
Use `commands/review_pr.md` as the contract for a human-operated `/review_pr`
1057+
command. The current integration is manual-first: it standardizes artifact names
1058+
such as `coordinator-report.md` and `verify-feedback.json`, but it does not yet
1059+
include an automated PR-review executor.
1060+
1061+
### Workflows and permissions
1062+
1063+
The repository now includes:
1064+
1065+
- `.github/workflows/record-run.yml`
1066+
- `.github/workflows/reflect-and-propose.yml`
1067+
1068+
Required workflow permissions:
1069+
1070+
- `contents: write`
1071+
- `pull-requests: write`
1072+
- `models: read`
1073+
1074+
`record-run.yml` is strict by design: it only appends to
1075+
`data/pipeline-runs.jsonl` when a real `coordinator-report.md` artifact is
1076+
available. Until a manual or automated review executor uploads those artifacts,
1077+
the scorebook workflow will skip cleanly and the reflect loop will remain
1078+
dormant.
1079+
1080+
### Optional repository bootstrap
1081+
1082+
If repository Actions settings need to be adjusted, use
1083+
`scripts/configure_actions_permissions.py` with a repo-admin token. This is an
1084+
optional bootstrap helper, not part of normal development or release flows.
1085+
10451086
## Releasing
10461087

10471088
This project uses semantic versioning and includes automated release tooling to streamline the release process.

agents/coordinator.md

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# Coordinator Agent
2+
3+
## Role
4+
5+
Central orchestrator for the manual Agentic Review Pipeline in `go-contextforge`.
6+
Classify the request, dispatch the right specialist prompts, aggregate findings,
7+
compute a confidence score, and produce a single gate decision.
8+
9+
## Repository Context
10+
11+
- Target repository: `go-contextforge`, a Go SDK for the ContextForge REST management API.
12+
- In scope: CRUD-style REST client behavior, three-state update semantics, tests, docs, and examples.
13+
- Out of scope: MCP JSON-RPC `/rpc` methods and SSE streaming endpoints.
14+
- Design baseline: `google/go-github`-style service patterns, context-first APIs, and public API changes that update README/examples/changelog when needed.
15+
16+
## Operating Constraints
17+
18+
- Never modify code directly. The coordinator routes and synthesizes.
19+
- Parallel review dispatch is the default for PR review tasks.
20+
- Use the artifact names `coordinator-report.md` and `verify-feedback.json`.
21+
- This repository currently uses the pipeline in manual mode. Do not assume an automated PR executor exists.
22+
23+
## Task Classification
24+
25+
Classify the task into exactly one type:
26+
27+
| Task Type | Agents | Mode |
28+
|---|---|---|
29+
| `code-review` | testing-khorikov + design-ousterhout + security + human-review | parallel |
30+
| `security-audit` | security | single |
31+
| `documentation` | human-review + design-ousterhout | parallel |
32+
| `general-coding` | remediation + verify after human findings exist | sequential |
33+
34+
Prefer `code-review` when the request mentions a PR, review, diff, patch, or merge readiness.
35+
36+
## Dispatch Rules
37+
38+
1. Read the specialist prompt before dispatching it.
39+
2. Pass the same PR context to each review agent: title, summary, changed files, unified diff, and any test or CI signals.
40+
3. Keep specialists scoped to their prompt domain.
41+
4. Collect structured findings with severity, location, recommendation, and confidence.
42+
5. When public API or behavior changes appear, expect docs/examples/changelog review as part of aggregation.
43+
44+
## Confidence Scoring
45+
46+
Compute an overall score with this weighting:
47+
48+
```text
49+
confidence = 0.30 * test_score
50+
+ 0.25 * acceptance_score
51+
+ 0.20 * review_score
52+
+ 0.15 * traceability_score
53+
+ 0.10 * regression_score
54+
```
55+
56+
Use:
57+
58+
- `test_score`: testing-khorikov output
59+
- `acceptance_score`: human-review output
60+
- `review_score`: combined design + security output
61+
- `traceability_score`: coordinator judgment on whether the change matches intent
62+
- `regression_score`: verification result, or `1.0` when no fixes were applied and no failing signals are present
63+
64+
## Gate Decision
65+
66+
Return one of:
67+
68+
- `ready`: overall confidence `>= 0.85` and no critical or high blocking findings
69+
- `needs-human-review`: confidence `0.70-0.84`, unclear tradeoffs, or low-confidence findings
70+
- `needs-changes`: confidence `< 0.70` or any critical finding
71+
72+
Any critical security or correctness issue forces `needs-changes`.
73+
74+
## Manual Operating Model
75+
76+
Because this repository does not yet have an automated PR executor:
77+
78+
1. Write `coordinator-report.md`.
79+
2. Stop on `ready` or `needs-human-review` unless a human explicitly asks for remediation.
80+
3. Only invoke `agents/remediation/remediation.md` and `agents/verify/verify-agent.md` when a human operator chooses to run the fix loop manually.
81+
4. Preserve artifact names for future workflow adoption.
82+
83+
## Output Format
84+
85+
```markdown
86+
## Coordinator Report
87+
88+
### Task Classification
89+
- **Type**: code-review | security-audit | documentation | general-coding
90+
- **Agents Dispatched**: [list]
91+
- **Classification Confidence**: 0.0-1.0
92+
- **Overall Confidence**: 0.0-1.0
93+
94+
### Findings Summary
95+
96+
| # | Severity | Agent | Finding | Location | Confidence |
97+
|---|---|---|---|---|---|
98+
| 1 | high | security | Bearer token leaks through error string | `contextforge/contextforge.go:88` | 0.91 |
99+
100+
### Recommended Actions
101+
- [ ] Concrete next steps, ordered by severity and effort
102+
103+
### Confidence Breakdown
104+
105+
| Dimension | Score | Source |
106+
|---|---|---|
107+
| Test quality | 0.86 | testing-khorikov |
108+
| Acceptance | 0.82 | human-review |
109+
| Code quality | 0.80 | design-ousterhout, security |
110+
| Traceability | 0.90 | coordinator |
111+
| Regression | 1.00 | verification |
112+
| **Overall** | **0.86** | weighted average |
113+
114+
### PR Readiness: ready | needs-human-review | needs-changes
115+
116+
**Reason**: One short sentence explaining the gate.
117+
```

0 commit comments

Comments
 (0)