leefowlercu
diff --git a/‎.github/workflows/record-run.yml‎
Lines changed: 112 additions & 0 deletions b/‎.github/workflows/record-run.yml‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎.github/workflows/reflect-and-propose.yml‎
Lines changed: 84 additions & 0 deletions b/‎.github/workflows/reflect-and-propose.yml‎
Lines changed: 84 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 9 additions & 0 deletions b/‎.gitignore‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 41 additions & 0 deletions b/‎README.md‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎agents/coordinator.md‎
Lines changed: 117 additions & 0 deletions b/‎agents/coordinator.md‎
Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,112 @@
+# Records a scorebook entry once a pull request has been merged.
+name: Record pipeline run
+
+on:
+  pull_request:
+    types:
+      - closed
+
+jobs:
+  record:
+    # A pull request that was closed without merging must not be recorded.
+    if: ${{ github.event.pull_request.merged == true }}
+    runs-on: ubuntu-latest
+    permissions:
+      # The steps below query workflow runs/artifacts and push a commit.
+      actions: read
+      contents: write
+
+    steps:
+      # Check out the branch the pull request was merged into. This job commits
+      # and pushes the scorebook, so it needs a real branch rather than whatever
+      # ref the pull_request event implies, and it should run the helper script
+      # as it exists on the merged base.
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.base.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      # Interpreter used by the inline artifact lookup and scripts/record_run.py.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      # Looks through the workflow runs for the merged PR's head commit, picks the
+      # first one that published a downloadable `coordinator-report` artifact, and
+      # downloads it (plus the optional `verify-feedback` artifact) into the
+      # workspace root. When no run qualifies the step still succeeds; the later
+      # steps key off the presence of coordinator-report.md.
+      - name: Locate and download pipeline artifacts
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPOSITORY: ${{ github.repository }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          python <<'PY'
+          import json
+          import os
+          import urllib.request
+          from pathlib import Path
+
+          token = os.environ["GH_TOKEN"]
+          repository = os.environ["REPOSITORY"]
+          head_sha = os.environ["HEAD_SHA"]
+
+          def request(path: str) -> dict:
+              """GET a GitHub REST API path and return the decoded JSON body."""
+              req = urllib.request.Request(
+                  f"https://api.github.com{path}",
+                  headers={
+                      "Accept": "application/vnd.github+json",
+                      "Authorization": f"Bearer {token}",
+                      "X-GitHub-Api-Version": "2022-11-28",
+                  },
+              )
+              with urllib.request.urlopen(req, timeout=60) as resp:
+                  return json.loads(resp.read().decode("utf-8"))
+
+          # Ask for the largest page the API allows; a short page can hide the
+          # run or artifact we need on a busy pull request.
+          runs = request(f"/repos/{repository}/actions/runs?head_sha={head_sha}&per_page=100")
+          workflow_runs = runs.get("workflow_runs", [])
+
+          target_run_id = None
+          for run in workflow_runs:
+              run_id = run.get("id")
+              if not run_id:
+                  continue
+              artifacts = request(f"/repos/{repository}/actions/runs/{run_id}/artifacts?per_page=100")
+              # Expired artifacts are still listed but can no longer be downloaded,
+              # so they must not count as a match.
+              names = {
+                  artifact.get("name", "")
+                  for artifact in artifacts.get("artifacts", [])
+                  if not artifact.get("expired", False)
+              }
+              if "coordinator-report" in names:
+                  target_run_id = str(run_id)
+                  break
+
+          if not target_run_id:
+              print("No prior workflow run for this PR head SHA published a coordinator-report artifact.")
+              raise SystemExit(0)
+
+          # Hand the chosen run id to the shell code below via a marker file.
+          Path(".pipeline_run_id").write_text(target_run_id, encoding="utf-8")
+          PY
+
+          if [[ -f .pipeline_run_id ]]; then
+            PIPELINE_RUN_ID="$(cat .pipeline_run_id)"
+            gh run download "$PIPELINE_RUN_ID" -R "$REPOSITORY" -n coordinator-report -D .
+            # verify-feedback is optional, so a failed download is tolerated.
+            gh run download "$PIPELINE_RUN_ID" -R "$REPOSITORY" -n verify-feedback -D . || true
+          fi
+
+      # Informational only: explains why the scorebook steps did not run. It does
+      # not end the job; the remaining steps carry their own `if` guards.
+      - name: Stop early when no coordinator report is available
+        if: hashFiles('coordinator-report.md') == ''
+        run: |
+          echo "Skipping scorebook update: no real coordinator-report.md artifact was available for this merged PR."
+          echo "This is expected until a manual or automated review executor uploads pipeline artifacts for the PR head SHA."
+
+      # Passes the downloaded coordinator report (and the verify feedback file,
+      # when it exists) to scripts/record_run.py together with the PR number.
+      - name: Record run in scorebook
+        if: hashFiles('coordinator-report.md') != ''
+        shell: bash
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          set -euo pipefail
+          record_args=(--pr "$PR_NUMBER" --coordinator-report coordinator-report.md)
+          if [[ -f verify-feedback.json ]]; then
+            record_args+=(--verify-feedback verify-feedback.json)
+          fi
+          python scripts/record_run.py "${record_args[@]}"
+
+      # Commits data/pipeline-runs.jsonl when it changed and pushes the commit to
+      # the branch the pull request was merged into.
+      - name: Commit updated scorebook
+        if: ${{ hashFiles('coordinator-report.md') != '' }}
+        shell: bash
+        env:
+          BASE_REF: ${{ github.event.pull_request.base.ref }}
+        run: |
+          set -euo pipefail
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add data/pipeline-runs.jsonl
+          # Nothing staged means there is nothing to commit or push.
+          if git diff --cached --quiet; then
+            echo "Scorebook unchanged; nothing to commit."
+            exit 0
+          fi
+          git commit -m "chore: record pipeline scorebook entry"
+          # Name the destination explicitly: a pull_request-triggered checkout may
+          # leave HEAD detached, in which case a bare `git push` has no upstream.
+          git push origin "HEAD:refs/heads/${BASE_REF}"
@@ -0,0 +1,84 @@
+# Runs the reflect agent over the scorebook and, when the report contains
+# proposals, opens a pull request carrying that report.
+name: Reflect and propose prompt mutations
+
+on:
+  # 09:00 every Monday.
+  schedule:
+    - cron: '0 9 * * 1'
+  workflow_dispatch:
+  # NOTE(review): record-run.yml pushes this file using the default
+  # GITHUB_TOKEN, and GitHub documents that pushes made with that token do not
+  # start new workflow runs. This trigger may therefore only fire for pushes
+  # made with other credentials - confirm.
+  push:
+    paths:
+      - 'data/pipeline-runs.jsonl'
+
+jobs:
+  reflect:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+      models: read
+
+    steps:
+      # Working tree for the scorebook and the scripts/ helpers.
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      # Interpreter for the reflect and proposal-detection scripts.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      # Decides whether the reflect loop should run. It never runs on an empty
+      # scorebook, always runs for schedule/manual triggers, and otherwise runs
+      # only when the count of non-blank scorebook lines is a multiple of 50.
+      # Publishes `line_count` and `should_run` as step outputs.
+      - name: Decide whether to run reflect loop
+        id: gate
+        shell: bash
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+        run: |
+          set -euo pipefail
+          scorebook=data/pipeline-runs.jsonl
+          lines=0
+          if [[ -f "$scorebook" ]]; then
+            # grep -c exits non-zero when the count is 0; `|| true` keeps set -e quiet.
+            lines=$(grep -cve '^[[:space:]]*$' "$scorebook" || true)
+          fi
+
+          should_run=false
+          if [[ "$lines" -gt 0 ]]; then
+            case "$EVENT_NAME" in
+              schedule|workflow_dispatch)
+                should_run=true
+                ;;
+              *)
+                if (( lines % 50 == 0 )); then
+                  should_run=true
+                fi
+                ;;
+            esac
+          fi
+
+          {
+            echo "line_count=$lines"
+            echo "should_run=$should_run"
+          } >> "$GITHUB_OUTPUT"
+
+      # Informational only: reports the line count that kept the gate closed.
+      - name: Stop early when no scorebook window is ready
+        if: ${{ steps.gate.outputs.should_run != 'true' }}
+        env:
+          LINE_COUNT: ${{ steps.gate.outputs.line_count }}
+        run: |
+          echo "Skipping reflect loop; non-empty scorebook line count is ${LINE_COUNT}."
+
+      # Invokes scripts/run_reflect.py with the configured window size; the
+      # workflow token is exposed to the script as GITHUB_TOKEN.
+      - name: Run reflect agent via GitHub Models
+        if: ${{ steps.gate.outputs.should_run == 'true' }}
+        shell: bash
+        env:
+          WINDOW_SIZE: '50'
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          python scripts/run_reflect.py --window-size "$WINDOW_SIZE"
+
+      # Runs the detector over reflect-report.md. The next step reads this step's
+      # `has_proposals` output - presumably written by the script; verify.
+      - name: Detect whether report has mutation proposals
+        id: proposals
+        if: ${{ steps.gate.outputs.should_run == 'true' }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          python scripts/detect_reflect_proposals.py --report reflect-report.md
+
+      # Opens a pull request on a per-run branch that adds only reflect-report.md
+      # and uses that same file as the pull request body.
+      - name: Create mutation proposal PR
+        if: ${{ steps.gate.outputs.should_run == 'true' && steps.proposals.outputs.has_proposals == 'true' }}
+        uses: peter-evans/create-pull-request@v6
+        with:
+          branch: chore/reflect-proposals-${{ github.run_id }}
+          title: 'chore: reflect prompt mutation proposals'
+          commit-message: 'chore: add reflect mutation proposals'
+          body-path: reflect-report.md
+          add-paths: reflect-report.md
@@ -47,3 +47,12 @@ dist/
 
 # Integration test files
 tmp/
+
+# Agentic review pipeline local runtime artifacts
+coordinator-report.md
+remediation-report.md
+verification-report.md
+verification-report.json
+verify-feedback.json
+escalation-report.md
+escalation-report.json
@@ -33,6 +33,7 @@ A Go SDK for the [IBM ContextForge MCP Gateway](https://github.com/IBM/mcp-conte
   - [Teams Service](#teams-service)
 - [Examples](#examples)
 - [Development](#development)
+- [Agentic Review Pipeline](#agentic-review-pipeline)
 - [Releasing](#releasing)
 - [Architecture](#architecture)
 - [Known Issues](#known-issues)
@@ -1042,6 +1043,46 @@ make ci
 - `make release-prep VERSION=vX.Y.Z` - Prepare release with specific version
 - `make release` - Full release preparation workflow
 
+## Agentic Review Pipeline
+
+This repository includes an upstream-shaped, CI-assisted adoption of the
+Agentic Review Pipeline. It installs prompt assets under `agents/`, a manual
+operator entrypoint at `commands/review_pr.md`, stdlib-only helper scripts under
+`scripts/`, and two GitHub Actions workflows for scorebook recording and
+reflect-loop proposal PRs.
+
+### Manual entrypoint
+
+Use `commands/review_pr.md` as the contract for a human-operated `/review_pr`
+command. The current integration is manual-first: it standardizes artifact names
+such as `coordinator-report.md` and `verify-feedback.json`, but it does not yet
+include an automated PR-review executor.
+
+### Workflows and permissions
+
+The repository now includes:
+
+- `.github/workflows/record-run.yml`
+- `.github/workflows/reflect-and-propose.yml`
+
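+Required workflow permissions:
+
+- `actions: read` (`record-run.yml`, to look up workflow runs and artifacts)
+- `contents: write` (both workflows)
+- `pull-requests: write` (`reflect-and-propose.yml`)
+- `models: read` (`reflect-and-propose.yml`)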
+
+`record-run.yml` is strict by design: it only appends to
+`data/pipeline-runs.jsonl` when a real `coordinator-report.md` artifact is
+available. Until a manual or automated review executor uploads those artifacts,
+the scorebook workflow will skip cleanly and the reflect loop will remain
+dormant.
+
+### Optional repository bootstrap
+
+If repository Actions settings need to be adjusted, use
+`scripts/configure_actions_permissions.py` with a repo-admin token. This is an
+optional bootstrap helper, not part of normal development or release flows.
+
 ## Releasing
 
 This project uses semantic versioning and includes automated release tooling to streamline the release process.
 
@@ -0,0 +1,117 @@
+# Coordinator Agent
+
+## Role
+
+Central orchestrator for the manual Agentic Review Pipeline in `go-contextforge`.
+Classify the request, dispatch the right specialist prompts, aggregate findings,
+compute a confidence score, and produce a single gate decision.
+
+## Repository Context
+
+- Target repository: `go-contextforge`, a Go SDK for the ContextForge REST management API.
+- In scope: CRUD-style REST client behavior, three-state update semantics, tests, docs, and examples.
+- Out of scope: MCP JSON-RPC `/rpc` methods and SSE streaming endpoints.
+- Design baseline: `google/go-github`-style service patterns, context-first APIs, and README/examples/changelog updates alongside public API changes when needed.
+
+## Operating Constraints
+
+- Never modify code directly. The coordinator routes and synthesizes.
+- Parallel review dispatch is the default for PR review tasks.
+- Use the artifact names `coordinator-report.md` and `verify-feedback.json`.
+- This repository currently uses the pipeline in manual mode. Do not assume an automated PR executor exists.
+
+## Task Classification
+
+Classify the task into exactly one type:
+
+| Task Type | Agents | Mode |
+|---|---|---|
+| `code-review` | testing-khorikov + design-ousterhout + security + human-review | parallel |
+| `security-audit` | security | single |
+| `documentation` | human-review + design-ousterhout | parallel |
+| `general-coding` | remediation + verify after human findings exist | sequential |
+
+Prefer `code-review` when the request mentions a PR, review, diff, patch, or merge readiness.
+
+## Dispatch Rules
+
+1. Read the specialist prompt before dispatching it.
+2. Pass the same PR context to each review agent: title, summary, changed files, unified diff, and any test or CI signals.
+3. Keep specialists scoped to their prompt domain.
+4. Collect structured findings with severity, location, recommendation, and confidence.
+5. When public API or behavior changes appear, expect docs/examples/changelog review as part of aggregation.
+
+## Confidence Scoring
+
+Compute an overall score with this weighting:
+
+```text
+confidence = 0.30 * test_score
+           + 0.25 * acceptance_score
+           + 0.20 * review_score
+           + 0.15 * traceability_score
+           + 0.10 * regression_score
+```
+
+Use:
+
+- `test_score`: testing-khorikov output
+- `acceptance_score`: human-review output
+- `review_score`: combined design + security output
+- `traceability_score`: coordinator judgment on whether the change matches intent
+- `regression_score`: verification result, or `1.0` when no fixes were applied and no failing signals are present
+
+## Gate Decision
+
+Return one of:
+
+- `ready`: overall confidence `>= 0.85` and no critical or high blocking findings
+- `needs-human-review`: confidence `>= 0.70` and `< 0.85`, unclear tradeoffs, or low-confidence findings
+- `needs-changes`: confidence `< 0.70` or any critical finding
+
+Any critical security or correctness issue forces `needs-changes`.
+
+## Manual Operating Model
+
+Because this repository does not yet have an automated PR executor:
+
+1. Write `coordinator-report.md`.
+2. Stop on `ready` or `needs-human-review` unless a human explicitly asks for remediation.
+3. Only invoke `agents/remediation/remediation.md` and `agents/verify/verify-agent.md` when a human operator chooses to run the fix loop manually.
+4. Preserve artifact names for future workflow adoption.
+
+## Output Format
+
+```markdown
+## Coordinator Report
+
+### Task Classification
+- **Type**: code-review | security-audit | documentation | general-coding
+- **Agents Dispatched**: [list]
+- **Classification Confidence**: 0.0-1.0
+- **Overall Confidence**: 0.0-1.0
+
+### Findings Summary
+
+| # | Severity | Agent | Finding | Location | Confidence |
+|---|---|---|---|---|---|
+| 1 | high | security | Bearer token leaks through error string | `contextforge/contextforge.go:88` | 0.91 |
+
+### Recommended Actions
+- [ ] Concrete next steps, ordered by severity and effort
+
+### Confidence Breakdown
+
+| Dimension | Score | Source |
+|---|---|---|
+| Test quality | 0.86 | testing-khorikov |
+| Acceptance | 0.82 | human-review |
+| Code quality | 0.80 | design-ousterhout, security |
+| Traceability | 0.90 | coordinator |
+| Regression | 1.00 | verification |
+| **Overall** | **0.86** | weighted average |
+
+### PR Readiness: ready | needs-human-review | needs-changes
+
+**Reason**: One short sentence explaining the gate.
+```