feat(agent): add in-pipeline pre-PR self-review phase (#262)

bgagent · bgagent · commit b53cde9f1b4f · 2026-06-04T13:39:08.000-04:00
Adds an optional self-review phase between agent execution and post-hooks
where the LLM critiques its own cumulative diff before the PR is created.
This improves first-pass PR quality by catching bugs, style issues, and
test gaps before human review.

- New self_review.py orchestration module with run_self_review()
- New prompts/self_review.py with focused review prompt template
- TaskConfig extended with self_review_enabled and self_review_max_turns
- Fields threaded through build_config, get_config, server, pipeline
- Fail-open: self-review errors never block PR creation
- Uses turns/budget left over from the original allocation; review turns
  are additionally capped by self_review_max_turns
- Feature is opt-in (disabled by default)
diff --git a/agent/src/config.py b/agent/src/config.py
@@ -337,6 +337,8 @@ def build_config(
     initial_approval_gate_count: int = 0,
     approval_gate_cap: int | None = None,
     attachments: list[dict] | None = None,
+    self_review_enabled: bool = False,
+    self_review_max_turns: int = 5,
 ) -> TaskConfig:
     """Build and validate configuration from explicit parameters.
 
@@ -407,6 +409,8 @@ def build_config(
         initial_approval_gate_count=initial_approval_gate_count,
         approval_gate_cap=approval_gate_cap,
         attachments=validated_attachments,
+        self_review_enabled=self_review_enabled,
+        self_review_max_turns=self_review_max_turns,
     )
 
 
@@ -431,6 +435,9 @@ def get_config() -> TaskConfig:
             # an unreachable ``traces//`` key.
             trace=os.environ.get("TRACE", "").lower() in ("1", "true", "yes"),
             user_id=os.environ.get("USER_ID", ""),
+            self_review_enabled=os.environ.get("SELF_REVIEW_ENABLED", "").lower()
+            in ("1", "true", "yes"),
+            self_review_max_turns=int(os.environ.get("SELF_REVIEW_MAX_TURNS", "5")),
         )
     except ValueError as e:
         print(f"ERROR: {e}", file=sys.stderr)
diff --git a/agent/src/models.py b/agent/src/models.py
@@ -186,6 +186,9 @@ class TaskConfig(BaseModel):
     # Attachments from the orchestrator payload (Phase 3). Validated as
     # AttachmentConfig models. Empty list for tasks without attachments.
     attachments: list[AttachmentConfig] = Field(default_factory=list)
+    # Self-review: optional LLM diff critique before PR creation.
+    self_review_enabled: bool = False
+    self_review_max_turns: int = 5  # Cap on turns allocated to self-review
 
     @model_validator(mode="after")
     def _validate_trace_requires_user_id(self) -> Self:
diff --git a/agent/src/pipeline.py b/agent/src/pipeline.py
@@ -29,6 +29,7 @@
 from progress_writer import _ProgressWriter
 from prompt_builder import build_system_prompt, discover_project_config
 from runner import run_agent
+from self_review import run_self_review
 from shell import log, log_error_cw
 from system_prompt import SYSTEM_PROMPT
 from telemetry import (
@@ -279,6 +280,8 @@ def run_task(
     trace: bool = False,
     user_id: str = "",
     attachments: list[dict] | None = None,
+    self_review_enabled: bool = False,
+    self_review_max_turns: int = 5,
 ) -> dict:
     """Run the full agent pipeline and return a serialized result dict.
 
@@ -318,6 +321,8 @@ def run_task(
         initial_approval_gate_count=initial_approval_gate_count,
         approval_gate_cap=approval_gate_cap,
         attachments=attachments,
+        self_review_enabled=self_review_enabled,
+        self_review_max_turns=self_review_max_turns,
     )
 
     # Inject Cedar policies into config for the PolicyEngine in runner.py
@@ -623,6 +628,22 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
                     "turns_attempted": agent_result.num_turns or agent_result.turns,
                 }
 
+            # Self-review phase: LLM critiques its own diff before PR creation.
+            # Runs between cancel-check and post-hooks. Fail-open: errors here
+            # never block PR creation.
+            with task_span("task.self_review"):
+                review_result = run_self_review(
+                    config, setup, agent_result, trajectory, progress
+                )
+                if review_result is not None:
+                    # Accumulate turns and cost from the review phase
+                    agent_result.turns += review_result.turns
+                    agent_result.num_turns += review_result.num_turns or review_result.turns
+                    if review_result.cost_usd is not None:
+                        agent_result.cost_usd = (
+                            (agent_result.cost_usd or 0.0) + review_result.cost_usd
+                        )
+
             # Post-hooks (agent_result is guaranteed set by the try/except above)
             with task_span("task.post_hooks") as post_span:
                 # Safety net: commit any uncommitted tracked changes (skip for read-only tasks)
diff --git a/agent/src/prompts/__init__.py b/agent/src/prompts/__init__.py
@@ -4,6 +4,7 @@
 from .new_task import NEW_TASK_WORKFLOW
 from .pr_iteration import PR_ITERATION_WORKFLOW
 from .pr_review import PR_REVIEW_WORKFLOW
+from .self_review import SELF_REVIEW_PROMPT as SELF_REVIEW_PROMPT
 
 _PROMPTS = {
     "new_task": BASE_PROMPT.replace("{workflow}", NEW_TASK_WORKFLOW),
diff --git a/agent/src/prompts/self_review.py b/agent/src/prompts/self_review.py
@@ -0,0 +1,40 @@
+"""Self-review prompt template for pre-PR diff critique."""
+
+SELF_REVIEW_PROMPT = """\
+You are reviewing your own work before it becomes a pull request. Below is the \
+cumulative diff of all changes on this branch compared to the base branch.
+
+<diff>
+{diff}
+</diff>
+
+## Task context
+
+{task_description}
+
+## Review checklist
+
+Examine the diff carefully for:
+
+1. **Correctness** — Logic errors, off-by-one mistakes, missing edge cases, \
+incorrect assumptions about data shapes or API contracts.
+2. **Bugs** — Null/undefined dereferences, unhandled error paths, resource leaks, \
+race conditions.
+3. **Security** — Injection vulnerabilities (SQL, command, XSS), hardcoded secrets, \
+insecure defaults, OWASP Top 10 issues.
+4. **Style & consistency** — Naming conventions, code style violations relative to \
+the surrounding codebase, unnecessary complexity.
+5. **Test gaps** — Important behaviour that is untested, assertions that don't \
+verify the right thing, missing edge-case coverage.
+
+## Instructions
+
+- If you find issues, fix them directly: edit the files, run the build/tests to \
+verify your fixes, and commit the changes.
+- If no issues are found, stop immediately — do not make changes for the sake of \
+making changes.
+- Do NOT refactor code that was not part of the original diff unless it has a \
+concrete bug or security issue.
+- Keep fixes minimal and focused. Each fix should be a separate commit with a \
+clear message.
+"""
diff --git a/agent/src/self_review.py b/agent/src/self_review.py
@@ -0,0 +1,198 @@
+"""Self-review orchestration: LLM critiques its own diff before PR creation."""
+
+from __future__ import annotations
+
+import asyncio
+import subprocess
+from typing import TYPE_CHECKING
+
+from prompts.self_review import SELF_REVIEW_PROMPT
+from shell import log
+
+if TYPE_CHECKING:
+    from models import AgentResult, RepoSetup, TaskConfig
+    from progress_writer import _ProgressWriter
+    from telemetry import _TrajectoryWriter
+
+# Char cap on the reviewed diff; _truncate_diff() trims larger ones, ideally at a hunk boundary.
+_MAX_DIFF_CHARS = 60_000
+
+# Minimal system prompt for the self-review agent run; fields: repo_url, branch_name, repo_dir.
+_REVIEW_SYSTEM_PROMPT = """\
+You are a code reviewer working inside the repository {repo_url} on branch {branch_name}.
+Your working directory is {repo_dir}.
+
+You have full access to the filesystem and can run commands. Fix any issues you \
+find directly — edit files, run the build, and commit fixes. Keep changes minimal \
+and focused.
+
+Do NOT open a pull request or push. Just fix issues and commit locally.
+"""
+
+
+def _get_diff(repo_dir: str, default_branch: str) -> str:
+    """Return the committed branch diff vs origin/{default_branch}; empty string on failure."""
+    try:
+        result = subprocess.run(
+            ["git", "diff", f"origin/{default_branch}...HEAD"],
+            cwd=repo_dir,
+            capture_output=True,
+            encoding="utf-8",
+            errors="replace",  # a non-UTF-8 byte in the diff must not raise (fail-open)
+            timeout=60,
+        )
+        if result.returncode == 0:
+            return result.stdout
+        log("WARN", f"self_review: git diff failed (exit {result.returncode})")
+    except (subprocess.TimeoutExpired, OSError) as e:
+        log("WARN", f"self_review: git diff error: {type(e).__name__}: {e}")
+    return ""
+
+
+def _truncate_diff(diff: str, max_chars: int = _MAX_DIFF_CHARS) -> str:
+    """Shorten an oversized diff, preferring to cut on a hunk boundary.
+
+    A diff of at most ``max_chars`` characters is returned unchanged; otherwise it
+    is cut before the last ``@@`` hunk header inside the limit (or at the last
+    newline when there is none) and a truncation notice is appended.
+    """
+    if len(diff) <= max_chars:
+        return diff
+
+    head = diff[:max_chars]
+    # Prefer dropping the trailing, partially included hunk.
+    cut = head.rfind("\n@@")
+    if cut <= 0:
+        # No usable hunk header in range: settle for the last line break.
+        cut = head.rfind("\n")
+    if cut > 0:
+        head = head[:cut]
+
+    # Newline counts stand in for line totals, hence the "~" in the notice.
+    total_lines = diff.count("\n")
+    kept_lines = head.count("\n")
+    # Sizes below are measured before the notice itself is appended.
+    notice = (
+        f"\n\n... [diff truncated: showing ~{kept_lines} of ~{total_lines} lines; "
+        f"{len(diff) - len(head)} chars omitted] ..."
+    )
+    return head + notice
+
+
+def _build_review_system_prompt(config: TaskConfig, setup: RepoSetup) -> str:
+    """Render the self-review system prompt for the task's repo, branch and checkout dir."""
+    # Names mirror the {repo_url}/{branch_name}/{repo_dir} placeholders in the template.
+    repo_url, branch_name, repo_dir = config.repo_url, setup.branch, setup.repo_dir
+    return _REVIEW_SYSTEM_PROMPT.format(
+        repo_url=repo_url, branch_name=branch_name, repo_dir=repo_dir
+    )
+
+
+def run_self_review(
+    config: TaskConfig,
+    setup: RepoSetup,
+    agent_result: AgentResult,
+    trajectory: _TrajectoryWriter,
+    progress: _ProgressWriter,
+) -> AgentResult | None:
+    """Run one capped agent pass that reviews (and may fix) the branch diff.
+
+    Returns the review run's AgentResult, or None when the phase is skipped
+    (disabled, pr_review task, no turns/budget left, empty diff) or the agent run raises.
+    """
+    # Skip: the feature is opt-in and was not enabled for this task.
+    if not config.self_review_enabled:
+        log("TASK", "self_review: disabled (self_review_enabled=False)")
+        return None
+
+    # Skip: pr_review tasks are read-only, so there is no diff of our own to review.
+    if config.task_type == "pr_review":
+        log("TASK", "self_review: skipped for pr_review task type")
+        return None
+
+    # Review turns = turns left of config.max_turns, capped at self_review_max_turns.
+    used_turns = agent_result.turns or 0
+    remaining_turns = config.max_turns - used_turns
+    review_turns = min(remaining_turns, config.self_review_max_turns)
+    if review_turns <= 0:
+        log("TASK", f"self_review: no remaining turns (used={used_turns}, max={config.max_turns})")
+        return None
+
+    # Review budget = what is left of max_budget_usd; stays None when no budget is configured.
+    review_budget: float | None = None
+    if config.max_budget_usd is not None:
+        used_cost = agent_result.cost_usd or 0.0
+        remaining_budget = config.max_budget_usd - used_cost
+        if remaining_budget <= 0:
+            log(
+                "TASK",
+                f"self_review: no remaining budget "
+                f"(used=${used_cost:.2f}, max=${config.max_budget_usd:.2f})",
+            )
+            return None
+        review_budget = remaining_budget
+
+    # Committed changes only (origin/<default>...HEAD); uncommitted edits are not included.
+    diff = _get_diff(setup.repo_dir, setup.default_branch)
+    if not diff.strip():
+        log("TASK", "self_review: no diff found — skipping")
+        return None
+
+    # Keep the prompt bounded: oversized diffs are cut and get a truncation notice.
+    diff = _truncate_diff(diff)
+
+    # Fall back to the issue number when the task has no free-text description.
+    task_desc = config.task_description or f"Issue #{config.issue_number}"
+    user_prompt = SELF_REVIEW_PROMPT.format(diff=diff, task_description=task_desc)
+    system_prompt = _build_review_system_prompt(config, setup)
+
+    # Same task config, but with turn/budget limits narrowed to the review allowance.
+    review_config = config.model_copy(
+        update={
+            "max_turns": review_turns,
+            "max_budget_usd": review_budget,
+        }
+    )
+
+    log(
+        "TASK",
+        f"self_review: starting (turns={review_turns}, "
+        f"budget={'$' + f'{review_budget:.2f}' if review_budget else 'unlimited'}, "
+        f"diff_chars={len(diff)})",
+    )
+    progress.write_agent_milestone(
+        "self_review_started",
+        f"turns={review_turns} diff_chars={len(diff)}",
+    )
+
+    try:
+        from runner import run_agent  # NOTE(review): presumably deferred to avoid an import cycle
+
+        review_result = asyncio.run(
+            run_agent(
+                user_prompt,
+                system_prompt,
+                review_config,
+                cwd=setup.repo_dir,
+                trajectory=trajectory,
+            )
+        )
+    except Exception as e:
+        # Fail-open: log, record an error milestone, and return None to the caller.
+        log("WARN", f"self_review: agent execution failed: {type(e).__name__}: {e}")
+        progress.write_agent_milestone(
+            "self_review_complete",
+            f"status=error error={type(e).__name__}: {e}",
+        )
+        return None
+
+    log(
+        "TASK",
+        f"self_review: complete (status={review_result.status}, "
+        f"turns={review_result.turns}, cost=${review_result.cost_usd or 0:.4f})",
+    )
+    progress.write_agent_milestone(
+        "self_review_complete",
+        f"status={review_result.status} turns={review_result.turns}",
+    )
+    return review_result
diff --git a/agent/src/server.py b/agent/src/server.py
@@ -386,6 +386,8 @@ def _run_task_background(
     user_id: str = "",
     workload_access_token: str = "",
     attachments: list[dict] | None = None,
+    self_review_enabled: bool = False,
+    self_review_max_turns: int = 5,
 ) -> None:
     """Run the agent task in a background thread."""
     global _background_pipeline_failed
@@ -469,6 +471,8 @@ def _run_task_background(
             trace=trace,
             user_id=user_id,
             attachments=attachments,
+            self_review_enabled=self_review_enabled,
+            self_review_max_turns=self_review_max_turns,
         )
         _background_pipeline_failed = False
     except Exception as e:
@@ -557,6 +561,9 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
     channel_source = inp.get("channel_source", "") or ""
     channel_metadata = inp.get("channel_metadata") or {}
     attachments = inp.get("attachments") or []
+    # Self-review (opt-in): LLM critiques its own diff before PR creation.
+    self_review_enabled = inp.get("self_review_enabled") is True
+    self_review_max_turns = int(inp.get("self_review_max_turns", 5))
     # ``trace`` is strictly opt-in (design §10.1). Accept only real
     # booleans from the orchestrator — a string "false" would otherwise
     # flip the flag on.
@@ -630,6 +637,8 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict:
         "user_id": user_id,
         "workload_access_token": workload_access_token,
         "attachments": attachments,
+        "self_review_enabled": self_review_enabled,
+        "self_review_max_turns": self_review_max_turns,
     }
 
 
diff --git a/agent/tests/test_self_review.py b/agent/tests/test_self_review.py