|
| 1 | +"""Self-review orchestration: LLM critiques its own diff before PR creation.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import asyncio |
| 6 | +import subprocess |
| 7 | +from typing import TYPE_CHECKING |
| 8 | + |
| 9 | +from prompts.self_review import SELF_REVIEW_PROMPT |
| 10 | +from shell import log |
| 11 | + |
| 12 | +if TYPE_CHECKING: |
| 13 | + from models import AgentResult, RepoSetup, TaskConfig |
| 14 | + from progress_writer import _ProgressWriter |
| 15 | + from telemetry import _TrajectoryWriter |
| 16 | + |
# Upper bound, in characters, on the diff text embedded in the review prompt.
# Used as the default ``max_chars`` of ``_truncate_diff``.
_MAX_DIFF_CHARS = 60_000

# Minimal system prompt for the self-review agent invocation.
# It is a ``str.format`` template with three placeholders -- {repo_url},
# {branch_name} and {repo_dir} -- filled in by ``_build_review_system_prompt``.
# The backslash right after the opening quotes suppresses the leading newline,
# and the trailing backslashes inside the text join the wrapped source lines
# into a single paragraph.
_REVIEW_SYSTEM_PROMPT = """\
You are a code reviewer working inside the repository {repo_url} on branch {branch_name}.
Your working directory is {repo_dir}.

You have full access to the filesystem and can run commands. Fix any issues you \
find directly — edit files, run the build, and commit fixes. Keep changes minimal \
and focused.

Do NOT open a pull request or push. Just fix issues and commit locally.
"""
| 31 | + |
| 32 | + |
def _get_diff(repo_dir: str, default_branch: str) -> str:
    """Return the cumulative diff of HEAD against ``origin/{default_branch}``.

    Runs ``git diff origin/<default_branch>...HEAD`` inside ``repo_dir``. The
    three-dot form diffs HEAD against the merge base with the remote branch,
    i.e. only the changes made on the current branch.

    Args:
        repo_dir: Directory in which git is invoked.
        default_branch: Branch name; it is compared through its ``origin/`` ref.

    Returns:
        The diff text, or ``""`` when git exits non-zero, exceeds the 60 s
        timeout, or cannot be started. Failures are logged at WARN level and
        never raised.
    """
    try:
        result = subprocess.run(
            ["git", "diff", f"origin/{default_branch}...HEAD"],
            cwd=repo_dir,
            capture_output=True,
            text=True,
            # Decode explicitly and leniently: with the locale default codec in
            # strict mode, a diff containing non-UTF-8 bytes would raise
            # UnicodeDecodeError, which is not an OSError and would escape the
            # handler below.
            encoding="utf-8",
            errors="replace",
            timeout=60,
        )
    except (subprocess.TimeoutExpired, OSError) as e:
        log("WARN", f"self_review: git diff error: {type(e).__name__}: {e}")
        return ""

    if result.returncode != 0:
        # Surface git's own explanation (unknown ref, not a repository, ...).
        detail = (result.stderr or "").strip()
        suffix = f": {detail}" if detail else ""
        log("WARN", f"self_review: git diff failed (exit {result.returncode}){suffix}")
        return ""
    return result.stdout
| 50 | + |
| 51 | + |
def _truncate_diff(diff: str, max_chars: int = _MAX_DIFF_CHARS) -> str:
    """Shorten ``diff`` to roughly ``max_chars`` characters, preferring hunk boundaries.

    A diff that already fits is returned unchanged. Otherwise the first
    ``max_chars`` characters are taken and trimmed as follows:

    * If at least one complete hunk fits, cut just before the last hunk header
      (a line starting with ``@@``) inside the window, so no hunk is left
      half-shown.
    * If no complete hunk fits (for example a single very large hunk), cut at
      the last newline inside the window instead. Cutting at the only hunk
      header would discard all of the diff content and keep just the file
      header.
    * If the window contains no newline at all, keep the window as is.

    A notice with approximate kept/total line counts and the number of omitted
    characters is appended, so the result may be slightly longer than
    ``max_chars``.

    Args:
        diff: Full diff text.
        max_chars: Size limit in characters; negative values are treated as 0.

    Returns:
        ``diff`` itself when it fits, otherwise the trimmed text plus notice.
    """
    if len(diff) <= max_chars:
        return diff

    # Clamp so a negative limit is not interpreted as a from-the-end slice.
    window = diff[: max(max_chars, 0)]

    first_hunk = window.find("\n@@")
    last_hunk = window.rfind("\n@@")
    if last_hunk > first_hunk:
        # At least one whole hunk precedes the last header: drop the partial one.
        truncated = window[:last_hunk]
    else:
        # Zero or one hunk header in the window: keep partial content and end
        # on a line boundary when there is one.
        last_newline = window.rfind("\n")
        truncated = window[:last_newline] if last_newline > 0 else window

    total_lines = diff.count("\n")
    kept_lines = truncated.count("\n")
    omitted_chars = len(diff) - len(truncated)
    return (
        f"{truncated}"
        f"\n\n... [diff truncated: showing ~{kept_lines} of ~{total_lines} lines; "
        f"{omitted_chars} chars omitted] ..."
    )
| 80 | + |
| 81 | + |
def _build_review_system_prompt(config: TaskConfig, setup: RepoSetup) -> str:
    """Render the self-review system prompt for this task and checkout.

    Fills the ``_REVIEW_SYSTEM_PROMPT`` template with the repository URL from
    ``config`` and the branch name and working directory from ``setup``.
    """
    placeholders = {
        "repo_url": config.repo_url,
        "branch_name": setup.branch,
        "repo_dir": setup.repo_dir,
    }
    return _REVIEW_SYSTEM_PROMPT.format(**placeholders)
| 89 | + |
| 90 | + |
def run_self_review(
    config: TaskConfig,
    setup: RepoSetup,
    agent_result: AgentResult,
    trajectory: _TrajectoryWriter,
    progress: _ProgressWriter,
) -> AgentResult | None:
    """Have the agent review the branch diff and fix what it finds.

    The phase is skipped (returning ``None``) when the feature is disabled,
    the task type is ``pr_review``, no turns or budget remain after the main
    run, or the branch has no diff against the default branch. Otherwise the
    (possibly truncated) diff is embedded in the review prompt and the agent
    is run with the leftover turn and budget allowance.

    Returns:
        The ``AgentResult`` of the review run, or ``None`` if the phase was
        skipped or the agent invocation raised. Agent errors are logged and
        reported as a milestone rather than propagated (fail-open).
    """
    # Guard: feature switched off.
    if not config.self_review_enabled:
        log("TASK", "self_review: disabled (self_review_enabled=False)")
        return None

    # Guard: pr_review tasks are read-only, so there is nothing to review.
    if config.task_type == "pr_review":
        log("TASK", "self_review: skipped for pr_review task type")
        return None

    # Turn allowance: whatever the main run left over, capped by the review limit.
    turns_spent = agent_result.turns or 0
    turns_left = config.max_turns - turns_spent
    review_turns = min(turns_left, config.self_review_max_turns)
    if review_turns <= 0:
        log(
            "TASK",
            f"self_review: no remaining turns (used={turns_spent}, max={config.max_turns})",
        )
        return None

    # Budget allowance: stays None (unlimited) when the task has no budget cap.
    review_budget: float | None = None
    if config.max_budget_usd is not None:
        cost_spent = agent_result.cost_usd or 0.0
        budget_left = config.max_budget_usd - cost_spent
        if budget_left <= 0:
            log(
                "TASK",
                f"self_review: no remaining budget "
                f"(used=${cost_spent:.2f}, max=${config.max_budget_usd:.2f})",
            )
            return None
        review_budget = budget_left

    # Collect the branch diff; an empty or whitespace-only diff means nothing to review.
    raw_diff = _get_diff(setup.repo_dir, setup.default_branch)
    if not raw_diff.strip():
        log("TASK", "self_review: no diff found — skipping")
        return None
    diff = _truncate_diff(raw_diff)

    # Assemble the prompts.
    task_desc = config.task_description or f"Issue #{config.issue_number}"
    user_prompt = SELF_REVIEW_PROMPT.format(diff=diff, task_description=task_desc)
    system_prompt = _build_review_system_prompt(config, setup)

    # Copy of the task config restricted to the leftover allowance.
    overrides = {"max_turns": review_turns, "max_budget_usd": review_budget}
    review_config = config.model_copy(update=overrides)

    diff_chars = len(diff)
    budget_label = f"${review_budget:.2f}" if review_budget else "unlimited"
    log(
        "TASK",
        f"self_review: starting (turns={review_turns}, "
        f"budget={budget_label}, "
        f"diff_chars={diff_chars})",
    )
    progress.write_agent_milestone(
        "self_review_started",
        f"turns={review_turns} diff_chars={diff_chars}",
    )

    try:
        # Imported here rather than at module top level.
        from runner import run_agent

        review_result = asyncio.run(
            run_agent(
                user_prompt,
                system_prompt,
                review_config,
                cwd=setup.repo_dir,
                trajectory=trajectory,
            )
        )
    except Exception as e:
        # Fail-open: a broken review run must never block the pipeline.
        failure = f"{type(e).__name__}: {e}"
        log("WARN", f"self_review: agent execution failed: {failure}")
        progress.write_agent_milestone(
            "self_review_complete",
            f"status=error error={failure}",
        )
        return None
    else:
        log(
            "TASK",
            f"self_review: complete (status={review_result.status}, "
            f"turns={review_result.turns}, cost=${review_result.cost_usd or 0:.4f})",
        )
        progress.write_agent_milestone(
            "self_review_complete",
            f"status={review_result.status} turns={review_result.turns}",
        )
        return review_result
0 commit comments