Skip to content

Commit e138c82

Browse files
authored
feat(review): Cursor + OpenCode review engines (unified marker-review) (#959)
Add Cursor CLI and OpenCode as background code-review engines alongside Claude/Codex/Tour, unified behind a shared marker-review module (prose marker-block parsing, since neither CLI exposes a schema flag). Includes multi-provider dynamic model discovery, review-profile support, fail-closed parse/ingest, Bun+Pi parity, and the review UI (Provider picker, brand icons). Hardening from review: per-job nonce on marker tags (prevents echoed/quoted tags from corrupting extraction), truncation fail-closed, async model discovery (never blocks the event loop), nested-slash model ids, shared finding types.
1 parent affeaa0 commit e138c82

16 files changed

Lines changed: 2037 additions & 138 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ During normal plan review, an Archive sidebar tab provides the same browsing via
311311
| `/api/external-annotations` | POST | Add external annotations (single or batch `{ annotations: [...] }`) |
312312
| `/api/external-annotations` | PATCH | Update fields on a single annotation (`?id=`) |
313313
| `/api/external-annotations` | DELETE | Remove by `?id=`, `?source=`, or clear all |
314-
| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour) |
314+
| `/api/agents/capabilities` | GET | Check available agent providers (claude, codex, tour, cursor, opencode) |
315315
| `/api/agents/review-profiles` | GET | List launchable review profiles (enabled skills + builtin default) |
316316
| `/api/agents/skills` | GET | List all discovered skills for the add-a-review picker (each flagged `enabled`) |
317317
| `/api/agents/review-skills` | POST | Enable a skill as a review (body: `{ name }`); writes `review-skills.json` |

apps/pi-extension/server/agent-jobs.ts

Lines changed: 127 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
*/
99

1010
import type { IncomingMessage, ServerResponse } from "node:http";
11-
import { spawn, execFileSync, type ChildProcess } from "node:child_process";
11+
import { spawn, execFileSync, execFile, type ChildProcess } from "node:child_process";
12+
import { promisify } from "node:util";
13+
14+
const execFileAsync = promisify(execFile);
1215
import {
1316
type AgentJobInfo,
1417
type AgentJobEvent,
@@ -21,6 +24,12 @@ import {
2124
AGENT_HEARTBEAT_INTERVAL_MS,
2225
} from "../generated/agent-jobs.js";
2326
import { formatClaudeLogEvent } from "../generated/claude-review.js";
27+
import {
28+
MARKER_ENGINES,
29+
formatMarkerLogEvent,
30+
type MarkerEngine,
31+
type MarkerModel,
32+
} from "../generated/marker-review.js";
2433
import { json, parseBody } from "./helpers.js";
2534

2635
// ---------------------------------------------------------------------------
@@ -32,6 +41,16 @@ const JOBS = `${BASE}/jobs`;
3241
const JOBS_STREAM = `${JOBS}/stream`;
3342
const CAPABILITIES = `${BASE}/capabilities`;
3443

44+
// Providers whose command is owned by the server. Client-supplied argv is never
45+
// spawned for these — buildCommand must produce the command or the launch fails.
46+
const SERVER_BUILT_PROVIDERS: ReadonlySet<string> = new Set([
47+
"claude",
48+
"codex",
49+
"tour",
50+
"cursor",
51+
"opencode",
52+
]);
53+
3554
// ---------------------------------------------------------------------------
3655
// which() helper for Node.js
3756
// ---------------------------------------------------------------------------
@@ -54,7 +73,7 @@ export interface AgentJobHandlerOptions {
5473
mode: "plan" | "review" | "annotate";
5574
getServerUrl: () => string;
5675
getCwd: () => string;
57-
/** Server-side command builder for known providers (codex, claude, tour). */
76+
/** Build the command server-side for a given provider. */
5877
buildCommand?: (provider: string, config?: Record<string, unknown>) => Promise<{
5978
command: string[];
6079
outputPath?: string;
@@ -88,6 +107,27 @@ export interface AgentJobHandlerOptions {
88107
onJobComplete?: (job: AgentJobInfo, meta: { outputPath?: string; stdout?: string; cwd?: string }) => void | Promise<void>;
89108
}
90109

110+
/**
111+
* Best-effort model catalog for a marker engine, spawned once. The spawn lives
112+
* HERE (per-runtime — child_process execFile) rather than in marker-review.ts,
113+
* which must stay Bun-free for the Pi vendor build. ASYNC so it never blocks the
114+
* event loop on the /capabilities request path (a slow/hanging CLI would otherwise
115+
* freeze every other in-flight request for up to the timeout). Empty when discovery
116+
* fails or the CLI is unauthenticated / has no providers configured — the UI falls
117+
* back to the engine's default picker. Account/config-specific, so never hardcoded.
118+
*/
119+
async function discoverMarkerModels(engine: MarkerEngine): Promise<MarkerModel[]> {
120+
try {
121+
const { stdout } = await execFileAsync(engine.binary, engine.modelsArgv, {
122+
timeout: 5000,
123+
encoding: "utf8",
124+
});
125+
return engine.parseModels(stdout);
126+
} catch {
127+
return [];
128+
}
129+
}
130+
91131
export function createAgentJobHandler(options: AgentJobHandlerOptions) {
92132
const { mode, getServerUrl, getCwd } = options;
93133

@@ -103,11 +143,32 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
103143
{ id: "codex", name: "Codex CLI", available: whichCmd("codex") },
104144
{ id: "tour", name: "Code Tour", available: whichCmd("claude") || whichCmd("codex") },
105145
];
106-
const capabilitiesResponse: AgentCapabilities = {
107-
mode,
108-
providers: capabilities,
109-
available: capabilities.some((c) => c.available),
110-
};
146+
// Marker engines (Cursor, OpenCode) — same shape, one loop. Available only in
147+
// review mode when the binary is on PATH (NOTE: cursor's binary is `agent`).
148+
// Model catalogs are discovered LAZILY (see buildCapabilitiesResponse) so a
149+
// slow/unauthenticated `<binary> models` spawn never blocks startup.
150+
for (const engine of Object.values(MARKER_ENGINES)) {
151+
capabilities.push({
152+
id: engine.id,
153+
name: engine.name,
154+
available: mode === "review" && whichCmd(engine.binary),
155+
});
156+
}
157+
158+
const markerModelsCache = new Map<string, MarkerModel[]>();
159+
async function buildCapabilitiesResponse(): Promise<AgentCapabilities> {
160+
const providers = await Promise.all(capabilities.map(async (c) => {
161+
const engine = MARKER_ENGINES[c.id as "cursor" | "opencode"];
162+
if (!engine || !c.available) return c;
163+
let models = markerModelsCache.get(engine.id);
164+
if (!models) {
165+
models = await discoverMarkerModels(engine);
166+
markerModelsCache.set(engine.id, models);
167+
}
168+
return { ...c, models };
169+
}));
170+
return { mode, providers, available: providers.some((p) => p.available) };
171+
}
111172

112173
// --- SSE broadcasting ---
113174
function broadcast(event: AgentJobEvent): void {
@@ -190,31 +251,47 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
190251
if (spawnOptions?.cwd) jobOutputPaths.set(`${id}:cwd`, spawnOptions.cwd);
191252
broadcast({ type: "job:started", job: { ...info } });
192253

193-
// --- Stdout capture (Claude JSONL streaming) ---
254+
// --- Stdout capture (Claude/Cursor stream-json) ---
194255
let stdoutBuf = "";
195256
if (captureStdout && proc.stdout) {
257+
// Format one complete JSONL line into a live-log delta (skip result
258+
// events — handled in onJobComplete).
259+
const emitLogLine = (line: string) => {
260+
if (!line.trim()) return;
261+
// Tour jobs with the Claude engine also stream Claude JSONL.
262+
if (provider === "claude" || spawnOptions?.engine === "claude") {
263+
const formatted = formatClaudeLogEvent(line);
264+
if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
265+
return;
266+
}
267+
// Marker engines (Cursor, OpenCode): map their NDJSON stream events
268+
// into readable log deltas via the engine's own formatter (Cursor
269+
// applies the partial-output dedup rule; OpenCode reads text parts).
270+
const markerEngine = MARKER_ENGINES[provider as "cursor" | "opencode"];
271+
if (markerEngine) {
272+
const formatted = formatMarkerLogEvent(line, markerEngine);
273+
if (formatted !== null) broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
274+
return;
275+
}
276+
try {
277+
const event = JSON.parse(line);
278+
if (event.type === 'result') return;
279+
} catch { /* not JSON — forward as raw log */ }
280+
broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
281+
};
282+
// stream-json output is NDJSON and chunk boundaries are arbitrary —
283+
// carry the trailing partial line until a later chunk completes it,
284+
// otherwise records split across chunks are dropped from live logs.
285+
let logLineCarry = "";
196286
proc.stdout.on("data", (chunk: Buffer) => {
197287
const text = chunk.toString();
198288
stdoutBuf += text;
199-
200-
// Forward JSONL lines as log events
201-
const lines = text.split('\n');
202-
for (const line of lines) {
203-
if (!line.trim()) continue;
204-
// Tour jobs with the Claude engine also stream Claude JSONL.
205-
if (provider === "claude" || spawnOptions?.engine === "claude") {
206-
const formatted = formatClaudeLogEvent(line);
207-
if (formatted !== null) {
208-
broadcast({ type: "job:log", jobId: id, delta: formatted + '\n' });
209-
}
210-
continue;
211-
}
212-
try {
213-
const event = JSON.parse(line);
214-
if (event.type === 'result') continue;
215-
} catch { /* not JSON — forward as raw log */ }
216-
broadcast({ type: "job:log", jobId: id, delta: line + '\n' });
217-
}
289+
const lines = (logLineCarry + text).split('\n');
290+
logLineCarry = lines.pop() ?? "";
291+
for (const line of lines) emitLogLine(line);
292+
});
293+
proc.stdout.on("end", () => {
294+
if (logLineCarry) emitLogLine(logLineCarry);
218295
});
219296
}
220297

@@ -272,8 +349,15 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
272349
stdout: captureStdout ? stdoutBuf : undefined,
273350
cwd: jobCwd,
274351
});
275-
} catch {
276-
// Result ingestion failure shouldn't prevent job completion broadcast
352+
} catch (err) {
353+
// Claude/Codex are fail-open; Cursor and OpenCode are fail-closed — an
354+
// unexpected throw during prompt-enforced ingestion must fail the job,
355+
// not pass it. (Their handlers normally fail by mutation and never
356+
// throw; this guards future refactors.)
357+
if (MARKER_ENGINES[provider as "cursor" | "opencode"]) {
358+
entry.info.status = "failed";
359+
entry.info.error = err instanceof Error ? err.message : `${provider} result ingestion failed`;
360+
}
277361
}
278362
}
279363
jobOutputPaths.delete(id);
@@ -350,7 +434,7 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
350434
): Promise<boolean> {
351435
// --- GET /api/agents/capabilities ---
352436
if (url.pathname === CAPABILITIES && req.method === "GET") {
353-
json(res, capabilitiesResponse);
437+
json(res, await buildCapabilitiesResponse());
354438
return true;
355439
}
356440

@@ -440,6 +524,19 @@ export function createAgentJobHandler(options: AgentJobHandlerOptions) {
440524
return true;
441525
}
442526

527+
// Fail-closed enforcement for server-owned providers: the command MUST
528+
// be built server-side. Client-supplied argv is never spawned for these
529+
// providers — a null/throwing builder becomes an error, not a fallback.
530+
if (SERVER_BUILT_PROVIDERS.has(provider)) {
531+
if (!options.buildCommand) {
532+
json(res, { error: `Provider ${provider} requires server-built command` }, 400);
533+
return true;
534+
}
535+
// Discard any client-supplied argv so a null build cleanly hits the
536+
// `command.length === 0` guard below instead of falling through.
537+
command = [];
538+
}
539+
443540
// Try server-side command building for known providers
444541
let captureStdout = false;
445542
let stdinPrompt: string | undefined;

apps/pi-extension/server/serverReview.ts

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@ import {
9090
transformClaudeFindings,
9191
} from "../generated/claude-review.js";
9292
import { createTourSession, TOUR_EMPTY_OUTPUT_ERROR } from "../generated/tour-review.js";
93+
import {
94+
MARKER_ENGINES,
95+
composeMarkerReviewPrompt,
96+
buildMarkerCommand,
97+
parseMarkerStreamOutput,
98+
transformMarkerFindings,
99+
makeMarkerNonce,
100+
extractMarkerNonce,
101+
} from "../generated/marker-review.js";
93102
import {
94103
WorkspaceReviewSession,
95104
type WorkspaceDiffType,
@@ -588,6 +597,24 @@ export async function startReviewServer(options: {
588597
return { command, stdinPrompt, prompt, cwd, label: jobLabel, captureStdout: true, model, effort, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext, reviewProfileId: reviewProfile.id, reviewProfileLabel: reviewProfile.label };
589598
}
590599

600+
// Marker engines (Cursor, OpenCode) — one branch, same shape as Claude.
601+
// Neither CLI has a schema flag, so composeMarkerReviewPrompt ALWAYS
602+
// appends the marker-block output contract (even for a custom profile —
603+
// it's the only thing that makes their prose output parseable). The
604+
// engine's buildArgv passes the prompt as the trailing positional arg and
605+
// threads the spawn cwd (--workspace for Cursor, --dir for OpenCode).
606+
// captureStdout is required: the marker block comes back on stdout NDJSON.
607+
const markerEngine = MARKER_ENGINES[provider as "cursor" | "opencode"];
608+
if (markerEngine) {
609+
const model = typeof config?.model === "string" && config.model ? config.model : undefined;
610+
// Per-job nonce embedded in the marker contract; recovered from job.prompt
611+
// at parse time so echoed/quoted bare tags can't be mistaken for the payload.
612+
const nonce = makeMarkerNonce();
613+
const prompt = composeMarkerReviewPrompt(reviewProfile, userMessage, nonce);
614+
const { command } = buildMarkerCommand(markerEngine, prompt, model, cwd);
615+
return { command, prompt, cwd, label: jobLabel, captureStdout: true, model, prUrl: launchPrUrl, diffScope: launchDiffScope, diffContext, reviewProfileId: reviewProfile.id, reviewProfileLabel: reviewProfile.label };
616+
}
617+
591618
return null;
592619
},
593620

@@ -612,7 +639,7 @@ export async function startReviewServer(options: {
612639
// Map findings onto annotations and ingest. Shared by both engine branches;
613640
// no-ops on an empty set so a clean (zero-finding) review stays "done".
614641
const ingest = <T extends object>(transformed: readonly T[], logTag: string) => {
615-
if (transformed.length === 0) return;
642+
if (transformed.length === 0) return undefined;
616643
const annotations = transformed.map((a) => ({
617644
...a,
618645
...jobPrContext,
@@ -621,6 +648,7 @@ export async function startReviewServer(options: {
621648
}));
622649
const result = externalAnnotations.addAnnotations({ annotations });
623650
if ("error" in result) console.error(`[${logTag}] addAnnotations error:`, result.error);
651+
return result;
624652
};
625653

626654
if (job.provider === "codex") {
@@ -682,6 +710,56 @@ export async function startReviewServer(options: {
682710
return;
683711
}
684712

713+
// --- Marker path (Cursor, OpenCode) ---
714+
// FAIL-CLOSED: marker output is prompt-enforced (no schema flag), so any
715+
// missing/malformed/schema/transform/insertion failure must MUTATE the job
716+
// to failed — NEVER throw (agent-jobs.ts swallows throws, silently leaving
717+
// an exit-0 job marked done). Mirrors the Tour fail-closed pattern below.
718+
// Findings carry nullable file/line, classified into line/whole-file/
719+
// general by transformMarkerFindings — nothing is dropped (same as Claude).
720+
const markerEngine = MARKER_ENGINES[job.provider as "cursor" | "opencode"];
721+
if (markerEngine) {
722+
// Recover the per-job nonce embedded in the prompt; without it no block
723+
// can be trusted, so parse fails closed below.
724+
const nonce = extractMarkerNonce(job.prompt ?? "");
725+
const output = nonce && meta.stdout ? parseMarkerStreamOutput(meta.stdout, markerEngine, nonce) : null;
726+
if (!output) {
727+
job.status = "failed";
728+
job.error = `${markerEngine.author} review output missing or unparseable (no valid marker JSON).`;
729+
return;
730+
}
731+
732+
// Derive the verdict from finding severities (like Claude) rather than
733+
// trusting the model's free-form `correctness` string. Marker engines
734+
// have no schema flag, so a model value like "not correct" would be
735+
// stored verbatim and the detail panel (any string containing "correct"
736+
// except "incorrect" → green) would invert the displayed result.
737+
const hasImportant = output.findings.some((f) => f.severity === "important");
738+
job.summary = {
739+
correctness: hasImportant ? "Issues Found" : "Correct",
740+
explanation: output.summary.explanation,
741+
confidence: output.summary.confidence,
742+
};
743+
744+
// Reuse the shared ingest() decoration; add a fail-closed check on result.
745+
const result = ingest(
746+
transformMarkerFindings(
747+
output.findings,
748+
job.source,
749+
markerEngine.author,
750+
cwd,
751+
workspace ? (filePath) => workspace.normalizeAnnotationPath(filePath) : undefined,
752+
),
753+
`${markerEngine.id}-review`,
754+
);
755+
if (result && "error" in result) {
756+
job.status = "failed";
757+
job.error = `${markerEngine.author} annotation insertion failed: ${result.error}`;
758+
return;
759+
}
760+
return;
761+
}
762+
685763
if (job.provider === "tour") {
686764
const { summary } = await tour.onJobComplete({ job, meta });
687765
if (summary) {

apps/pi-extension/vendor.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ for f in feedback-templates prompts review-core diff-paths cli-pagination jj-cor
1313
done
1414

1515
# Vendor review agent modules from packages/server/ — rewrite imports for generated/ layout
16-
for f in agent-review-message codex-review claude-review path-utils review-skill-loader; do
16+
for f in agent-review-message codex-review claude-review review-findings marker-review path-utils review-skill-loader; do
1717
src="../../packages/server/$f.ts"
1818
printf '// @generated — DO NOT EDIT. Source: packages/server/%s.ts\n' "$f" | cat - "$src" \
1919
| sed 's|from "./vcs"|from "./review-core.js"|' \

packages/review-editor/dock/panels/ReviewAgentJobDetailPanel.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ function ProviderPill({ provider, engine, model }: { provider: string; engine?:
352352
const engineLabel = engine === 'codex' ? 'Codex' : 'Claude';
353353
label = model && engine !== 'codex' ? `Tour · ${engineLabel} ${model.charAt(0).toUpperCase() + model.slice(1)}` : `Tour · ${engineLabel}`;
354354
} else {
355-
label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : 'Shell';
355+
label = provider === 'claude' ? 'Claude' : provider === 'codex' ? 'Codex' : provider === 'cursor' ? 'Cursor' : provider === 'opencode' ? 'OpenCode' : 'Shell';
356356
}
357357
return (
358358
<span className={`text-[10px] font-semibold uppercase tracking-wider px-1.5 py-0.5 rounded ${

0 commit comments

Comments
 (0)