Skip to content

Commit cf78663

Browse files
xuiocodex
andcommitted
Harden app-server steering validation
Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent 8de20ea commit cf78663

10 files changed

Lines changed: 161 additions & 92 deletions

dist/index.js

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23653,35 +23653,42 @@ var CodexSessionManager = class {
2365323653
}
2365423654
const wasActive = Boolean(session.controller);
2365523655
if (session.protocol === "app-server" && session.appServer && session.activeTurn && !options.interruptCurrent) {
23656-
const turn = this.recordSteerDelivery(session, prompt);
23657-
try {
23658-
const delivered = await session.appServer.steer(prompt);
23659-
turn.status = delivered.delivered ? "completed" : "failed";
23660-
turn.resultOk = delivered.delivered;
23661-
turn.resultStatus = delivered.delivered ? "completed" : "failed";
23662-
turn.error = delivered.delivered ? void 0 : "No active Codex app-server turn accepted steering.";
23663-
turn.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
23664-
this.notifyTurn(turn);
23665-
this.notifySession(session);
23666-
if (options.wait && session.activeTurn) await this.waitForTurn(session, session.activeTurn);
23667-
return {
23668-
session: snapshot2(session),
23669-
turn: turnSnapshot(turn),
23670-
delivery: delivered.delivered ? "delivered_to_active_turn" : "queued_after_current",
23671-
error: turn.error
23672-
};
23673-
} catch (error2) {
23674-
logger.error("session.steer_app_server_failed", {
23656+
const activeCodexTurnId = await this.waitForAppServerActiveTurn(session, 1e3);
23657+
if (!activeCodexTurnId) {
23658+
logger.warn("session.steer_app_server_not_ready", {
2367523659
sessionId: id,
23676-
error: errorForLog(error2)
23660+
activeTurnId: session.activeTurn.id
2367723661
});
23678-
turn.status = "failed";
23679-
turn.error = error2 instanceof Error ? error2.message : String(error2);
23680-
turn.resultOk = false;
23681-
turn.resultStatus = "failed";
23682-
turn.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
23683-
this.notifyTurn(turn);
23684-
this.notifySession(session);
23662+
} else {
23663+
try {
23664+
const delivered = await session.appServer.steer(prompt);
23665+
if (!delivered.delivered) {
23666+
logger.warn("session.steer_app_server_rejected", {
23667+
sessionId: id,
23668+
activeTurnId: session.activeTurn.id,
23669+
activeCodexTurnId
23670+
});
23671+
} else {
23672+
const turn = this.recordSteerDelivery(session, prompt);
23673+
turn.status = "completed";
23674+
turn.resultOk = true;
23675+
turn.resultStatus = "completed";
23676+
turn.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
23677+
this.notifyTurn(turn);
23678+
this.notifySession(session);
23679+
if (options.wait && session.activeTurn) await this.waitForTurn(session, session.activeTurn);
23680+
return {
23681+
session: snapshot2(session),
23682+
turn: turnSnapshot(turn),
23683+
delivery: "delivered_to_active_turn"
23684+
};
23685+
}
23686+
} catch (error2) {
23687+
logger.error("session.steer_app_server_failed", {
23688+
sessionId: id,
23689+
error: errorForLog(error2)
23690+
});
23691+
}
2368523692
}
2368623693
}
2368723694
const response = await this.send(id, prompt, overrides, {
@@ -23971,6 +23978,15 @@ var CodexSessionManager = class {
2397123978
});
2397223979
}
2397323980
}
23981+
async waitForAppServerActiveTurn(session, timeoutMs) {
23982+
const deadline = Date.now() + timeoutMs;
23983+
while (session.controller && session.activeTurn && session.appServer && Date.now() < deadline) {
23984+
const activeTurnId = session.appServer.activeTurnId;
23985+
if (activeTurnId) return activeTurnId;
23986+
await new Promise((resolve) => setTimeout(resolve, 10));
23987+
}
23988+
return session.appServer?.activeTurnId;
23989+
}
2397423990
notifyTurn(turn) {
2397523991
for (const waiter of turn.waiters) waiter();
2397623992
turn.waiters.clear();

src/sessions.ts

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -280,35 +280,42 @@ export class CodexSessionManager {
280280
}
281281
const wasActive = Boolean(session.controller);
282282
if (session.protocol === "app-server" && session.appServer && session.activeTurn && !options.interruptCurrent) {
283-
const turn = this.recordSteerDelivery(session, prompt);
284-
try {
285-
const delivered = await session.appServer.steer(prompt);
286-
turn.status = delivered.delivered ? "completed" : "failed";
287-
turn.resultOk = delivered.delivered;
288-
turn.resultStatus = delivered.delivered ? "completed" : "failed";
289-
turn.error = delivered.delivered ? undefined : "No active Codex app-server turn accepted steering.";
290-
turn.updatedAt = new Date().toISOString();
291-
this.notifyTurn(turn);
292-
this.notifySession(session);
293-
if (options.wait && session.activeTurn) await this.waitForTurn(session, session.activeTurn);
294-
return {
295-
session: snapshot(session),
296-
turn: turnSnapshot(turn),
297-
delivery: delivered.delivered ? "delivered_to_active_turn" : "queued_after_current",
298-
error: turn.error,
299-
};
300-
} catch (error) {
301-
logger.error("session.steer_app_server_failed", {
283+
const activeCodexTurnId = await this.waitForAppServerActiveTurn(session, 1_000);
284+
if (!activeCodexTurnId) {
285+
logger.warn("session.steer_app_server_not_ready", {
302286
sessionId: id,
303-
error: errorForLog(error),
287+
activeTurnId: session.activeTurn.id,
304288
});
305-
turn.status = "failed";
306-
turn.error = error instanceof Error ? error.message : String(error);
307-
turn.resultOk = false;
308-
turn.resultStatus = "failed";
309-
turn.updatedAt = new Date().toISOString();
310-
this.notifyTurn(turn);
311-
this.notifySession(session);
289+
} else {
290+
try {
291+
const delivered = await session.appServer.steer(prompt);
292+
if (!delivered.delivered) {
293+
logger.warn("session.steer_app_server_rejected", {
294+
sessionId: id,
295+
activeTurnId: session.activeTurn.id,
296+
activeCodexTurnId,
297+
});
298+
} else {
299+
const turn = this.recordSteerDelivery(session, prompt);
300+
turn.status = "completed";
301+
turn.resultOk = true;
302+
turn.resultStatus = "completed";
303+
turn.updatedAt = new Date().toISOString();
304+
this.notifyTurn(turn);
305+
this.notifySession(session);
306+
if (options.wait && session.activeTurn) await this.waitForTurn(session, session.activeTurn);
307+
return {
308+
session: snapshot(session),
309+
turn: turnSnapshot(turn),
310+
delivery: "delivered_to_active_turn",
311+
};
312+
}
313+
} catch (error) {
314+
logger.error("session.steer_app_server_failed", {
315+
sessionId: id,
316+
error: errorForLog(error),
317+
});
318+
}
312319
}
313320
}
314321
const response = await this.send(id, prompt, overrides, {
@@ -652,6 +659,19 @@ export class CodexSessionManager {
652659
}
653660
}
654661

662+
/**
 * Poll the session's app-server until it reports an active turn id.
 *
 * Polling continues only while `session.controller`, `session.activeTurn`
 * and `session.appServer` are all truthy and the deadline has not passed;
 * the app-server is re-checked roughly every 10 ms.
 *
 * @param session Session record whose `appServer.activeTurnId` is polled.
 * @param timeoutMs Maximum time to keep polling, in milliseconds.
 * @returns The first truthy `activeTurnId` seen while polling, otherwise
 *   whatever `session.appServer?.activeTurnId` holds once polling stops
 *   (possibly `undefined`).
 */
private async waitForAppServerActiveTurn(
  session: CodexSessionRecord,
  timeoutMs: number,
): Promise<string | undefined> {
  const giveUpAt = Date.now() + timeoutMs;
  const pause = () => new Promise((wake) => setTimeout(wake, 10));
  for (;;) {
    const stillRunning = session.controller && session.activeTurn && session.appServer;
    // Keep the "not (now < deadline)" form so a NaN deadline stops polling.
    if (!stillRunning || !(Date.now() < giveUpAt)) break;
    const reportedId = session.appServer.activeTurnId;
    if (reportedId) return reportedId;
    await pause();
  }
  // One last read: the id may have appeared right as polling stopped.
  return session.appServer?.activeTurnId;
}
674+
655675
private notifyTurn(turn: CodexSessionTurnRecord): void {
656676
for (const waiter of turn.waiters) waiter();
657677
turn.waiters.clear();

test/claude-autodiscovery.mjs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process";
22
import { mkdir, readFile, readdir, rm, stat } from "node:fs/promises";
33
import os from "node:os";
44
import path from "node:path";
5+
import { extractJsonResult } from "./json-result.mjs";
56

67
const root = process.cwd();
78
const fakeCodex = path.join(root, "test/fixtures/fake-codex.mjs");
@@ -59,12 +60,6 @@ async function resolveClaudeCodeBinary() {
5960
return resolved;
6061
}
6162

62-
function extractJsonResult(rawResult) {
63-
const trimmed = rawResult.trim();
64-
const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
65-
return JSON.parse(fenced ? fenced[1] : trimmed);
66-
}
67-
6863
function assert(condition, message, details) {
6964
if (!condition) {
7065
throw new Error(`${message}${details ? `\n${JSON.stringify(details, null, 2)}` : ""}`);

test/claude-large-output.mjs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process";
22
import { readdir, stat } from "node:fs/promises";
33
import os from "node:os";
44
import path from "node:path";
5+
import { extractJsonResult } from "./json-result.mjs";
56

67
const root = process.cwd();
78
const fakeCodex = path.join(root, "test/fixtures/fake-codex.mjs");
@@ -58,12 +59,6 @@ async function resolveClaudeCodeBinary() {
5859
return resolved;
5960
}
6061

61-
function extractJsonResult(rawResult) {
62-
const trimmed = rawResult.trim();
63-
const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
64-
return JSON.parse(fenced ? fenced[1] : trimmed);
65-
}
66-
6762
function assert(condition, message, details) {
6863
if (!condition) {
6964
throw new Error(`${message}${details ? `\n${JSON.stringify(details, null, 2)}` : ""}`);

test/claude-orchestration.mjs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process";
22
import { readdir, stat } from "node:fs/promises";
33
import os from "node:os";
44
import path from "node:path";
5+
import { extractJsonResult } from "./json-result.mjs";
56

67
const root = process.cwd();
78
const fakeCodex = path.join(root, "test/fixtures/fake-codex.mjs");
@@ -58,12 +59,6 @@ async function resolveClaudeCodeBinary() {
5859
return resolved;
5960
}
6061

61-
function extractJsonResult(rawResult) {
62-
const trimmed = rawResult.trim();
63-
const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
64-
return JSON.parse(fenced ? fenced[1] : trimmed);
65-
}
66-
6762
function assert(condition, message, details) {
6863
if (!condition) {
6964
throw new Error(`${message}${details ? `\n${JSON.stringify(details, null, 2)}` : ""}`);

test/claude-real-codex.mjs

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process";
22
import { readdir, stat } from "node:fs/promises";
33
import os from "node:os";
44
import path from "node:path";
5+
import { extractJsonResult } from "./json-result.mjs";
56

67
const root = process.cwd();
78
const codexBin =
@@ -58,12 +59,6 @@ async function resolveClaudeCodeBinary() {
5859
return resolved;
5960
}
6061

61-
function extractJsonResult(rawResult) {
62-
const trimmed = rawResult.trim();
63-
const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
64-
return JSON.parse(fenced ? fenced[1] : trimmed);
65-
}
66-
6762
function assert(condition, message, details) {
6863
if (!condition) {
6964
throw new Error(`${message}${details ? `\n${JSON.stringify(details, null, 2)}` : ""}`);

test/claude-real-session.mjs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process";
22
import { readdir, stat } from "node:fs/promises";
33
import os from "node:os";
44
import path from "node:path";
5+
import { extractJsonResult } from "./json-result.mjs";
56

67
const root = process.cwd();
78
const codexBin =
@@ -69,12 +70,6 @@ async function resolveClaudeCodeBinary() {
6970
return resolved;
7071
}
7172

72-
function extractJsonResult(rawResult) {
73-
const trimmed = rawResult.trim();
74-
const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
75-
return JSON.parse(fenced ? fenced[1] : trimmed);
76-
}
77-
7873
function assert(condition, message, details) {
7974
if (!condition) {
8075
throw new Error(`${message}${details ? `\n${JSON.stringify(details, null, 2)}` : ""}`);
@@ -89,7 +84,7 @@ Perform exactly these checks:
8984
3. Call continue_codex_session for that session id with task "Follow-up persistent session validation. Stay read-only. Reply exactly REAL_SESSION_FOLLOW_OK", codex_bin "${codexBin}", model_preset "spark", reasoning_effort "low", timeout_ms 180000. Important: intentionally omit project_dir and cwd from this follow-up call.
9085
4. Verify the continue_codex_session result has agent.ok true, agent.cwd equal to "${root}", session.projectDir equal to "${root}", and finalMessage containing REAL_SESSION_FOLLOW_OK.
9186
5. Call get_session for the same session id and verify session.projectDir is still "${root}" and turns is at least 2.
92-
6. Call start_codex_session_async with task "Real async app-server steering validation. Stay read-only. Run the shell command \`sleep 6\`, then reply exactly REAL_SESSION_ASYNC_START_OK unless a later steering instruction changes the exact final reply.", project_dir "${root}", codex_bin "${codexBin}", model_preset "spark", reasoning_effort "low", timeout_ms 180000. Verify it returns a second session.id and a turn.id immediately.
87+
6. Call start_codex_session_async with task "Real async app-server steering validation. Stay read-only. Run the shell command \`sleep 30\`, then reply exactly REAL_SESSION_ASYNC_START_OK unless a later steering instruction changes the exact final reply.", project_dir "${root}", codex_bin "${codexBin}", model_preset "spark", reasoning_effort "low", timeout_ms 180000. Verify it returns a second session.id and a turn.id immediately.
9388
7. Poll get_codex_session for that second session id until session.supportsRealSteering is true and session.activeTurn is present.
9489
8. Call steer_codex_session for that second session id with steering_prompt "Steering validation. Change the exact final reply to REAL_SESSION_STEER_OK.", codex_bin "${codexBin}", model_preset "spark", reasoning_effort "low", timeout_ms 180000, wait_for_completion false. Verify it returns delivery "delivered_to_active_turn" and a completed steer turn.
9590
9. Call wait_codex_session for the second session id with timeout_ms 300000. Verify completed true, session.protocol is "app-server", session.turns is at least 1, recentTurns contains a completed steer turn, and lastResult.finalMessage contains REAL_SESSION_STEER_OK.

test/claude-session-steering.mjs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { spawnSync } from "node:child_process";
22
import { mkdir, readFile, readdir, rm, stat } from "node:fs/promises";
33
import os from "node:os";
44
import path from "node:path";
5+
import { extractJsonResult } from "./json-result.mjs";
56

67
const root = process.cwd();
78
const fakeCodex = path.join(root, "test/fixtures/fake-codex.mjs");
@@ -59,12 +60,6 @@ async function resolveClaudeCodeBinary() {
5960
return resolved;
6061
}
6162

62-
function extractJsonResult(rawResult) {
63-
const trimmed = rawResult.trim();
64-
const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
65-
return JSON.parse(fenced ? fenced[1] : trimmed);
66-
}
67-
6863
function assert(condition, message, details) {
6964
if (!condition) {
7065
throw new Error(`${message}${details ? `\n${JSON.stringify(details, null, 2)}` : ""}`);
@@ -76,7 +71,7 @@ await mkdir(recordDir, { recursive: true });
7671
try {
7772
const prompt = `Validate the codex-subagents plugin's long-running session flow from inside Claude Code. Use only the codex-subagents MCP tools. Use this exact fake Codex binary: ${fakeCodex}. Use this exact project_dir: ${projectDir}.
7873
79-
Start a Codex Spark session in the background with task "CLAUDE_STEERING_START DELAY_MS=2000". Poll get_codex_session until the session reports supportsRealSteering true and has an activeTurn. While it is running, first steer the session with steering prompt "CLAUDE_STEERING_STEER" without waiting for steering to complete. Then add a normal follow-up prompt "CLAUDE_STEERING_FOLLOW" without waiting for that prompt to complete. Then wait until the session is idle.
74+
Start a Codex Spark session in the background with task "CLAUDE_STEERING_START DELAY_MS=10000". Poll get_codex_session until the session reports supportsRealSteering true and has an activeTurn. While it is running, first steer the session with steering prompt "CLAUDE_STEERING_STEER" without waiting for steering to complete. Then add a normal follow-up prompt "CLAUDE_STEERING_FOLLOW" without waiting for that prompt to complete. Then wait until the session is idle.
8075
8176
Return exactly one compact JSON object and no markdown. Shape: {"ok": boolean, "turns": number, "steerCompleted": boolean, "completed": boolean}.`;
8277

test/json-result.mjs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/**
 * Find the index of the bracket that closes the object/array opened at
 * `text[start]`, honouring JSON string literals and backslash escapes so
 * that brackets inside strings are ignored.
 *
 * @param {string} text Text being scanned.
 * @param {number} start Index of an opening `{` or `[`.
 * @returns {number} Index of the matching closer, or -1 when the brackets
 *   are mismatched or the text ends before the value is closed.
 */
function findBalancedEnd(text, start) {
  const stack = [text[start]];
  let inString = false;
  let escaped = false;

  for (let index = start + 1; index < text.length; index += 1) {
    const char = text[index];
    if (inString) {
      if (escaped) escaped = false;
      else if (char === "\\") escaped = true;
      else if (char === "\"") inString = false;
      continue;
    }

    if (char === "\"") {
      inString = true;
      continue;
    }
    if (char === "{" || char === "[") {
      stack.push(char);
      continue;
    }
    if (char !== "}" && char !== "]") continue;

    const expected = char === "}" ? "{" : "[";
    if (stack.at(-1) !== expected) return -1;
    stack.pop();
    if (stack.length === 0) return index;
  }
  return -1;
}

/**
 * Return the first substring of `text` that is a complete, parseable JSON
 * object or array.
 *
 * Every `{` or `[` is tried as a starting point. A bracket-balanced span is
 * only accepted when `JSON.parse` succeeds on it, so brace-delimited prose
 * that precedes the real payload (for example `{see below}`) is skipped
 * instead of being returned as if it were JSON.
 *
 * @param {string} text Text that may contain a JSON value surrounded by prose.
 * @returns {string | undefined} The JSON source text, or `undefined` when no
 *   parseable object/array is found.
 */
function findFirstJsonValue(text) {
  for (let start = 0; start < text.length; start += 1) {
    const opener = text[start];
    if (opener !== "{" && opener !== "[") continue;

    const end = findBalancedEnd(text, start);
    if (end === -1) continue;

    const candidate = text.slice(start, end + 1);
    try {
      JSON.parse(candidate);
    } catch {
      // Balanced but not JSON; a later opener may still start a real value.
      continue;
    }
    return candidate;
  }
  return undefined;
}
37+
38+
/**
 * Parse the JSON payload out of a raw result string.
 *
 * Strategies are tried in order:
 *   1. the whole trimmed text as strict JSON;
 *   2. the contents of the first Markdown code fence;
 *   3. the first object/array found by `findFirstJsonValue`.
 *
 * Strict parsing runs first so that a valid JSON document whose string
 * values happen to contain triple backticks is not mistaken for a fenced
 * block.
 *
 * @param {unknown} rawResult Raw text; `null`/`undefined` are treated as "".
 * @returns {unknown} The parsed JSON value.
 * @throws {SyntaxError} The strict-parse error when no strategy yields JSON.
 */
export function extractJsonResult(rawResult) {
  const trimmed = String(rawResult ?? "").trim();

  let strictError;
  try {
    return JSON.parse(trimmed);
  } catch (error) {
    strictError = error;
  }

  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
  if (fenced) {
    try {
      return JSON.parse(fenced[1].trim());
    } catch {
      // Deliberate best effort: the first fence may hold non-JSON content
      // (e.g. a shell snippet), so fall through to the embedded-value scan.
    }
  }

  const value = findFirstJsonValue(trimmed);
  if (value) return JSON.parse(value);
  throw strictError;
}

test/json-result.test.mjs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import { describe, expect, it } from "vitest";
2+
import { extractJsonResult } from "./json-result.mjs";
3+
4+
describe("extractJsonResult", () => {
5+
it("parses strict JSON, fenced JSON, and prefixed Claude text", () => {
6+
expect(extractJsonResult('{"ok":true}')).toEqual({ ok: true });
7+
expect(extractJsonResult('```json\n{"ok":true}\n```')).toEqual({ ok: true });
8+
expect(extractJsonResult('All checks passed. {"ok":true,"text":"brace } inside string"}')).toEqual({
9+
ok: true,
10+
text: "brace } inside string",
11+
});
12+
});
13+
});

0 commit comments

Comments
 (0)