Skip to content

Commit fab9f5b

Browse files
xuiocodex
andcommitted
Harden native MCP failure paths
Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent 43a366d commit fab9f5b

20 files changed

Lines changed: 545 additions & 70 deletions

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ Full local access is opt-in per call:
111111
Use that only when the user explicitly wants Codex to edit files, write git state,
112112
use DNS/network, install packages, or behave like a normal unrestricted Codex run.
113113

114+
Advanced callers can still request `advanced.sandbox: "workspace-write"` for
115+
project-scoped writes, and the `patcher` role uses that sandbox because its job is
116+
to suggest or prepare patches. That is not full access: arbitrary filesystem
117+
writes and DNS/network remain disabled unless `full_access: true` is set.
118+
114119
## Documentation
115120

116121
- [Usage guide](docs/USAGE.md) - tools, examples, models, sessions, env vars.

dist/index.js

Lines changed: 130 additions & 31 deletions
Large diffs are not rendered by default.

docs/KNOWN_LIMITATIONS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ That maps to Codex's unrestricted local mode. It can write files, mutate git
5555
state, use DNS/network, and run package installs. Use it only when the user
5656
explicitly asks for that capability.
5757

58+
There is also an advanced `workspace-write` sandbox. It can write inside the
59+
configured workspace but is not full local access and does not enable DNS/network
60+
or arbitrary filesystem writes. The `patcher` role uses `workspace-write` by
61+
default; use other roles or omit `subagent_type` for strictly read-only work.
62+
5863
## Nested Subagents Can Increase Cost And Latency
5964

6065
Nested Codex subagents are supported, including Spark. Keep nested work scoped,

docs/USAGE.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,11 @@ This maps to Codex's `--dangerously-bypass-approvals-and-sandbox` flag and allow
200200
DNS/network access, file writes, package installs, and git writes. Keep it scoped
201201
to the specific tool call that needs it.
202202

203+
`advanced.sandbox: "workspace-write"` is a narrower advanced mode for
204+
project-scoped writes without DNS/network or arbitrary filesystem access. The
205+
`patcher` role uses `workspace-write`; all other built-in roles stay read-only.
206+
Responses include a `safety` block so callers can see which sandbox actually ran.
207+
203208
## Sharp Edges
204209

205210
See [Known limitations](KNOWN_LIMITATIONS.md) for the current operational sharp

src/index.ts

Lines changed: 105 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -574,9 +574,70 @@ function summarizeResultValue(value: unknown, fallbackText: string, fallback: st
574574
const summary = (value as { summary?: unknown }).summary;
575575
if (typeof summary === "string" && summary.trim()) return firstUsefulLine(summary, fallback);
576576
}
577+
if (typeof value === "string" && !value.trim() && fallbackText.trim()) {
578+
return firstUsefulLine(fallbackText, fallback);
579+
}
577580
return firstUsefulLine(stringifyResultValue(value, fallbackText), fallback);
578581
}
579582

583+
function agentFallbackErrorText(
584+
agent: {
585+
validationError?: unknown;
586+
eventSummary?: { errors?: unknown[] };
587+
stderr?: unknown;
588+
},
589+
recovery?: { recommendedAction?: string },
590+
): string | undefined {
591+
const validationError = typeof agent.validationError === "string" ? agent.validationError.trim() : "";
592+
if (validationError) return validationError;
593+
594+
const eventError = agent.eventSummary?.errors?.find(
595+
(error): error is string => typeof error === "string" && error.trim().length > 0,
596+
);
597+
if (eventError) return eventError.trim();
598+
599+
const stderr = typeof agent.stderr === "string" ? agent.stderr.trim() : "";
600+
if (stderr) return stderr;
601+
602+
const recoveryAction = recovery?.recommendedAction?.trim();
603+
return recoveryAction || undefined;
604+
}
605+
606+
function visibleAgentAnswer(
607+
agent: ReturnType<typeof compactAgentResultForMcp>,
608+
recovery?: { recommendedAction?: string },
609+
): string {
610+
const resultValue = agent.structuredOutput ?? agent.finalMessage;
611+
const answer = stringifyResultValue(resultValue, agent.finalMessage);
612+
const structuredOutputError = typeof agent.structuredOutputError === "string" ? agent.structuredOutputError : undefined;
613+
if (!agent.ok && structuredOutputError) {
614+
return answer
615+
? `${answer}\n\nStructured output parse failed: ${structuredOutputError}`
616+
: `Codex task ${agent.status}: Structured output parse failed: ${structuredOutputError}`;
617+
}
618+
619+
const fallbackReason = !agent.ok && !answer ? agentFallbackErrorText(agent, recovery) : undefined;
620+
if (fallbackReason) return `Codex task ${agent.status}: ${fallbackReason}`;
621+
return answer;
622+
}
623+
624+
function agentSafety(agent: ReturnType<typeof compactAgentResultForMcp>): Record<string, unknown> {
625+
return {
626+
sandbox: agent.sandbox,
627+
full_access: Boolean(agent.dangerouslyBypassApprovalsAndSandbox),
628+
warning:
629+
agent.dangerouslyBypassApprovalsAndSandbox
630+
? "Codex ran with full non-sandbox access."
631+
: agent.sandbox === "workspace-write"
632+
? "Codex could write inside the configured workspace."
633+
: undefined,
634+
};
635+
}
636+
637+
function agentCommands(agent: ReturnType<typeof compactAgentResultForMcp>): Array<{ command?: string; status?: string }> {
638+
return agent.eventSummary.commands.map((command) => ({ ...command }));
639+
}
640+
580641
function suggestedActionForAgent(result: { ok: boolean; status: string }, recovery?: { recommendedAction?: string }): string {
581642
if (recovery?.recommendedAction) return recovery.recommendedAction;
582643
if (result.ok) return "Use the result directly, or ask a follow-up Codex task only if more independent analysis is needed.";
@@ -634,6 +695,9 @@ function diagnosticsForAgent(agent: ReturnType<typeof compactAgentResultForMcp>)
634695
stderr_tail: agent.stderr || undefined,
635696
stdout_tail: agent.stdoutTail || undefined,
636697
structured_output_error: agent.structuredOutputError || undefined,
698+
validation_error: agent.validationError || undefined,
699+
timeout_reason: agent.timeoutReason || undefined,
700+
commands: agentCommands(agent),
637701
};
638702
}
639703

@@ -650,13 +714,13 @@ function nativeAgentPayload(
650714
const agent = compactAgentResultForMcp(result);
651715
const recovery = recoveryForAgentResult(result);
652716
const resultValue = agent.structuredOutput ?? agent.finalMessage;
653-
const answer = stringifyResultValue(resultValue, agent.finalMessage);
654717
const structuredOutputError = typeof agent.structuredOutputError === "string" ? agent.structuredOutputError : undefined;
655-
const visibleAnswer =
656-
!agent.ok && structuredOutputError
657-
? `${answer}\n\nStructured output parse failed: ${structuredOutputError}`
658-
: answer;
718+
const visibleAnswer = visibleAgentAnswer(agent, recovery);
659719
const sessionId = context.session && typeof context.session === "object" ? (context.session as { id?: string }).id : undefined;
720+
const sessionFallbackReason =
721+
context.session && typeof context.session === "object"
722+
? (context.session as { appServerFallbackReason?: string }).appServerFallbackReason
723+
: undefined;
660724
return {
661725
ok: agent.ok,
662726
status: agent.status,
@@ -665,18 +729,23 @@ function nativeAgentPayload(
665729
session_id: sessionId,
666730
turn: context.turn,
667731
structured: agent.structuredOutput,
732+
commands: agentCommands(agent),
733+
safety: agentSafety(agent),
668734
hint: recovery?.recommendedAction ?? suggestedActionForAgent(agent, recovery),
669735
error: recovery
670736
? {
671737
message: structuredOutputError
672738
? `Structured output parse failed: ${structuredOutputError}`
673-
: agent.eventSummary.errors[0] ?? agent.stderr ?? `Codex task ${agent.status}`,
739+
: agentFallbackErrorText(agent, recovery) ?? `Codex task ${agent.status}`,
674740
recoverable: recovery.recoverable,
675741
kind: recovery.reason,
676742
retry_after_ms: recovery.retryAfterMs,
677743
}
678744
: undefined,
679-
diagnostics: diagnosticsForAgent(agent),
745+
diagnostics: {
746+
...diagnosticsForAgent(agent),
747+
app_server_fallback_reason: sessionFallbackReason || undefined,
748+
},
680749
};
681750
}
682751

@@ -703,18 +772,21 @@ function nativeParallelResponse(
703772
const normalized = agents.map((agent, index) => {
704773
const recovery = recoveries[index];
705774
const task = context.descriptions[index] ?? {};
706-
const answer = stringifyResultValue(agent.structuredOutput ?? agent.finalMessage, agent.finalMessage);
775+
const resultValue = agent.structuredOutput ?? agent.finalMessage;
776+
const answer = visibleAgentAnswer(agent, recovery);
707777
return {
708778
name: agent.name ?? task.name ?? `codex-task-${index + 1}`,
709779
ok: agent.ok,
710780
status: agent.status,
711-
summary: summarizeResultValue(agent.structuredOutput ?? agent.finalMessage, answer, `Codex task ${agent.status}`),
781+
summary: summarizeResultValue(resultValue, answer, `Codex task ${agent.status}`),
712782
result: answer,
713783
session_id: task.session_id,
714784
structured: agent.structuredOutput,
785+
commands: agentCommands(agent),
786+
safety: agentSafety(agent),
715787
error: recovery
716788
? {
717-
message: agent.eventSummary.errors[0] ?? agent.stderr ?? `Codex task ${agent.status}`,
789+
message: agentFallbackErrorText(agent, recovery) ?? `Codex task ${agent.status}`,
718790
recoverable: recovery.recoverable,
719791
kind: recovery.reason,
720792
retry_after_ms: recovery.retryAfterMs,
@@ -750,18 +822,21 @@ function nativeTaskGroupResponse(runs: NativeTaskGroupRun[]): CallToolResult {
750822
if (run.result) {
751823
const agent = compactAgentResultForMcp(run.result);
752824
const recovery = recoveryForAgentResult(run.result);
753-
const answer = stringifyResultValue(agent.structuredOutput ?? agent.finalMessage, agent.finalMessage);
825+
const resultValue = agent.structuredOutput ?? agent.finalMessage;
826+
const answer = visibleAgentAnswer(agent, recovery);
754827
return {
755828
name: agent.name ?? run.task.name ?? run.task.description ?? `codex-task-${index + 1}`,
756829
ok: agent.ok,
757830
status: agent.status,
758-
summary: summarizeResultValue(agent.structuredOutput ?? agent.finalMessage, answer, `Codex task ${agent.status}`),
831+
summary: summarizeResultValue(resultValue, answer, `Codex task ${agent.status}`),
759832
result: answer,
760833
session_id: run.session?.id,
761834
structured: agent.structuredOutput,
835+
commands: agentCommands(agent),
836+
safety: agentSafety(agent),
762837
error: recovery
763838
? {
764-
message: agent.eventSummary.errors[0] ?? agent.stderr ?? `Codex task ${agent.status}`,
839+
message: agentFallbackErrorText(agent, recovery) ?? `Codex task ${agent.status}`,
765840
recoverable: recovery.recoverable,
766841
kind: recovery.reason,
767842
retry_after_ms: recovery.retryAfterMs,
@@ -772,15 +847,17 @@ function nativeTaskGroupResponse(runs: NativeTaskGroupRun[]): CallToolResult {
772847
}
773848

774849
const recovery = recoveryForError(run.error ?? new Error("Codex task failed before producing a result."), "codex_task_group");
850+
const message = redactSensitiveText(run.error instanceof Error ? run.error.message : String(run.error ?? "Codex task failed."));
851+
const result = recovery.recommendedAction ? `${message}\n\nNext: ${recovery.recommendedAction}` : message;
775852
return {
776853
name: run.task.name ?? run.task.description ?? `codex-task-${index + 1}`,
777854
ok: false,
778855
status: "failed",
779-
summary: "Codex task failed before producing a result.",
780-
result: "",
856+
summary: firstUsefulLine(message, "Codex task failed before producing a result."),
857+
result,
781858
session_id: run.session?.id,
782859
error: {
783-
message: redactSensitiveText(run.error instanceof Error ? run.error.message : String(run.error ?? "Codex task failed.")),
860+
message,
784861
recoverable: recovery.recoverable,
785862
kind: recovery.reason,
786863
retry_after_ms: recovery.retryAfterMs,
@@ -815,18 +892,22 @@ function nativeTaskPrompt(args: { description?: string; prompt: string; subagent
815892
return `${prefix.join("\n")}\n\n${args.prompt}`;
816893
}
817894

818-
function sessionProgressPayload(session: unknown): Record<string, unknown> {
895+
function sessionProgressPayload(session: unknown, preferredResult?: unknown): Record<string, unknown> {
819896
if (!session || typeof session !== "object") return {};
820897
const value = session as Record<string, unknown>;
821898
const partial = value.partial && typeof value.partial === "object" ? (value.partial as Record<string, unknown>) : undefined;
822899
const lastResult = value.lastResult && typeof value.lastResult === "object" ? (value.lastResult as Record<string, unknown>) : undefined;
900+
const preferred =
901+
preferredResult && typeof preferredResult === "object" ? (preferredResult as Record<string, unknown>) : undefined;
823902
const activeTurn = value.activeTurn && typeof value.activeTurn === "object" ? (value.activeTurn as Record<string, unknown>) : undefined;
824903
const updatedAt = typeof value.updatedAt === "string" ? Date.parse(value.updatedAt) : NaN;
825904
const createdAt = typeof activeTurn?.createdAt === "string" ? Date.parse(activeTurn.createdAt) : NaN;
826905
const elapsedBase = Number.isFinite(createdAt) ? createdAt : updatedAt;
827906
const partialResult =
828907
typeof partial?.lastAgentMessage === "string"
829908
? partial.lastAgentMessage
909+
: typeof preferred?.finalMessage === "string"
910+
? preferred.finalMessage
830911
: typeof lastResult?.finalMessage === "string"
831912
? lastResult.finalMessage
832913
: undefined;
@@ -1038,7 +1119,9 @@ function toRunOptions(args: {
10381119
skipGitRepoCheck: args.skip_git_repo_check,
10391120
ignoreRules: args.ignore_rules,
10401121
isolatedCodexHome: args.isolated_codex_home,
1041-
mcpConfigPolicy: args.mcp_config_policy ?? (args.codex_mcp_servers ? "explicit" : undefined),
1122+
mcpConfigPolicy:
1123+
args.mcp_config_policy ??
1124+
(args.codex_mcp_servers && Object.keys(args.codex_mcp_servers).length > 0 ? "explicit" : undefined),
10421125
codexMcpServers: args.codex_mcp_servers,
10431126
forwardSensitiveEnv: args.forward_sensitive_env,
10441127
idleTimeoutMs: args.idle_timeout_ms,
@@ -1667,6 +1750,7 @@ registerTool(
16671750
.describe("Short human-readable task label, like Claude's native Task description."),
16681751
prompt: z
16691752
.string()
1753+
.trim()
16701754
.min(1)
16711755
.describe(
16721756
"Self-contained Codex task prompt. Include scope, read-only expectation, output shape, and file/line reference requirements when reviewing code.",
@@ -1941,6 +2025,7 @@ const nativeTaskGroupTaskSchema = z.object({
19412025
.describe("Short task label, like Claude's native Task description."),
19422026
prompt: z
19432027
.string()
2028+
.trim()
19442029
.min(1)
19452030
.describe(
19462031
"Self-contained Codex prompt for this independent task. Keep overlap low across parallel tasks.",
@@ -2130,7 +2215,7 @@ registerTool(
21302215
"Use the result directly, or use codex_followup mode queue for another prompt in this Codex context.",
21312216
diagnostics: {
21322217
session: compactSession,
2133-
...sessionProgressPayload(compactSession),
2218+
...sessionProgressPayload(compactSession, waitResult),
21342219
},
21352220
error: recovery
21362221
? {
@@ -2144,7 +2229,7 @@ registerTool(
21442229
);
21452230
}
21462231

2147-
const description = args.description ?? (mode === "steer" ? "Steer Codex session" : "Continue Codex session");
2232+
const description = args.description;
21482233
const runOptions = publicRunOptions({
21492234
...args,
21502235
description,

src/lifecycle.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ export async function cleanupRuntime(reason: string, graceMs = 2_500): Promise<v
7474
killChildProcess(child, "SIGTERM");
7575
}
7676

77-
const waitMs = Math.max(50, Math.min(graceMs, 1_000));
77+
const waitMs = Math.max(50, Math.min(graceMs, 60_000));
7878
await new Promise((resolve) => setTimeout(resolve, waitMs));
7979

8080
for (const [child, meta] of trackedChildren) {

src/logging.ts

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { createHash, randomUUID } from "node:crypto";
22
import { appendFileSync, chmodSync, mkdirSync, renameSync, statSync } from "node:fs";
33
import path from "node:path";
4-
import { redactJsonValue, redactSensitiveText } from "./redaction.js";
4+
import { isSensitiveKey, redactJsonValue, redactSensitiveText } from "./redaction.js";
55

66
export const logLevels = ["debug", "info", "warn", "error", "silent"] as const;
77
export type LogLevel = (typeof logLevels)[number];
@@ -15,8 +15,6 @@ const levelWeight: Record<LogLevel, number> = {
1515
silent: Number.POSITIVE_INFINITY,
1616
};
1717

18-
const sensitiveKeyRe = /(api[_-]?key|token|secret|password|private[_-]?key|cookie|session|credential|auth)/i;
19-
2018
let logWriter: (line: string) => void = (line) => {
2119
writeDefaultLog(line);
2220
};
@@ -83,7 +81,7 @@ function shortenRaw(text: string, max = maxStringChars()): string | { chars: num
8381
}
8482

8583
export function summarizeForLog(value: unknown, key = "", depth = 0): unknown {
86-
if (sensitiveKeyRe.test(key)) return "[REDACTED]";
84+
if (key && isSensitiveKey(key)) return "[REDACTED]";
8785

8886
if (typeof value === "string") {
8987
return shorten(value);

src/recovery.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,21 @@ export function recoveryForError(error: unknown, context = "tool_call"): Recover
5151
};
5252
}
5353

54+
if (
55+
lower.includes("enoent") ||
56+
lower.includes("enotdir") ||
57+
lower.includes("eisdir") ||
58+
lower.includes("no such file or directory") ||
59+
lower.includes("not a directory") ||
60+
lower.includes("not executable")
61+
) {
62+
return {
63+
recoverable: false,
64+
reason: "invalid_path",
65+
recommendedAction: "Fix the configured path or working directory before retrying.",
66+
};
67+
}
68+
5469
if (lower.includes("timed out") || lower.includes("timeout")) {
5570
return {
5671
recoverable: true,

0 commit comments

Comments
 (0)