Skip to content

Commit 62ac3e9

Browse files
xuio and codex committed
Harden app-server session recovery
Close poisoned app-server children after ambiguous turn starts and interrupt timeouts. Recreate closed app-server wrappers through thread/resume and persist sessions once a thread id exists.

Co-Authored-By: OpenAI Codex <noreply@openai.com>
1 parent d410a7b commit 62ac3e9

5 files changed

Lines changed: 271 additions & 65 deletions

File tree

dist/index.js

Lines changed: 62 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -23665,6 +23665,7 @@ var CodexAppServerSession = class _CodexAppServerSession {
2366523665
return {
2366623666
id: this.id,
2366723667
protocol: "stdio",
23668+
closed: this.closed,
2366823669
userAgent: this.userAgent,
2366923670
codexHome: this.codexHome,
2367023671
threadId: this.threadId || void 0,
@@ -23714,12 +23715,16 @@ var CodexAppServerSession = class _CodexAppServerSession {
2371423715
} catch (error2) {
2371523716
this.acceptingStartNotifications = false;
2371623717
this.pendingStartNotifications = [];
23718+
await this.close().catch(() => {
23719+
});
2371723720
throw error2;
2371823721
}
2371923722
const turnId = turnResponse.turn?.id;
2372023723
if (!turnId) {
2372123724
this.acceptingStartNotifications = false;
2372223725
this.pendingStartNotifications = [];
23726+
await this.close().catch(() => {
23727+
});
2372323728
throw new AppServerUnavailableError("Codex app-server did not return a turn id.");
2372423729
}
2372523730
this.capabilities.turnStart = true;
@@ -23836,12 +23841,20 @@ var CodexAppServerSession = class _CodexAppServerSession {
2383623841
if (this.activeTurn) this.activeTurn.timeoutReason = timeoutReason;
2383723842
const message = `Codex app-server did not report turn completion within ${graceMs}ms after ${reason} interrupt.`;
2383823843
summary.errors.push(message);
23839-
resolveOnce(finish(reason === "cancelled" ? "cancelled" : "timeout", message));
23844+
const result2 = finish(reason === "cancelled" ? "cancelled" : "timeout", message);
23845+
this.activeTurn = void 0;
23846+
resolveOnce(result2);
23847+
void this.close("cancelled").catch(() => {
23848+
});
2384023849
}, graceMs);
2384123850
forceFinishTimeout.unref();
2384223851
}).catch((error2) => {
2384323852
summary.errors.push(`Codex app-server interrupt failed: ${error2.message}`);
23844-
resolveOnce(finish(reason === "cancelled" ? "cancelled" : "timeout", error2.message));
23853+
const result2 = finish(reason === "cancelled" ? "cancelled" : "timeout", error2.message);
23854+
this.activeTurn = void 0;
23855+
resolveOnce(result2);
23856+
void this.close("cancelled").catch(() => {
23857+
});
2384523858
});
2384623859
publishSnapshot(true);
2384723860
};
@@ -24324,13 +24337,13 @@ function snapshot2(session) {
2432424337
cwd: session.cwd,
2432524338
codexThreadId: session.codexThreadId,
2432624339
protocol: session.protocol,
24327-
supportsRealSteering: session.protocol === "app-server" && Boolean(session.appServer?.status().supports.turnSteer),
24340+
supportsRealSteering: session.protocol === "app-server" && Boolean(session.appServer && !session.appServer.status().closed && session.appServer.status().supports.turnSteer),
2432824341
appServer: session.appServer?.status(),
2432924342
appServerFallbackReason: session.appServerFallbackReason,
2433024343
durable: session.persisted ? {
2433124344
persisted: true,
2433224345
recovered: session.recovered,
24333-
canResume: Boolean(session.codexThreadId && session.turns > 0),
24346+
canResume: Boolean(session.codexThreadId),
2433424347
stateFile: session.stateFile
2433524348
} : void 0,
2433624349
turns: session.turns,
@@ -24694,7 +24707,7 @@ var CodexSessionManager = class {
2469424707
this.prune();
2469524708
const session = this.sessions.get(id);
2469624709
if (!session) return { error: `Unknown session_id: ${id}` };
24697-
if (!session.codexThreadId || session.turns < 1) {
24710+
if (!session.codexThreadId) {
2469824711
return {
2469924712
session: snapshot2(session),
2470024713
recovered: false,
@@ -24707,19 +24720,14 @@ var CodexSessionManager = class {
2470724720
return { session: snapshot2(session), recovered: true };
2470824721
}
2470924722
try {
24710-
if (!session.appServer) {
24711-
session.appServer = await CodexAppServerSession.create(
24712-
{
24713-
...session.baseOptions,
24714-
prompt: "",
24715-
projectDir: session.projectDir ?? session.baseOptions.projectDir,
24716-
cwd: session.cwd ?? session.baseOptions.cwd,
24717-
ephemeral: false
24718-
},
24719-
{ sessionId: session.id },
24720-
session.codexThreadId
24721-
);
24722-
session.codexThreadId = session.appServer.threadId;
24723+
if (!session.appServer || session.appServer.status().closed) {
24724+
await this.ensureAppServer(session, {
24725+
...session.baseOptions,
24726+
prompt: "",
24727+
projectDir: session.projectDir ?? session.baseOptions.projectDir,
24728+
cwd: session.cwd ?? session.baseOptions.cwd,
24729+
ephemeral: false
24730+
});
2472324731
} else {
2472424732
await session.appServer.readThread(false);
2472524733
}
@@ -24933,17 +24941,11 @@ var CodexSessionManager = class {
2493324941
);
2493424942
}
2493524943
async runAppServerTurn(session, options, controller) {
24944+
let appServerWasReady = false;
2493624945
try {
24937-
if (!session.appServer) {
24938-
session.appServer = await CodexAppServerSession.create(
24939-
options,
24940-
{ sessionId: session.id },
24941-
session.codexThreadId && session.turns > 0 ? session.codexThreadId : void 0
24942-
);
24943-
session.codexThreadId = session.appServer.threadId;
24944-
session.appServerFallbackReason = void 0;
24945-
}
24946-
return await session.appServer.startTurn(
24946+
const appServer = await this.ensureAppServer(session, options);
24947+
appServerWasReady = true;
24948+
return await appServer.startTurn(
2494724949
options,
2494824950
controller.signal,
2494924951
(partial2) => {
@@ -24957,7 +24959,8 @@ var CodexSessionManager = class {
2495724959
{ sessionTurnId: session.activeTurn?.id }
2495824960
);
2495924961
} catch (error2) {
24960-
if (session.turns === 0 && !session.appServer && shouldFallbackToExec(error2)) {
24962+
if (session.appServer?.status().closed) session.appServer = void 0;
24963+
if (session.turns === 0 && !appServerWasReady && !session.appServer && shouldFallbackToExec(error2)) {
2496124964
session.appServerFallbackReason = error2 instanceof Error ? error2.message : String(error2);
2496224965
logger.warn("session.app_server_fallback_to_exec", {
2496324966
sessionId: session.id,
@@ -24974,6 +24977,33 @@ var CodexSessionManager = class {
2497424977
throw error2;
2497524978
}
2497624979
}
24980+
async ensureAppServer(session, options) {
24981+
if (session.appServer?.status().closed) {
24982+
logger.warn("session.app_server_discard_closed", {
24983+
sessionId: session.id,
24984+
appServer: session.appServer.status()
24985+
});
24986+
session.appServer = void 0;
24987+
}
24988+
if (session.appServer) return session.appServer;
24989+
if (!session.appServerStarting) {
24990+
session.appServerStarting = CodexAppServerSession.create(
24991+
options,
24992+
{ sessionId: session.id },
24993+
session.codexThreadId ? session.codexThreadId : void 0
24994+
).then((appServer) => {
24995+
session.appServer = appServer;
24996+
session.codexThreadId = appServer.threadId;
24997+
session.appServerFallbackReason = void 0;
24998+
session.updatedAt = (/* @__PURE__ */ new Date()).toISOString();
24999+
this.persist();
25000+
return appServer;
25001+
}).finally(() => {
25002+
session.appServerStarting = void 0;
25003+
});
25004+
}
25005+
return session.appServerStarting;
25006+
}
2497725007
completeTurn(session, turn, result) {
2497825008
session.turns += 1;
2497925009
session.lastResult = result;
@@ -25090,7 +25120,7 @@ var CodexSessionManager = class {
2509025120
logger.info("session.state.loaded", { stateFile: store.file, sessions: this.sessions.size });
2509125121
}
2509225122
recordFromState(state) {
25093-
const hasThread = Boolean(state.codexThreadId && state.turns > 0);
25123+
const hasThread = Boolean(state.codexThreadId);
2509425124
if (!hasThread && state.status === "active") return void 0;
2509525125
return {
2509625126
id: state.id,
@@ -25103,6 +25133,7 @@ var CodexSessionManager = class {
2510325133
codexThreadId: state.codexThreadId,
2510425134
protocol: state.protocol,
2510525135
appServerFallbackReason: void 0,
25136+
appServerStarting: void 0,
2510625137
turns: state.turns,
2510725138
partial: void 0,
2510825139
error: state.error,
@@ -25125,7 +25156,7 @@ var CodexSessionManager = class {
2512525156
persist() {
2512625157
const store = this.stateStore;
2512725158
if (!store) return;
25128-
const states = [...this.sessions.values()].filter((session) => session.codexThreadId && session.turns > 0).map((session) => ({
25159+
const states = [...this.sessions.values()].filter((session) => session.codexThreadId).map((session) => ({
2512925160
id: session.id,
2513025161
name: session.name,
2513125162
status: session.status === "running" ? "active" : session.status,

src/app-server.ts

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@ export interface AppServerCapabilities {
4646
export interface AppServerStatus {
4747
id: string;
4848
protocol: "stdio";
49+
closed: boolean;
4950
userAgent?: string;
5051
codexHome?: string;
5152
threadId?: string;
@@ -370,6 +371,7 @@ export class CodexAppServerSession {
370371
return {
371372
id: this.id,
372373
protocol: "stdio",
374+
closed: this.closed,
373375
userAgent: this.userAgent,
374376
codexHome: this.codexHome,
375377
threadId: this.threadId || undefined,
@@ -427,12 +429,14 @@ export class CodexAppServerSession {
427429
} catch (error) {
428430
this.acceptingStartNotifications = false;
429431
this.pendingStartNotifications = [];
432+
await this.close().catch(() => {});
430433
throw error;
431434
}
432435
const turnId = turnResponse.turn?.id;
433436
if (!turnId) {
434437
this.acceptingStartNotifications = false;
435438
this.pendingStartNotifications = [];
439+
await this.close().catch(() => {});
436440
throw new AppServerUnavailableError("Codex app-server did not return a turn id.");
437441
}
438442
this.capabilities.turnStart = true;
@@ -555,13 +559,19 @@ export class CodexAppServerSession {
555559
if (this.activeTurn) this.activeTurn.timeoutReason = timeoutReason;
556560
const message = `Codex app-server did not report turn completion within ${graceMs}ms after ${reason} interrupt.`;
557561
summary.errors.push(message);
558-
resolveOnce(finish(reason === "cancelled" ? "cancelled" : "timeout", message));
562+
const result = finish(reason === "cancelled" ? "cancelled" : "timeout", message);
563+
this.activeTurn = undefined;
564+
resolveOnce(result);
565+
void this.close("cancelled").catch(() => {});
559566
}, graceMs);
560567
forceFinishTimeout.unref();
561568
})
562569
.catch((error) => {
563570
summary.errors.push(`Codex app-server interrupt failed: ${error.message}`);
564-
resolveOnce(finish(reason === "cancelled" ? "cancelled" : "timeout", error.message));
571+
const result = finish(reason === "cancelled" ? "cancelled" : "timeout", error.message);
572+
this.activeTurn = undefined;
573+
resolveOnce(result);
574+
void this.close("cancelled").catch(() => {});
565575
});
566576
publishSnapshot(true);
567577
};

0 commit comments

Comments
 (0)