Skip to content

Commit cd1a700

Browse files
authored
fix(app-bind): self-heal orphaned runtime-proxy bind with no backup (#614) (#615)
Adds self-healing recovery for an orphaned runtime-proxy app-bind: when config.toml is bound but the app-bind state/backup files are gone, `unbind-app` now restores it (falling back to the openai provider), `getAppBindStatus` exposes `unmanagedBind`, and status reports "bound but unmanaged" instead of "not configured". Includes the half-orphan duplicate-model_provider fix and full unit + integration regression coverage.
1 parent c66731e commit cd1a700

4 files changed

Lines changed: 404 additions & 13 deletions

File tree

lib/runtime/app-bind.ts

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import { fileURLToPath } from "node:url";
99
import { withFileOperationRetry } from "../fs-retry.js";
1010
import { getCodexMultiAuthDir } from "../runtime-paths.js";
1111
import {
12+
configHasRuntimeRotationProvider,
1213
restoreConfigTomlFromRuntimeRotationProvider,
14+
restoreConfigTomlFromRuntimeRotationProviderWithoutBackup,
1315
rewriteConfigTomlForRuntimeRotationProvider,
1416
} from "./config-toml.js";
1517

@@ -81,6 +83,13 @@ export interface AppBindRouterStatus {
8183
export interface AppBindStatus {
8284
bound: boolean;
8385
running: boolean;
86+
/**
87+
* True when config.toml is bound to the runtime proxy but the app-bind state
88+
* file is gone (orphaned bind, #614). In this case `bound` is also true and
89+
* `state` is null — the config needs `unbind-app` to recover even though the
90+
* normal state-file tracking is missing.
91+
*/
92+
unmanagedBind: boolean;
8493
state: AppBindState | null;
8594
router: AppBindRouterStatus | null;
8695
paths: AppBindPaths;
@@ -612,9 +621,19 @@ export async function getAppBindStatus(options: AppBindOptions = {}): Promise<Ap
612621
const paths = resolveAppBindPaths(options);
613622
const state = await readAppBindState(paths.statePath);
614623
const router = await readRouterStatus(paths.statusPath);
624+
// When no state file is present, the bind may still be live in config.toml
625+
// (orphaned bind, #614). Detect that from the config directly so status and
626+
// downstream callers don't report a bound config as "not configured".
627+
let unmanagedBind = false;
628+
if (state === null) {
629+
const current = await readConfigIfExists(paths.configPath);
630+
unmanagedBind =
631+
current.existed && configHasRuntimeRotationProvider(current.content);
632+
}
615633
return {
616-
bound: state !== null,
634+
bound: state !== null || unmanagedBind,
617635
running: router !== null && router.state === "running" && isProcessAlive(router.pid),
636+
unmanagedBind,
618637
state,
619638
router,
620639
paths,
@@ -765,6 +784,7 @@ async function unbindCodexAppRuntimeRotationLocked(
765784
}
766785

767786
const backup = await readAppBindBackup(paths.backupPath);
787+
let selfHealed = false;
768788
if (backup) {
769789
const current = await readConfigIfExists(backup.configPath);
770790
if (state && current.existed && sha256(current.content) !== state.boundConfigHash) {
@@ -786,6 +806,22 @@ async function unbindCodexAppRuntimeRotationLocked(
786806
restoreConfigTomlFromAppBind(current.content, ""),
787807
);
788808
}
809+
} else {
810+
// Orphaned-bind recovery (#614): no backup and no state file, but the
811+
// config may still be bound to the runtime proxy (e.g. the state/backup
812+
// were lost while config.toml stayed rewritten). The state-file checks
813+
// above can't see this, so consult the config directly and self-heal it
814+
// back to a working provider when it is bound.
815+
const current = await readConfigIfExists(paths.configPath);
816+
if (current.existed && configHasRuntimeRotationProvider(current.content)) {
817+
await atomicWriteFile(
818+
paths.configPath,
819+
restoreConfigTomlFromRuntimeRotationProviderWithoutBackup(
820+
current.content,
821+
),
822+
);
823+
selfHealed = true;
824+
}
789825
}
790826

791827
for (const candidate of [
@@ -801,15 +837,31 @@ async function unbindCodexAppRuntimeRotationLocked(
801837
}
802838

803839
const status = await getAppBindStatus(options);
840+
let message: string;
841+
if (backup) {
842+
message = `Unbound Codex app config ${backup.configPath}`;
843+
} else if (selfHealed) {
844+
message = `Restored Codex app config ${paths.configPath} from an orphaned runtime-proxy bind (no backup was present)`;
845+
} else {
846+
message = "Codex app bind was not configured";
847+
}
804848
return {
805849
status,
806-
message: backup
807-
? `Unbound Codex app config ${backup.configPath}`
808-
: "Codex app bind was not configured",
850+
message,
809851
};
810852
}
811853

812854
export function formatAppBindStatus(status: AppBindStatus): string {
855+
if (status.unmanagedBind && !status.state) {
856+
return [
857+
`Codex app bind: bound but unmanaged (config=${status.paths.configPath} points at the runtime proxy, but no app-bind state/backup is present)`,
858+
[
859+
"Run `codex-multi-auth rotation unbind-app` to restore the original",
860+
"Codex provider/config. This recovers the orphaned bind even though no",
861+
"backup was saved (#614).",
862+
].join(" "),
863+
].join("\n");
864+
}
813865
if (!status.bound || !status.state) return "Codex app bind: not configured";
814866
const parts = [
815867
status.running ? "running" : "configured but router not running",

lib/runtime/config-toml.ts

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,15 +136,31 @@ export function restoreTopLevelModelProvider(
136136
}
137137

138138
if (!handled && originalLine) {
139-
// Splice the restored line into the root table — appending at tail
140-
// would land it inside whatever section appears last in `output`.
141-
const firstSectionIdx = output.findIndex(
142-
(line) => readTomlTableName(line) !== null,
143-
);
144-
if (firstSectionIdx === -1) {
145-
output.push(originalLine);
146-
} else {
147-
output.splice(firstSectionIdx, 0, originalLine);
139+
// Only splice the original line back when the current config has no
140+
// top-level model_provider at all (bind stripped it). If a non-proxy
141+
// top-level model_provider already exists — e.g. a half-orphaned config
142+
// where the proxy block is present but the provider line already points
143+
// elsewhere — inserting another line would create a duplicate top-level
144+
// key and produce invalid TOML. In that case the existing line is
145+
// already correct, so leave it untouched.
146+
const hasTopLevelModelProvider = (() => {
147+
for (const line of output) {
148+
if (readTomlTableName(line) !== null) return false;
149+
if (/^\s*model_provider\s*=/.test(line)) return true;
150+
}
151+
return false;
152+
})();
153+
if (!hasTopLevelModelProvider) {
154+
// Splice the restored line into the root table — appending at tail
155+
// would land it inside whatever section appears last in `output`.
156+
const firstSectionIdx = output.findIndex(
157+
(line) => readTomlTableName(line) !== null,
158+
);
159+
if (firstSectionIdx === -1) {
160+
output.push(originalLine);
161+
} else {
162+
output.splice(firstSectionIdx, 0, originalLine);
163+
}
148164
}
149165
}
150166

@@ -271,3 +287,65 @@ export function restoreConfigTomlFromRuntimeRotationProvider(
271287
),
272288
);
273289
}
290+
291+
/**
 * Detects whether a config.toml is currently bound to the runtime rotation
 * proxy. A config counts as bound when either:
 *
 * - a top-level `model_provider` assignment (one that appears before the first
 *   table header) has a quoted value equal to the proxy provider id, or
 * - a table header named `model_providers.<proxy id>` is present anywhere.
 *
 * Used to recover an orphaned bind whose app-bind state/backup files were
 * lost: in that situation the state-file-based status check reports "not
 * configured" even though the config is still bound, so unbind/status must
 * consult the config itself.
 *
 * The top-level value is compared exactly rather than by substring, so a
 * different provider whose id merely contains the proxy id, or a trailing
 * comment that mentions it, is not misreported as a live bind.
 *
 * @param rawConfig - Full text of config.toml; LF and CRLF are both accepted.
 * @returns `true` when the config still references the runtime proxy.
 */
export function configHasRuntimeRotationProvider(rawConfig: string): boolean {
	if (rawConfig.length === 0) return false;
	const providerTable = `model_providers.${RUNTIME_ROTATION_PROXY_PROVIDER_ID}`;
	// Captures the value of `model_provider = "..."` (basic string, group 1) or
	// `model_provider = '...'` (literal string, group 2). Built once, outside
	// the loop.
	const modelProviderAssignment =
		/^\s*model_provider\s*=\s*(?:"([^"]*)"|'([^']*)')/;
	let inTopLevel = true;
	for (const line of rawConfig.split(/\r?\n/)) {
		// NOTE(review): readTomlTableName presumably yields the table name for a
		// `[table]` header line and null for anything else — confirm in helper.
		const tableName = readTomlTableName(line);
		if (tableName !== null) {
			if (tableName === providerTable) return true;
			// Any table header ends the root table; keys after it are not
			// top-level, but later headers may still be the proxy table.
			inTopLevel = false;
			continue;
		}
		if (!inTopLevel) continue;
		const assignment = modelProviderAssignment.exec(line);
		if (assignment === null) continue;
		const providerId = assignment[1] ?? assignment[2];
		if (providerId === RUNTIME_ROTATION_PROXY_PROVIDER_ID) return true;
	}
	return false;
}
320+
321+
/**
 * Orphaned-bind recovery: unbinds a config for which no backup of the user's
 * original config.toml exists.
 *
 * With no original `model_provider` line to bring back, a one-line stand-in
 * "original" config is synthesized that carries only `defaultProvider`, and
 * the regular restore routine is run against it. The result is then rewritten
 * to use the input's line-ending style.
 *
 * @param currentConfig - Text of the currently bound config.toml.
 * @param defaultProvider - Provider id to fall back to; defaults to `"openai"`.
 * @returns The restored config text, with CRLF endings throughout if the
 *   input contained any CRLF and LF endings otherwise.
 */
export function restoreConfigTomlFromRuntimeRotationProviderWithoutBackup(
	currentConfig: string,
	defaultProvider = "openai",
): string {
	const eol = currentConfig.includes("\r\n") ? "\r\n" : "\n";
	// Stand-in for the missing backup: just the fallback provider line.
	const fallbackOriginal = `model_provider = ${tomlStringLiteral(defaultProvider)}${eol}`;
	const unbound = restoreConfigTomlFromRuntimeRotationProvider(
		currentConfig,
		fallbackOriginal,
	);
	// The shared restore path may emit "\n" even for a CRLF input, so pin every
	// line break back to the style detected on the way in.
	return unbound.replace(/\r?\n/g, eol);
}
351+

test/app-bind.test.ts

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { afterEach, describe, expect, it } from "vitest";
99
import {
1010
bindCodexAppRuntimeRotation,
1111
formatAppBindStatus,
12+
getAppBindStatus,
1213
resolveAppBindPaths,
1314
restoreConfigTomlFromAppBind,
1415
rewriteConfigTomlForAppBind,
@@ -813,3 +814,135 @@ describe("Codex app runtime rotation bind", () => {
813814
expect(await readFile(logPath, "utf8")).toContain("log truncated");
814815
});
815816
});
817+
818+
describe("orphaned app-bind recovery (#614)", () => {
819+
const boundConfig = [
820+
'model_provider = "codex-multi-auth-runtime-proxy"',
821+
"disable_response_storage = false",
822+
"[profiles.default]",
823+
'model = "gpt-5"',
824+
"",
825+
"[model_providers.codex-multi-auth-runtime-proxy]",
826+
'name = "codex-multi-auth"',
827+
'base_url = "http://127.0.0.1:51758"',
828+
"requires_openai_auth = false",
829+
'wire_api = "responses"',
830+
"",
831+
].join("\n");
832+
833+
async function seedOrphanedBind(): Promise<{
834+
root: string;
835+
codexHome: string;
836+
env: NodeJS.ProcessEnv;
837+
}> {
838+
const root = await createTempRoot("codex-app-bind-orphan-");
839+
const codexHome = join(root, "codex-home");
840+
const env = {
841+
CODEX_MULTI_AUTH_DIR: join(root, "multi-auth"),
842+
CODEX_MULTI_AUTH_APP_BIND_CODEX_HOME: codexHome,
843+
};
844+
await mkdir(codexHome, { recursive: true });
845+
// Bound config on disk, but NO state file and NO backup (the orphan case).
846+
await writeFile(join(codexHome, "config.toml"), boundConfig, "utf8");
847+
return { root, codexHome, env };
848+
}
849+
850+
it("reports unmanagedBind when config is bound but no state file exists", async () => {
851+
const { root, env } = await seedOrphanedBind();
852+
const status = await getAppBindStatus({ platform: "linux", home: root, env });
853+
expect(status.bound).toBe(true);
854+
expect(status.unmanagedBind).toBe(true);
855+
expect(status.state).toBeNull();
856+
expect(formatAppBindStatus(status)).toContain("bound but unmanaged");
857+
});
858+
859+
it("self-heals a bound config with no backup/state on unbind", async () => {
860+
const { root, codexHome, env } = await seedOrphanedBind();
861+
862+
const unbound = await unbindCodexAppRuntimeRotation({
863+
platform: "linux",
864+
home: root,
865+
env,
866+
spawnDetached: false,
867+
});
868+
869+
const restored = await readFile(join(codexHome, "config.toml"), "utf8");
870+
expect(restored).toContain('model_provider = "openai"');
871+
expect(restored).not.toContain("codex-multi-auth-runtime-proxy");
872+
expect(restored).not.toContain("disable_response_storage");
873+
expect(restored).toContain("[profiles.default]");
874+
expect(unbound.message).toContain("orphaned runtime-proxy bind");
875+
expect(unbound.status.bound).toBe(false);
876+
expect(unbound.status.unmanagedBind).toBe(false);
877+
});
878+
879+
it("self-heals a half-orphan (proxy block present, model_provider already native) without duplicating keys", async () => {
880+
const root = await createTempRoot("codex-app-bind-half-orphan-");
881+
const codexHome = join(root, "codex-home");
882+
const env = {
883+
CODEX_MULTI_AUTH_DIR: join(root, "multi-auth"),
884+
CODEX_MULTI_AUTH_APP_BIND_CODEX_HOME: codexHome,
885+
};
886+
await mkdir(codexHome, { recursive: true });
887+
// Top-level provider is already native, but the proxy block lingers — the
888+
// partial-orphan case that previously produced a duplicate model_provider.
889+
await writeFile(
890+
join(codexHome, "config.toml"),
891+
[
892+
'model_provider = "openai"',
893+
"[profiles.default]",
894+
'model = "gpt-5"',
895+
"",
896+
"[model_providers.codex-multi-auth-runtime-proxy]",
897+
'name = "codex-multi-auth"',
898+
'wire_api = "responses"',
899+
"",
900+
].join("\n"),
901+
"utf8",
902+
);
903+
904+
const unbound = await unbindCodexAppRuntimeRotation({
905+
platform: "linux",
906+
home: root,
907+
env,
908+
spawnDetached: false,
909+
});
910+
911+
const restored = await readFile(join(codexHome, "config.toml"), "utf8");
912+
const providerLines = (
913+
restored.match(/^\s*model_provider\s*=/gm) ?? []
914+
).length;
915+
expect(providerLines).toBe(1);
916+
expect(restored).toContain('model_provider = "openai"');
917+
expect(restored).not.toContain("codex-multi-auth-runtime-proxy");
918+
expect(restored).toContain("[profiles.default]");
919+
expect(unbound.status.bound).toBe(false);
920+
});
921+
922+
it("is a no-op for an already-clean config", async () => {
923+
const root = await createTempRoot("codex-app-bind-clean-");
924+
const codexHome = join(root, "codex-home");
925+
const env = {
926+
CODEX_MULTI_AUTH_DIR: join(root, "multi-auth"),
927+
CODEX_MULTI_AUTH_APP_BIND_CODEX_HOME: codexHome,
928+
};
929+
await mkdir(codexHome, { recursive: true });
930+
await writeFile(
931+
join(codexHome, "config.toml"),
932+
'model_provider = "openai"\n',
933+
"utf8",
934+
);
935+
936+
const unbound = await unbindCodexAppRuntimeRotation({
937+
platform: "linux",
938+
home: root,
939+
env,
940+
spawnDetached: false,
941+
});
942+
943+
expect(unbound.message).toBe("Codex app bind was not configured");
944+
expect(await readFile(join(codexHome, "config.toml"), "utf8")).toBe(
945+
'model_provider = "openai"\n',
946+
);
947+
});
948+
});

0 commit comments

Comments
 (0)