Skip to content

Commit 6e13a4c

Browse files
authored
Fix runtime rotation pool diagnostics (#480)
* fix: recover stale runtime rotation pool
* fix: serialize stale runtime reload recovery
* fix: address runtime diagnostics review bugs
* test: cover reset runtime review paths
1 parent 99b8efc commit 6e13a4c

16 files changed

Lines changed: 1126 additions & 25 deletions

lib/accounts.ts

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import { MODEL_FAMILIES, type ModelFamily } from "./prompts/codex.js";
1717
import {
1818
getHealthTracker,
1919
getTokenTracker,
20+
resetTrackers,
2021
selectHybridAccount,
2122
type AccountWithMetrics,
2223
type HybridSelectionOptions,
@@ -39,7 +40,7 @@ import {
3940
getAccountIdentityKey,
4041
getRuntimeAccountIdentityKey,
4142
} from "./storage/identity.js";
42-
import { getCircuitBreaker } from "./circuit-breaker.js";
43+
import { getCircuitBreaker, resetAllCircuitBreakers } from "./circuit-breaker.js";
4344
import {
4445
getStoragePathState,
4546
runWithStoragePathState,
@@ -644,6 +645,31 @@ export class AccountManager {
644645
);
645646
}
646647

648+
/**
 * Explains why the account at `index` would be skipped for the given
 * model family, or returns `null` when none of the checks below match.
 *
 * Checks run in a fixed order and the first match wins:
 * `"missing"`, `"disabled"`, `"workspace-disabled"`, `"rate-limited"`,
 * `"cooling-down"` (suffixed with `:<cooldownReason>` when one is set),
 * then `"circuit-open"`.
 *
 * Note: `clearExpiredRateLimits(account)` is invoked on the account before
 * the rate-limit check, so this is not a purely read-only query.
 *
 * @param index - Index passed to `getAccountByIndex`.
 * @param family - Model family forwarded to the rate-limit check.
 * @param model - Optional model forwarded to the rate-limit check.
 * @returns A skip-reason string, or `null` if no check matched.
 */
getAccountRuntimeSkipReason(
	index: number,
	family: ModelFamily,
	model?: string | null,
): string | null {
	const candidate = this.getAccountByIndex(index);
	if (!candidate) {
		return "missing";
	}
	if (candidate.enabled === false) {
		return "disabled";
	}
	if (!this.hasEnabledWorkspaces(candidate)) {
		return "workspace-disabled";
	}

	// Expired entries must be cleared first so they do not count as active limits.
	clearExpiredRateLimits(candidate);
	if (isRateLimitedForFamily(candidate, family, model)) {
		return "rate-limited";
	}

	if (this.isAccountCoolingDown(candidate)) {
		if (candidate.cooldownReason) {
			return `cooling-down:${candidate.cooldownReason}`;
		}
		return "cooling-down";
	}

	if (!this.isCircuitAvailable(candidate)) {
		return "circuit-open";
	}
	return null;
}
667+
668+
/**
 * Resets volatile runtime state by delegating to `resetTrackers()` and then
 * `resetAllCircuitBreakers()`. Takes no arguments and returns nothing.
 */
static resetVolatileRuntimeState(): void {
	resetTrackers();
	resetAllCircuitBreakers();
}
672+
647673
setActiveIndex(index: number): ManagedAccount | null {
648674
if (!Number.isFinite(index)) return null;
649675
if (index < 0 || index >= this.accounts.length) return null;

lib/codex-manager.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2542,6 +2542,7 @@ async function runForecast(args: string[]): Promise<number> {
25422542
formatWaitTime,
25432543
defaultDisplay: DEFAULT_DASHBOARD_DISPLAY_SETTINGS,
25442544
formatQuotaSnapshotLine,
2545+
loadRuntimeObservabilitySnapshot: loadPersistedRuntimeObservabilitySnapshot,
25452546
});
25462547
}
25472548

lib/codex-manager/commands/forecast.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { extractAccountEmail, sanitizeEmail } from "../../accounts.js";
33
import {
44
buildForecastExplanation,
55
type ForecastAccountResult,
6+
type RuntimeForecastOverlay,
67
} from "../../forecast.js";
78
import {
89
applyRefreshedAccountPatch,
@@ -22,6 +23,7 @@ interface ForecastCliOptions {
2223
json: boolean;
2324
explain: boolean;
2425
model: string;
26+
runtimeOverlay: boolean;
2527
}
2628

2729
type ParsedArgsResult<T> =
@@ -80,6 +82,9 @@ export interface ForecastCommandDeps {
8082
now: number;
8183
refreshFailure?: TokenFailure;
8284
liveQuota?: CodexQuotaSnapshot;
85+
quotaCache?: QuotaCacheData | null;
86+
allAccounts?: readonly AccountMetadataV3[];
87+
runtimeOverlay?: RuntimeForecastOverlay | null;
8388
}>,
8489
) => ForecastAccountResult[];
8590
summarizeForecast: (results: ForecastAccountResult[]) => {
@@ -107,6 +112,7 @@ export interface ForecastCommandDeps {
107112
) => "success" | "warning" | "danger";
108113
formatWaitTime: (ms: number) => string;
109114
defaultDisplay: DashboardDisplaySettings;
115+
loadRuntimeObservabilitySnapshot?: () => Promise<RuntimeForecastOverlay | null>;
110116
logInfo?: (message: string) => void;
111117
logError?: (message: string) => void;
112118
getNow?: () => number;
@@ -131,6 +137,7 @@ function printForecastUsage(logInfo: (message: string) => void): void {
131137
" --json, -j Print machine-readable JSON output",
132138
" --explain Include structured recommendation reasoning",
133139
" --model, -m Probe model for live mode (default: gpt-5.3-codex)",
140+
" --no-runtime-overlay Ignore persisted runtime skip diagnostics",
134141
].join("\n"),
135142
);
136143
}
@@ -143,6 +150,7 @@ function parseForecastArgs(
143150
json: false,
144151
explain: false,
145152
model: "gpt-5.3-codex",
153+
runtimeOverlay: true,
146154
};
147155

148156
for (let i = 0; i < args.length; i += 1) {
@@ -160,6 +168,10 @@ function parseForecastArgs(
160168
options.explain = true;
161169
continue;
162170
}
171+
if (arg === "--no-runtime-overlay") {
172+
options.runtimeOverlay = false;
173+
continue;
174+
}
163175
if (arg === "--model" || arg === "-m") {
164176
const value = args[i + 1];
165177
if (!value) return { ok: false, message: "Missing value for --model" };
@@ -205,7 +217,11 @@ export async function runForecastCommand(
205217
? (await deps.loadDashboardDisplaySettings().catch(() => null)) ??
206218
deps.defaultDisplay
207219
: deps.defaultDisplay;
208-
const quotaCache = options.live ? await deps.loadQuotaCache() : null;
220+
const quotaCache = await deps.loadQuotaCache();
221+
const runtimeOverlay =
222+
options.runtimeOverlay && deps.loadRuntimeObservabilitySnapshot
223+
? await deps.loadRuntimeObservabilitySnapshot().catch(() => null)
224+
: null;
209225
const workingQuotaCache = quotaCache
210226
? deps.cloneQuotaCacheData(quotaCache)
211227
: null;
@@ -346,6 +362,9 @@ export async function runForecastCommand(
346362
now,
347363
refreshFailure: refreshFailures.get(index),
348364
liveQuota: liveQuotaByIndex.get(index),
365+
quotaCache,
366+
allAccounts: storage.accounts,
367+
runtimeOverlay,
349368
}));
350369
const forecastResults = deps.evaluateForecastAccounts(forecastInputs);
351370
const summary = deps.summarizeForecast(forecastResults);
@@ -373,6 +392,7 @@ export async function runForecastCommand(
373392
command: "forecast",
374393
model: requestedModel,
375394
liveProbe: options.live,
395+
runtimeOverlay: options.runtimeOverlay,
376396
summary,
377397
recommendation,
378398
explanation: options.explain ? explanation : undefined,

lib/codex-manager/commands/report.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,15 @@ export async function runReportCommand(
315315
const refreshFailures = new Map<number, TokenFailure>();
316316
const liveQuotaByIndex = new Map<number, CodexQuotaSnapshot>();
317317
const probeErrors: string[] = [];
318+
let runtimeSnapshot: RuntimeObservabilitySnapshot | null | undefined;
319+
let runtimeSnapshotLoadError: string | null = null;
320+
try {
321+
runtimeSnapshot = await deps.loadRuntimeObservabilitySnapshot?.();
322+
} catch (error) {
323+
runtimeSnapshot = null;
324+
runtimeSnapshotLoadError = error instanceof Error ? error.message : String(error);
325+
deps.logError?.(`Runtime observability snapshot unavailable: ${runtimeSnapshotLoadError}`);
326+
}
318327
let consideredLiveAccounts = 0;
319328
let executedLiveProbes = 0;
320329

@@ -453,6 +462,7 @@ export async function runReportCommand(
453462
now,
454463
refreshFailure: refreshFailures.get(index),
455464
liveQuota: liveQuotaByIndex.get(index),
465+
runtimeOverlay: runtimeSnapshot,
456466
})),
457467
)
458468
: [];
@@ -521,7 +531,9 @@ export async function runReportCommand(
521531
formatQuotaSnapshotLine,
522532
),
523533
},
524-
runtime: await deps.loadRuntimeObservabilitySnapshot?.(),
534+
runtimeOverlay: runtimeSnapshot !== null && runtimeSnapshot !== undefined,
535+
runtime: runtimeSnapshot,
536+
runtimeSnapshotLoadError,
525537
};
526538
if (report.forecast.recommendation.recommendedIndex !== null) {
527539
const selectedIndex = report.forecast.recommendation.recommendedIndex;

lib/codex-manager/commands/rotation.ts

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { existsSync, readFileSync, statSync } from "node:fs";
22
import { join } from "node:path";
3-
import { formatAccountLabel, formatCooldown, formatWaitTime } from "../../accounts.js";
3+
import {
4+
AccountManager,
5+
formatAccountLabel,
6+
formatCooldown,
7+
formatWaitTime,
8+
} from "../../accounts.js";
49
import { parseBooleanEnv } from "../../env-parsing.js";
510
import { getCodexMultiAuthDir } from "../../runtime-paths.js";
611
import { saveAccountsWithRetry } from "../forecast-report-shared.js";
@@ -32,7 +37,10 @@ import {
3237
isQuotaCacheEntryExhausted,
3338
} from "../../quota-readiness.js";
3439
import type { QuotaCacheData } from "../../quota-cache.js";
35-
import type { RuntimeObservabilitySnapshot } from "../../runtime/runtime-observability.js";
40+
import {
41+
recordRuntimeReset,
42+
type RuntimeObservabilitySnapshot,
43+
} from "../../runtime/runtime-observability.js";
3644
import {
3745
appRuntimeHelperStatusToSignal as appRuntimeHelperStatusToRuntimeSignal,
3846
resolveAccountCurrentMarkers,
@@ -88,16 +96,84 @@ function printRotationUsage(logInfo: (message: string) => void): void {
8896
" codex-multi-auth rotation bind-app",
8997
" codex-multi-auth rotation unbind-app",
9098
" codex-multi-auth rotation reset-rate-limits [--all | --account <idx>] [--dry-run] [--json]",
99+
" codex-multi-auth rotation reset-runtime [--json]",
91100
"",
92101
"Behavior:",
93102
" - Runtime rotation is enabled by default for request-bearing Codex sessions",
94103
" - Binds the packaged Codex desktop app to the same localhost router when enabled or repaired",
95104
" - Use CODEX_MULTI_AUTH_RUNTIME_ROTATION_PROXY=0 to disable the proxy for the current process without changing persistent settings",
96105
" - reset-rate-limits clears stored rateLimitResetTimes and active coolingDownUntil entries; use when `fix --live` confirms quota is available but the proxy still returns 503 pool-exhausted",
106+
" - reset-runtime clears process-local runtime trackers and re-applies the Codex app bind when available",
97107
].join("\n"),
98108
);
99109
}
100110

111+
/**
 * Handles `codex-multi-auth rotation reset-runtime [--json]`.
 *
 * Arguments are validated before anything is reset. The command then calls
 * `AccountManager.resetVolatileRuntimeState()`, records the reset via
 * `recordRuntimeReset("rotation-reset-runtime")`, and, only when both
 * `deps.unbindCodexApp` and `deps.bindCodexApp` are provided, unbinds and
 * re-binds the Codex app.
 *
 * If the unbind/bind sequence throws, the failure output reports whether
 * the unbind step had already resolved (`unbindStatus`), so callers can tell
 * "nothing changed" apart from "unbound but not re-bound".
 *
 * @param args - Arguments following the `reset-runtime` subcommand.
 * @param deps - Command dependencies (loggers and optional app bind helpers).
 * @returns `0` on success or when help was printed; `1` for an unknown
 *   option or when the unbind/bind sequence throws.
 */
async function runResetRuntime(
	args: string[],
	deps: RotationCommandDeps,
): Promise<number> {
	const logInfo = deps.logInfo ?? console.log;
	const logError = deps.logError ?? console.error;
	let json = false;
	for (const arg of args) {
		if (arg === "--json" || arg === "-j") {
			json = true;
			continue;
		}
		if (arg === "--help" || arg === "-h" || arg === "help") {
			logInfo("Usage: codex-multi-auth rotation reset-runtime [--json]");
			return 0;
		}
		logError(`Unknown reset-runtime option: ${arg}`);
		return 1;
	}

	AccountManager.resetVolatileRuntimeState();
	recordRuntimeReset("rotation-reset-runtime");
	let unbind: AppBindResult | null = null;
	let bind: AppBindResult | null = null;
	let appBindRestarted = false;
	if (deps.unbindCodexApp && deps.bindCodexApp) {
		try {
			unbind = await deps.unbindCodexApp();
			bind = await deps.bindCodexApp();
			appBindRestarted = true;
		} catch (error) {
			const message = error instanceof Error ? error.message : String(error);
			// `unbind` is non-null here only when the unbind step resolved and
			// the subsequent bind step threw.
			const unbindStatus = unbind?.status.state ?? null;
			if (json) {
				logInfo(
					JSON.stringify({
						ok: false,
						command: "rotation reset-runtime",
						resetVolatileRuntimeState: true,
						appBindRestarted,
						unbindStatus,
						bindStatus: null,
						error: message,
					}),
				);
			} else {
				logError(`Runtime reset completed, but app bind restart failed: ${message}`);
				if (unbind !== null) {
					logError(
						`Unbind step finished (state: ${String(unbindStatus)}) before the failure; run \`codex-multi-auth rotation bind-app\` to re-apply the bind.`,
					);
				}
			}
			return 1;
		}
	}
	const payload = {
		ok: true,
		command: "rotation reset-runtime",
		resetVolatileRuntimeState: true,
		appBindRestarted,
		unbindStatus: unbind?.status.state ?? null,
		bindStatus: bind?.status.state ?? null,
	};
	if (json) {
		logInfo(JSON.stringify(payload));
	} else {
		logInfo("Runtime rotation volatile state reset.");
		if (appBindRestarted) logInfo("Codex app bind restarted.");
		else logInfo("Codex app bind helpers unavailable; new wrapper sessions will use the reset state.");
	}
	return 0;
}
176+
101177
interface ResetRateLimitsOptions {
102178
scope: "all" | "account";
103179
accountIndex: number | null;
@@ -660,6 +736,9 @@ export async function runRotationCommand(
660736
if (subcommand === "reset-rate-limits") {
661737
return runResetRateLimits(rest, deps);
662738
}
739+
if (subcommand === "reset-runtime") {
740+
return runResetRuntime(rest, deps);
741+
}
663742
if (rest.length > 0) {
664743
logError(`Unknown rotation option: ${rest[0]}`);
665744
return 1;

lib/codex-manager/repair-commands.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ import {
3737
import { setCodexCliActiveSelection } from "../codex-cli/writer.js";
3838
import { MODEL_FAMILIES, type ModelFamily } from "../prompts/codex.js";
3939
import { resolveNormalizedModel } from "../request/helpers/model-map.js";
40+
import { loadPersistedRuntimeObservabilitySnapshot } from "../runtime/runtime-observability.js";
4041
import type { AccountIdSource, TokenFailure, TokenResult } from "../types.js";
4142

4243
type TokenSuccess = Extract<TokenResult, { type: "success" }>;
@@ -1922,6 +1923,9 @@ export async function runDoctor(
19221923
});
19231924

19241925
const now = Date.now();
1926+
const runtimeSnapshot =
1927+
(await loadPersistedRuntimeObservabilitySnapshot().catch(() => null)) ??
1928+
null;
19251929
const forecastResults = evaluateForecastAccounts(
19261930
storageForChecks.accounts.map((account, index) => ({
19271931
index,
@@ -1930,6 +1934,15 @@ export async function runDoctor(
19301934
now,
19311935
})),
19321936
);
1937+
const runtimeForecastResults = evaluateForecastAccounts(
1938+
storageForChecks.accounts.map((account, index) => ({
1939+
index,
1940+
account,
1941+
isCurrent: index === activeIndex,
1942+
now,
1943+
runtimeOverlay: runtimeSnapshot,
1944+
})),
1945+
);
19331946
const recommendation = recommendForecastAccount(forecastResults);
19341947
if (
19351948
recommendation.recommendedIndex !== null
@@ -1949,6 +1962,31 @@ export async function runDoctor(
19491962
});
19501963
}
19511964

1965+
const divergent = forecastResults.filter((result) => {
1966+
const runtimeResult = runtimeForecastResults[result.index];
1967+
return (
1968+
result.availability === "ready" &&
1969+
runtimeResult?.availability === "unavailable"
1970+
);
1971+
});
1972+
addCheck({
1973+
key: "forecast-runtime-alignment",
1974+
severity: divergent.length > 0 ? "warn" : "ok",
1975+
message:
1976+
divergent.length > 0
1977+
? `${divergent.length} account(s) look ready on disk but unavailable in runtime state`
1978+
: "Forecast and runtime availability are aligned",
1979+
details:
1980+
divergent.length > 0
1981+
? divergent
1982+
.map((result) => {
1983+
const runtimeResult = runtimeForecastResults[result.index];
1984+
return `account ${result.index + 1}: ${runtimeResult?.reasons.join("; ") || "runtime unavailable"}`;
1985+
})
1986+
.join(" | ")
1987+
: undefined,
1988+
});
1989+
19521990
if (activeExists) {
19531991
const activeAccount = storageForChecks.accounts[activeIndex];
19541992
const managerActiveEmail = sanitizeEmail(activeAccount?.email);

0 commit comments

Comments
 (0)