Skip to content

Commit 37548ef

Browse files
committed
fix(codex-manager): close the menu quota-refresh write races
Two related races flagged by review on #540/#547, both pre-existing before the login-machinery extraction: - Last-write-wins clobber: refreshQuotaCacheForMenu probed against a snapshot clone and then saved it whole-file, silently discarding any entries a concurrent writer (deep check, second session) persisted while the probes ran. The save now reloads the freshest persisted cache and re-applies this run's successful probe results onto it, falling back to the clone if the reload fails. - Orphaned in-flight refresh: leaving the dashboard (add-account, cancel, empty-storage onboarding) abandoned a running refresh whose background cache save could race the subsequent account-pool write (Windows EBUSY/EPERM on sibling files). The three exit paths now drain the pending refresh first; the wait is bounded by the per-probe HTTP timeouts and never rejects. New regression suite pins the rebase-on-save behavior (concurrent entry preserved, reload-failure fallback, no save when nothing changed). The cli suite's loadQuotaCache mock now returns a fresh object per call like the real fresh-disk-read implementation, and the two refresh-orchestration tests settle pass 1 deterministically via the statusMessage observable instead of relying on microtask counts. https://claude.ai/code/session_01XNtnkLbBiXZxfQQYLMpucB
1 parent edd6562 commit 37548ef

4 files changed

Lines changed: 229 additions & 14 deletions

File tree

lib/codex-manager/login-flow.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,20 @@ function clearMenuQuotaAutoRefreshSkip(state: MenuQuotaRefreshState): void {
8686
state.menuQuotaRefreshGeneration += 1;
8787
}
8888

89+
// The menu quota refresh runs fire-and-forget behind the dashboard. On the
90+
// paths that leave the loop (add-account's storage write, process exit) an
91+
// in-flight refresh must finish first so its cache save cannot race the
92+
// account-pool write (Windows EBUSY/EPERM on sibling files) or be orphaned
93+
// mid-write. The chain never rejects (it ends in .catch/.finally), and the
94+
// wait is bounded by the per-probe HTTP timeouts.
95+
async function drainPendingMenuQuotaRefresh(
96+
state: MenuQuotaRefreshState,
97+
): Promise<void> {
98+
if (state.pendingMenuQuotaRefresh) {
99+
await state.pendingMenuQuotaRefresh;
100+
}
101+
}
102+
89103
const log = createLogger("codex-manager");
90104

91105
async function clearAccountsAndReset(): Promise<void> {
@@ -126,6 +140,7 @@ async function runLoginDashboardLoop(
126140
while (true) {
127141
const existingStorage = await loadAccounts();
128142
if (!existingStorage || existingStorage.accounts.length === 0) {
143+
await drainPendingMenuQuotaRefresh(menuState);
129144
return "add-account";
130145
}
131146
const currentStorage = existingStorage;
@@ -203,6 +218,7 @@ async function runLoginDashboardLoop(
203218

204219
if (menuResult.mode === "cancel") {
205220
console.log("Cancelled.");
221+
await drainPendingMenuQuotaRefresh(menuState);
206222
return "exit";
207223
}
208224
if (menuResult.mode === "check") {
@@ -304,6 +320,7 @@ async function runLoginDashboardLoop(
304320
continue;
305321
}
306322
if (menuResult.mode === "add") {
323+
await drainPendingMenuQuotaRefresh(menuState);
307324
return "add-account";
308325
}
309326
}

lib/codex-manager/login-menu-data.ts

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,15 @@ import {
88
DEFAULT_DASHBOARD_DISPLAY_SETTINGS,
99
} from "../dashboard-settings.js";
1010
import {
11+
loadQuotaCache,
1112
type QuotaCacheData,
1213
type QuotaCacheEntry,
1314
saveQuotaCache,
1415
} from "../quota-cache.js";
15-
import { fetchCodexQuotaSnapshot } from "../quota-probe.js";
16+
import {
17+
type CodexQuotaSnapshot,
18+
fetchCodexQuotaSnapshot,
19+
} from "../quota-probe.js";
1620
import {
1721
buildQuotaEmailFallbackState,
1822
hasSafeQuotaEmailFallback,
@@ -181,6 +185,10 @@ export async function refreshQuotaCacheForMenu(
181185
let processed = 0;
182186
onProgress?.(processed, total);
183187
let changed = false;
188+
const appliedSnapshots: {
189+
account: AccountMetadataV3;
190+
snapshot: CodexQuotaSnapshot;
191+
}[] = [];
184192
for (const target of targets) {
185193
processed += 1;
186194
onProgress?.(processed, total);
@@ -191,22 +199,45 @@ export async function refreshQuotaCacheForMenu(
191199
accessToken: target.accessToken,
192200
model: MENU_QUOTA_REFRESH_MODEL,
193201
});
194-
changed =
195-
updateQuotaCacheForAccount(
196-
nextCache,
197-
target.account,
198-
snapshot,
199-
storage.accounts,
200-
emailFallbackState,
201-
) || changed;
202+
const applied = updateQuotaCacheForAccount(
203+
nextCache,
204+
target.account,
205+
snapshot,
206+
storage.accounts,
207+
emailFallbackState,
208+
);
209+
if (applied) appliedSnapshots.push({ account: target.account, snapshot });
210+
changed = applied || changed;
202211
} catch {
203212
// Keep existing cached values if probing fails.
204213
}
205214
}
206215

207216
if (changed) {
217+
// The probes above ran against a snapshot clone of the cache the menu
218+
// loaded; another writer (a deep check, a second session) may have saved
219+
// entries meanwhile, and writing the stale clone back whole-file would
220+
// silently discard them (last write wins). Re-apply this run's results
221+
// onto the freshest persisted cache instead.
222+
let cacheToSave = nextCache;
223+
try {
224+
const persisted = await loadQuotaCache();
225+
for (const { account, snapshot } of appliedSnapshots) {
226+
updateQuotaCacheForAccount(
227+
persisted,
228+
account,
229+
snapshot,
230+
storage.accounts,
231+
emailFallbackState,
232+
);
233+
}
234+
cacheToSave = persisted;
235+
} catch {
236+
// Fall back to the snapshot clone; saving slightly stale data beats
237+
// dropping this run's probe results.
238+
}
208239
try {
209-
await saveQuotaCache(nextCache);
240+
await saveQuotaCache(cacheToSave);
210241
} catch (error) {
211242
// Quota cache is a derived artifact; a transient Windows EBUSY/EPERM
212243
// here must not fail the menu refresh, but it should not vanish into
@@ -216,6 +247,7 @@ export async function refreshQuotaCacheForMenu(
216247
`Quota cache save failed: ${error instanceof Error ? error.message : String(error)}`,
217248
);
218249
}
250+
return cacheToSave;
219251
}
220252

221253
return nextCache;

test/codex-manager-cli.test.ts

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -750,10 +750,14 @@ describe("codex manager cli commands", () => {
750750
primary: {},
751751
secondary: {},
752752
});
753-
loadQuotaCacheMock.mockResolvedValue({
753+
// Fresh object per call, like the real loadQuotaCache (a fresh disk read
754+
// each time): refreshQuotaCacheForMenu rebases onto and mutates the
755+
// loaded cache before saving, and a shared singleton here would leak
756+
// that mutation into every later load in the same test.
757+
loadQuotaCacheMock.mockImplementation(async () => ({
754758
byAccountId: {},
755759
byEmail: {},
756-
});
760+
}));
757761
loadFlaggedAccountsMock.mockResolvedValue({
758762
version: 1,
759763
accounts: [],
@@ -9073,7 +9077,7 @@ describe("codex manager cli commands", () => {
90739077

90749078
let promptCallCount = 0;
90759079
promptLoginModeMock
9076-
.mockImplementationOnce(async (accounts) => {
9080+
.mockImplementationOnce(async (accounts, options) => {
90779081
promptCallCount += 1;
90789082
expect(promptCallCount).toBe(1);
90799083
expect(
@@ -9086,6 +9090,14 @@ describe("codex manager cli commands", () => {
90869090
queueMicrotask(() => {
90879091
releaseFirstRefresh.resolve();
90889092
});
9093+
// Wait for the first refresh to fully settle (statusMessage clears in
9094+
// the same .finally that releases the pending slot) so the second
9095+
// menu pass deterministically starts its own refresh; the refresh
9096+
// chain gained an await (the save-time cache rebase), so returning
9097+
// immediately could reach the pass-2 guard while pass 1 is pending.
9098+
await vi.waitFor(() => {
9099+
expect(options?.statusMessage?.()).toBeUndefined();
9100+
});
90899101
return { mode: "manage", deleteAccountIndex: 99 };
90909102
})
90919103
.mockImplementationOnce(async (accounts, options) => {
@@ -9258,13 +9270,18 @@ describe("codex manager cli commands", () => {
92589270

92599271
let promptCallCount = 0;
92609272
promptLoginModeMock
9261-
.mockImplementationOnce(async () => {
9273+
.mockImplementationOnce(async (_accounts, options) => {
92629274
promptCallCount += 1;
92639275
expect(promptCallCount).toBe(1);
92649276

92659277
queueMicrotask(() => {
92669278
releaseFirstRefresh.resolve();
92679279
});
9280+
// See the stale-refresh test above: settle pass 1's refresh before
9281+
// returning so pass 2's auto-fetch guard sees a free slot.
9282+
await vi.waitFor(() => {
9283+
expect(options?.statusMessage?.()).toBeUndefined();
9284+
});
92689285
return { mode: "manage", deleteAccountIndex: 99 };
92699286
})
92709287
.mockImplementationOnce(async (_accounts, options) => {
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
import { beforeEach, describe, expect, it, vi } from "vitest";
2+
import type { QuotaCacheData } from "../lib/quota-cache.js";
3+
import type { AccountStorageV3 } from "../lib/storage.js";
4+
5+
const { loadQuotaCacheMock, saveQuotaCacheMock, fetchCodexQuotaSnapshotMock } =
6+
vi.hoisted(() => ({
7+
loadQuotaCacheMock: vi.fn(),
8+
saveQuotaCacheMock: vi.fn(),
9+
fetchCodexQuotaSnapshotMock: vi.fn(),
10+
}));
11+
12+
vi.mock("../lib/quota-cache.js", () => ({
13+
loadQuotaCache: loadQuotaCacheMock,
14+
saveQuotaCache: saveQuotaCacheMock,
15+
}));
16+
17+
vi.mock("../lib/quota-probe.js", () => ({
18+
fetchCodexQuotaSnapshot: fetchCodexQuotaSnapshotMock,
19+
}));
20+
21+
const { refreshQuotaCacheForMenu } = await import(
22+
"../lib/codex-manager/login-menu-data.js"
23+
);
24+
25+
function createStorage(now: number): AccountStorageV3 {
26+
return {
27+
version: 3,
28+
activeIndex: 0,
29+
activeIndexByFamily: { codex: 0 },
30+
accounts: [
31+
{
32+
email: "a@example.com",
33+
accountId: "acc_a",
34+
refreshToken: "refresh-a",
35+
accessToken: "access-a",
36+
expiresAt: now + 3_600_000,
37+
addedAt: now - 60_000,
38+
lastUsed: now - 60_000,
39+
},
40+
{
41+
email: "b@example.com",
42+
accountId: "acc_b",
43+
refreshToken: "refresh-b",
44+
accessToken: "access-b",
45+
expiresAt: now + 3_600_000,
46+
addedAt: now - 60_000,
47+
lastUsed: now - 60_000,
48+
},
49+
],
50+
};
51+
}
52+
53+
function emptyCache(): QuotaCacheData {
54+
return { byAccountId: {}, byEmail: {} };
55+
}
56+
57+
function snapshotFor(model: string) {
58+
return {
59+
status: 200,
60+
model,
61+
primary: { usedPercent: 10, windowMinutes: 300, resetAtMs: 1 },
62+
secondary: { usedPercent: 5, windowMinutes: 10080, resetAtMs: 2 },
63+
};
64+
}
65+
66+
const CONCURRENT_ENTRY = {
67+
updatedAt: 1,
68+
status: 429,
69+
model: "gpt-5-codex",
70+
primary: { usedPercent: 100, windowMinutes: 300, resetAtMs: 99 },
71+
secondary: {},
72+
};
73+
74+
beforeEach(() => {
75+
loadQuotaCacheMock.mockReset();
76+
saveQuotaCacheMock.mockReset();
77+
fetchCodexQuotaSnapshotMock.mockReset();
78+
saveQuotaCacheMock.mockResolvedValue(undefined);
79+
fetchCodexQuotaSnapshotMock.mockResolvedValue(snapshotFor("gpt-5-codex"));
80+
});
81+
82+
describe("refreshQuotaCacheForMenu", () => {
83+
it("rebases its results onto the freshest persisted cache before saving", async () => {
84+
// Regression for the last-write-wins clobber: a concurrent writer (deep
85+
// check, second session) saved acc_concurrent while the menu refresh was
86+
// probing against its stale snapshot clone. The whole-file save must keep
87+
// that entry, not silently discard it.
88+
loadQuotaCacheMock.mockResolvedValue({
89+
byAccountId: { acc_concurrent: { ...CONCURRENT_ENTRY } },
90+
byEmail: {},
91+
});
92+
93+
const result = await refreshQuotaCacheForMenu(
94+
createStorage(Date.now()),
95+
emptyCache(),
96+
60_000,
97+
);
98+
99+
expect(saveQuotaCacheMock).toHaveBeenCalledTimes(1);
100+
const saved = saveQuotaCacheMock.mock.calls[0][0] as QuotaCacheData;
101+
expect(saved.byAccountId.acc_concurrent).toMatchObject({ status: 429 });
102+
expect(saved.byAccountId.acc_a).toMatchObject({ status: 200 });
103+
expect(saved.byAccountId.acc_b).toMatchObject({ status: 200 });
104+
expect(result).toBe(saved);
105+
});
106+
107+
it("falls back to saving its own snapshot clone when the reload fails", async () => {
108+
loadQuotaCacheMock.mockRejectedValue(new Error("EBUSY"));
109+
110+
const result = await refreshQuotaCacheForMenu(
111+
createStorage(Date.now()),
112+
emptyCache(),
113+
60_000,
114+
);
115+
116+
expect(saveQuotaCacheMock).toHaveBeenCalledTimes(1);
117+
const saved = saveQuotaCacheMock.mock.calls[0][0] as QuotaCacheData;
118+
expect(saved.byAccountId.acc_a).toMatchObject({ status: 200 });
119+
expect(saved.byAccountId.acc_b).toMatchObject({ status: 200 });
120+
expect(result).toBe(saved);
121+
});
122+
123+
it("does not reload or save when every probe fails", async () => {
124+
fetchCodexQuotaSnapshotMock.mockRejectedValue(new Error("network"));
125+
126+
const cache = emptyCache();
127+
const result = await refreshQuotaCacheForMenu(
128+
createStorage(Date.now()),
129+
cache,
130+
60_000,
131+
);
132+
133+
expect(loadQuotaCacheMock).not.toHaveBeenCalled();
134+
expect(saveQuotaCacheMock).not.toHaveBeenCalled();
135+
expect(result).toEqual(cache);
136+
});
137+
138+
it("returns the input cache untouched when there are no accounts", async () => {
139+
const cache = emptyCache();
140+
const result = await refreshQuotaCacheForMenu(
141+
{ version: 3, activeIndex: 0, activeIndexByFamily: {}, accounts: [] },
142+
cache,
143+
60_000,
144+
);
145+
expect(result).toBe(cache);
146+
expect(fetchCodexQuotaSnapshotMock).not.toHaveBeenCalled();
147+
expect(saveQuotaCacheMock).not.toHaveBeenCalled();
148+
});
149+
});

0 commit comments

Comments
 (0)