|
1 | 1 | // Synchronous SQLITE_BUSY retry wrapper for better-sqlite3 writes. |
2 | 2 | // |
3 | | -// Background: the Storage constructor sets busy_timeout=5000 and |
| 3 | +// Background: the Storage constructor sets busy_timeout=15000 and |
4 | 4 | // journal_mode=WAL, which together absorb the overwhelming majority of |
5 | 5 | // contention between the worker daemon, MCP server, and CLI hooks. The |
6 | 6 | // remaining tail comes from edge cases — a long checkpoint, a migration |
7 | 7 | // that holds a write transaction, or a misbehaving hook that opens its |
8 | | -// own connection. In those cases SQLite still raises SQLITE_BUSY after |
9 | | -// the busy_timeout window expires, which the 168h gain telemetry saw |
10 | | -// surface once on task_claim_quota_release_expired. |
| 8 | +// own connection — plus sustained pressure from the codex-fleet shape |
| 9 | +// (~30+ concurrent writers). In those cases SQLite still raises |
| 10 | +// SQLITE_BUSY after the busy_timeout window expires. |
11 | 11 | // |
12 | 12 | // This helper gives callers a defensive Node-level retry on top of |
13 | | -// SQLite's own busy_timeout. Five attempts with backoff 5/20/80/250ms |
14 | | -// cap total wait at ~355ms — small enough that the caller is not |
15 | | -// noticeably slower, large enough that a transient checkpoint or |
16 | | -// short-held write transaction has time to clear. |
| 13 | +// SQLite's own busy_timeout. Eight attempts, separated by seven |
| 14 | +// backoff sleeps of 10/40/160/640/1000/1000/1000ms, cap the total |
| 15 | +// backoff wait at ~3.85s — bounded enough to keep CLI hooks under the |
| 16 | +// 150ms p95 budget on the happy path while giving a transient |
| 17 | +// checkpoint or fleet-burst window time to clear. |
17 | 18 |
|
18 | 19 | export interface BusyRetryOptions { |
19 | | - /** Maximum number of attempts (including the first). Defaults to 5. */ |
| 20 | + /** Maximum number of attempts (including the first). Defaults to 8. */ |
20 | 21 | maxAttempts?: number; |
21 | | - /** Base delay in milliseconds; backoff is base * 4^(attempt-1) capped at 250ms. Defaults to 5. */ |
| 22 | + /** Base delay in milliseconds; backoff is base * 4^(attempt-1) capped at maxDelayMs. Defaults to 10. */ |
22 | 23 | baseDelayMs?: number; |
23 | | - /** Maximum per-attempt delay in milliseconds. Defaults to 250. */ |
| 24 | + /** Maximum per-attempt delay in milliseconds. Defaults to 1000. */ |
24 | 25 | maxDelayMs?: number; |
25 | 26 | } |
26 | 27 |
|
@@ -55,9 +56,9 @@ function sleepSync(ms: number): void { |
55 | 56 | * `maxAttempts` retries. |
56 | 57 | */ |
57 | 58 | export function withBusyRetry<T>(fn: () => T, opts: BusyRetryOptions = {}): T { |
58 | | - const maxAttempts = opts.maxAttempts ?? 5; |
59 | | - const baseDelayMs = opts.baseDelayMs ?? 5; |
60 | | - const maxDelayMs = opts.maxDelayMs ?? 250; |
| 59 | + const maxAttempts = opts.maxAttempts ?? 8; |
| 60 | + const baseDelayMs = opts.baseDelayMs ?? 10; |
| 61 | + const maxDelayMs = opts.maxDelayMs ?? 1000; |
61 | 62 | let attempt = 0; |
62 | 63 | // The loop body always either returns or throws, so the linter is |
63 | 64 | // happy with the `while (true)` shape. |
|
0 commit comments