Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 101 additions & 9 deletions Sources/CodexBarCLI/CLIServeCommand.swift
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,17 @@ private enum CLIServeArgumentError: LocalizedError {
}
}

/// Error attached to the provider error row that `/usage` emits when a
/// provider's fetch does not complete within its serve-level budget.
private struct CLIServeProviderTimeoutError: LocalizedError {
    /// The provider whose usage fetch did not complete.
    let provider: UsageProvider

    /// Localized message: the provider's raw value followed by " usage timed out".
    var errorDescription: String? {
        let providerName = self.provider.rawValue
        return "\(providerName) usage timed out"
    }
}

extension CodexBarCLI {
static let defaultServeRequestTimeout: TimeInterval = 30
private static let maximumServeRequestTimeout: TimeInterval = 86400

static func runServe(_ values: ParsedValues) async {
let output = CLIOutputPreferences(format: .json, jsonOnly: true, pretty: false)
Expand Down Expand Up @@ -522,7 +531,7 @@ extension CodexBarCLI {
} else {
parsed = Self.defaultServeRequestTimeout
}
guard parsed >= 0 else { return nil }
guard parsed.isFinite, parsed >= 0 else { return nil }
return parsed
}

Expand Down Expand Up @@ -561,7 +570,8 @@ extension CodexBarCLI {
await Self.serveUsage(
provider: provider,
config: snapshot.config,
refreshInterval: runtime.refreshInterval)
refreshInterval: runtime.refreshInterval,
requestTimeout: runtime.requestTimeout)
}
case let .cost(provider):
let snapshot: CLIServeConfigSnapshot
Expand Down Expand Up @@ -635,7 +645,7 @@ extension CodexBarCLI {
seconds timeout: TimeInterval,
makeResponse: @Sendable @escaping () async -> CLILocalHTTPResponse) async -> CLILocalHTTPResponse
{
let clampedTimeout = min(max(timeout, 0), 86400)
let clampedTimeout = min(max(timeout, 0), Self.maximumServeRequestTimeout)
guard clampedTimeout > 0 else {
return await makeResponse()
}
Expand Down Expand Up @@ -691,7 +701,8 @@ extension CodexBarCLI {
private static func serveUsage(
provider rawProvider: String?,
config: CodexBarConfig,
refreshInterval: TimeInterval) async -> CLILocalHTTPResponse
refreshInterval: TimeInterval,
requestTimeout: TimeInterval) async -> CLILocalHTTPResponse
{
let selection: ProviderSelection
do {
Expand All @@ -710,6 +721,12 @@ extension CodexBarCLI {
return Self.serveError(status: .internalServerError, message: error.localizedDescription)
}

// For finite request deadlines, bound each provider early enough to
// return the healthy rows before the outer deadline discards them all.
// A disabled request deadline adds no serve-level provider bound; the
// providers' existing internal timeouts still apply.
let providerTimeout = Self.serveProviderTimeout(requestTimeout: requestTimeout)

let browserDetection = BrowserDetection()
let command = UsageCommandContext(
format: .json,
Expand All @@ -718,7 +735,7 @@ extension CodexBarCLI {
antigravityPlanDebug: false,
augmentDebug: false,
webDebugDumpHTML: false,
webTimeout: 60,
webTimeout: providerTimeout ?? 60,
verbose: false,
useColor: false,
resetStyle: Self.resetTimeDisplayStyleFromDefaults(),
Expand All @@ -731,23 +748,98 @@ extension CodexBarCLI {
persistCLISessions: true,
persistentCLISessionIdleWindow: Self.serveCLISessionIdleWindow(refreshInterval: refreshInterval))

var output = UsageCommandOutput()
for provider in selection.asList {
let providerOutput = await ProviderInteractionContext.$current.withValue(.background) {
let output = await Self.serveCollectUsageOutputs(
providers: selection.asList,
providerTimeout: providerTimeout)
{ provider in
await ProviderInteractionContext.$current.withValue(.background) {
await Self.fetchUsageOutputs(
provider: provider,
status: nil,
tokenContext: tokenContext,
command: command)
}
output.merge(providerOutput)
}

return Self.serveJSON(
output.payload,
usageCacheKeys: output.payload.map(\.cacheAccountKey))
}

/// Derives the per-provider fetch budget for `/usage` from the outer request
/// deadline.
///
/// - Parameter requestTimeout: The serve request deadline, in seconds.
/// - Returns: 80% of the deadline after capping it at
///   `maximumServeRequestTimeout`, or `nil` when the deadline is zero,
///   negative, NaN, or infinite. A `nil` result means no serve-level provider
///   bound is added, which keeps a disabled request deadline disabled.
static func serveProviderTimeout(requestTimeout: TimeInterval) -> TimeInterval? {
    // Only a positive, finite deadline yields a budget. NaN fails the `> 0`
    // comparison and infinity fails `isFinite`, so both fall through to `nil`.
    let hasFiniteDeadline = requestTimeout.isFinite && requestTimeout > 0
    if !hasFiniteDeadline {
        return nil
    }

    // Share the outer cap so oversized finite values stay within a range that
    // is safe to convert later.
    let cappedDeadline = min(requestTimeout, Self.maximumServeRequestTimeout)

    // Scaling by 0.8 keeps the budget strictly below the deadline at every
    // value, sub-second and capped ones included, so the empty-504 deadline
    // cannot fire before a provider's own bound does.
    let providerShare = 0.8
    return cappedDeadline * providerShare
}

/// Fetches usage for every provider concurrently and merges the results in
/// the caller's provider order, regardless of completion order.
///
/// With a non-nil `providerTimeout`, each fetch runs in its own task and is
/// joined through `BoundedTaskJoin`; a provider that does not produce a value
/// within the budget contributes a provider error row instead of holding up
/// the rest, so the response still renders every healthy provider. Every
/// provider's clock starts when its child task is spawned, so one hung
/// provider cannot serialize the others' deadlines.
///
/// Per-account error rows that carry a cache key are merged with
/// last-known-good data by `CLIServeResponseCache`. A timeout row is
/// account-agnostic and is not reconstructed, matching the existing
/// "a timeout cannot prove the active account" cache rule.
///
/// - Parameters:
///   - providers: Providers to query; also fixes the merge order.
///   - providerTimeout: Per-provider budget in seconds, or `nil` to await each
///     fetch without a serve-level bound. Negative values are treated as zero.
///   - fetch: Produces the usage output for a single provider.
/// - Returns: The merged output of all providers.
static func serveCollectUsageOutputs(
    providers: [UsageProvider],
    providerTimeout: TimeInterval?,
    fetch: @Sendable @escaping (UsageProvider) async -> UsageCommandOutput) async -> UsageCommandOutput
{
    let joinBudget: Duration? = providerTimeout.map { seconds in
        Duration.seconds(max(0, seconds))
    }

    let orderedOutputs = await withTaskGroup(
        of: (Int, UsageCommandOutput).self,
        returning: [UsageCommandOutput?].self)
    { group in
        for (slot, provider) in zip(providers.indices, providers) {
            group.addTask {
                if let joinBudget {
                    // Bounded path: the fetch runs in its own task so the join
                    // can give up on it once the budget elapses.
                    let fetchTask = Task<UsageCommandOutput, Error> { await fetch(provider) }
                    let boundedJoin = BoundedTaskJoin(sourceTask: fetchTask)
                    let providerOutput: UsageCommandOutput
                    switch await boundedJoin.value(joinGrace: joinBudget) {
                    case let .value(fetched):
                        providerOutput = fetched
                    case .failure, .timedOut:
                        providerOutput = Self.serveProviderTimeoutOutput(provider: provider)
                    }
                    return (slot, providerOutput)
                }

                // Unbounded path: wait for the fetch directly.
                let fetched = await fetch(provider)
                return (slot, fetched)
            }
        }

        // Store each result at its provider's position so the merge below
        // follows the caller's order rather than completion order.
        var slots = [UsageCommandOutput?](repeating: nil, count: providers.count)
        for await (slot, providerOutput) in group {
            slots[slot] = providerOutput
        }
        return slots
    }

    var merged = UsageCommandOutput()
    for case let providerOutput? in orderedOutputs {
        merged.merge(providerOutput)
    }
    return merged
}

/// Provider-level error row for a fetch that exceeded its per-provider budget.
static func serveProviderTimeoutOutput(provider: UsageProvider) -> UsageCommandOutput {
var output = UsageCommandOutput()
output.exitCode = .failure
output.payload.append(Self.makeProviderErrorPayload(
provider: provider,
account: nil,
source: "auto",
Comment on lines +833 to +836

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve cache keys for provider timeout rows

When an identified provider (for example Codex with visible accounts or any token-account provider) has a previous good /usage response and this new provider-level timeout fires, this synthetic error row is emitted with account: nil and no cacheAccountKey. mergeLastGoodUsageItems only restores cached usage when it can build the (provider, accountID) key from usageCacheKeys, so these timeout rows never match the saved good rows and the endpoint returns the raw timeout error instead of the intended last-known-good fallback.

Useful? React with 👍 / 👎.

status: nil,
error: CLIServeProviderTimeoutError(provider: provider),
kind: .provider))
return output
}

private static func serveCost(provider rawProvider: String?, config: CodexBarConfig) async -> CLILocalHTTPResponse {
let selection: ProviderSelection
do {
Expand Down
77 changes: 77 additions & 0 deletions Tests/CodexBarTests/CLIServeRouterTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,10 @@ struct CLIServeRouterTests {
positional: [],
options: ["requestTimeout": ["-0.5"]],
flags: [])) == nil)
#expect(CodexBarCLI.decodeServeRequestTimeout(from: ParsedValues(
positional: [],
options: ["requestTimeout": ["inf"]],
flags: [])) == nil)
#expect(CodexBarCLI.decodeServeRequestTimeout(from: ParsedValues(
positional: [],
options: ["requestTimeout": ["0"]],
Expand Down Expand Up @@ -221,6 +225,79 @@ struct CLIServeRouterTests {
#expect(!CodexBarCLI.shouldCacheServeResponse(routeError))
}

/// Pins the `/usage` per-provider budget: 80% of a finite request deadline,
/// `nil` when the deadline is disabled or non-finite, and capped together
/// with the outer 24-hour limit.
@Test
func `serve provider timeout stays below the request deadline`() throws {
    // Typical deadlines map to 80% of the request timeout.
    let budgetForThirtySeconds = try #require(CodexBarCLI.serveProviderTimeout(requestTimeout: 30))
    let budgetForTenSeconds = try #require(CodexBarCLI.serveProviderTimeout(requestTimeout: 10))
    #expect(abs(budgetForThirtySeconds - 24) < 1e-9)
    #expect(abs(budgetForTenSeconds - 8) < 1e-9)

    // A disabled (0) or non-finite outer deadline adds no serve-level bound.
    let disabledDeadlines: [TimeInterval] = [0, .infinity]
    for disabledDeadline in disabledDeadlines {
        #expect(CodexBarCLI.serveProviderTimeout(requestTimeout: disabledDeadline) == nil)
    }

    // The budget stays strictly below the deadline even for sub-second values.
    let budgetForOneSecond = try #require(CodexBarCLI.serveProviderTimeout(requestTimeout: 1))
    let budgetForHalfSecond = try #require(CodexBarCLI.serveProviderTimeout(requestTimeout: 0.5))
    #expect(budgetForOneSecond < 1)
    #expect(abs(budgetForHalfSecond - 0.4) < 1e-9)

    // Oversized finite deadlines are capped at 24 hours before scaling, so the
    // later Duration conversion cannot overflow.
    let budgetForOversizedDeadline = try #require(
        CodexBarCLI.serveProviderTimeout(requestTimeout: .greatestFiniteMagnitude))
    #expect(abs(budgetForOversizedDeadline - 69120) < 1e-9)
    #expect(budgetForOversizedDeadline < 86400)
}

/// Verifies that one provider exceeding its budget degrades to a single
/// provider error row while the remaining providers still render, in caller
/// order, without waiting for the hung fetch.
@Test
func `serve usage collection bounds a hung provider without blocking others`() async {
    let providers: [UsageProvider] = [.codex, .claude, .gemini]

    // Measure with a monotonic clock: wall-clock `Date()` can jump when the
    // system time is adjusted, which would make the elapsed check unreliable.
    let clock = ContinuousClock()
    let start = clock.now
    let output = await CodexBarCLI.serveCollectUsageOutputs(
        providers: providers,
        providerTimeout: 0.1)
    { provider in
        if provider == .claude {
            // Simulated hang, far longer than the 0.1s provider budget.
            try? await Task.sleep(for: .seconds(30))
            return UsageCommandOutput(sections: ["late:\(provider.rawValue)"])
        }
        return UsageCommandOutput(sections: ["ok:\(provider.rawValue)"])
    }
    let elapsed = start.duration(to: clock.now)

    // The hung provider must not serialize or stall the others.
    #expect(elapsed < .seconds(5))
    // Fast providers render in caller order; the hung one yields no section.
    #expect(output.sections == ["ok:codex", "ok:gemini"])

    // The hung provider degrades to a single provider error row.
    #expect(output.payload.count == 1)
    let timeoutRow = output.payload.first
    #expect(timeoutRow?.provider == UsageProvider.claude.rawValue)
    #expect(timeoutRow?.error != nil)
    #expect(timeoutRow?.error?.kind == .provider)
    // The timeout row is account-agnostic: it carries no cache key, so the
    // cache's keyed last-good merge intentionally does not reconstruct it
    // (a timeout cannot prove which account is active).
    #expect(timeoutRow?.cacheAccountKey == nil)
    #expect(timeoutRow?.account == nil)
    #expect(output.exitCode == .failure)
}

/// With no provider timeout, every fetch is awaited to completion: a slower
/// provider still contributes its section, in caller order, and no error row
/// is produced.
@Test
func `serve usage collection adds no join bound when request deadline is disabled`() async {
    let output = await CodexBarCLI.serveCollectUsageOutputs(
        providers: [.codex, .claude],
        providerTimeout: nil,
        fetch: { provider in
            let section = "ok:\(provider.rawValue)"
            // Delay the first provider so it finishes after the second one.
            if provider == .codex {
                try? await Task.sleep(for: .milliseconds(25))
            }
            return UsageCommandOutput(sections: [section])
        })

    // Sections follow the caller's provider order, not completion order.
    #expect(output.sections == ["ok:codex", "ok:claude"])
    #expect(output.payload.isEmpty)
    #expect(output.exitCode == .success)
}

@Test
func `serve cache uses stable Codex account identities`() {
let storedID = UUID()
Expand Down