Skip to content

Commit 66316ab

Browse files
committed
Fix menubar stuck loading with non-blocking pipe I/O and watchdog
Replace blocking availableData drain with non-blocking POSIX read that respects Task cancellation. Handle EINTR from child SIGCHLD, close pipe fds after drain to prevent deadlock on oversized output, and escalate SIGTERM to SIGKILL after 0.5s grace period. Add 60-second loading watchdog as safety net that auto-clears stuck state on each refresh loop tick. Fixes #282
1 parent 46e43a0 commit 66316ab

3 files changed

Lines changed: 65 additions & 38 deletions

File tree

mac/Sources/CodeBurnMenubar/AppStore.swift

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ final class AppStore {
2727
var currency: String = "USD"
2828
var isLoading: Bool { loadingCount > 0 }
2929
private var loadingCount: Int = 0
30+
private var loadingStartedAt: Date?
3031
var lastError: String?
3132
var subscription: SubscriptionUsage?
3233
var subscriptionError: String?
@@ -131,9 +132,21 @@ final class AppStore {
131132

132133
/// Drops all loading bookkeeping in one step: zeroes `loadingCount` (so
/// `isLoading` becomes false), forgets when the current loading episode
/// began, and empties the set of in-flight request keys.
func resetLoadingState() {
133134
loadingCount = 0
135+
loadingStartedAt = nil
134136
inFlightKeys.removeAll()
135137
}
136138

139+
private let loadingWatchdogSeconds: TimeInterval = 60
140+
141+
/// Loading watchdog: if the store has been in the loading state for longer
/// than `loadingWatchdogSeconds`, logs the stuck duration and resets all
/// loading bookkeeping via `resetLoadingState()`.
///
/// - Returns: `true` when a stale loading state was cleared, `false` when
///   nothing is loading, no start time is recorded, or the threshold has not
///   been exceeded yet.
@discardableResult
func clearStaleLoadingIfNeeded() -> Bool {
    guard isLoading, let started = loadingStartedAt else { return false }

    // Sample the clock once so the threshold check and the logged duration
    // are guaranteed to describe the same instant.
    let elapsed = Date().timeIntervalSince(started)
    guard elapsed > loadingWatchdogSeconds else { return false }

    // `%ld` is the specifier for a 64-bit Swift `Int`; `%d` denotes a 32-bit `int`.
    NSLog("CodeBurn: loading stuck for %lds — auto-clearing", Int(elapsed))
    resetLoadingState()
    return true
}
149+
137150
private func invalidateStaleDayCache() {
138151
let formatter = DateFormatter()
139152
formatter.dateFormat = "yyyy-MM-dd"
@@ -157,6 +170,7 @@ final class AppStore {
157170
inFlightKeys.insert(key)
158171
let didShowLoading = showLoading || cache[key] == nil
159172
if didShowLoading {
173+
if loadingCount == 0 { loadingStartedAt = Date() }
160174
loadingCount += 1
161175
}
162176
// Diagnostic anchor: if this key has been empty for a long time (the
@@ -172,7 +186,10 @@ final class AppStore {
172186
}
173187
defer {
174188
inFlightKeys.remove(key)
175-
if didShowLoading { loadingCount = max(loadingCount - 1, 0) }
189+
if didShowLoading {
190+
loadingCount = max(loadingCount - 1, 0)
191+
if loadingCount == 0 { loadingStartedAt = nil }
192+
}
176193
}
177194
do {
178195
let fresh = try await DataClient.fetch(period: key.period, provider: key.provider, includeOptimize: includeOptimize)

mac/Sources/CodeBurnMenubar/CodeBurnApp.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
259259
}
260260
while !Task.isCancelled {
261261
guard let self else { return }
262+
self.store.clearStaleLoadingIfNeeded()
262263
// Skip the loop's tick if a wake / manual / distributed-
263264
// notification refresh just ran. Without this gate, every
264265
// wake produced two refreshes (forceRefresh from the wake

mac/Sources/CodeBurnMenubar/Data/DataClient.swift

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -61,41 +61,27 @@ struct DataClient {
6161
throw DataClientError.spawn(error.localizedDescription)
6262
}
6363

64-
// Wall-clock timeout: if the CLI hangs (parser stuck, disk stall), kill it.
65-
// Log when this fires so a recurring stuck-popover state has an actual
66-
// diagnostic — historically users saw "Loading..." forever with no signal
67-
// about what failed; the only way to debug was to read process state at
68-
// the wrong time. The log line names the subcommand so we can correlate
69-
// with a specific period/provider combination.
7064
let timeoutTask = Task.detached(priority: .utility) {
7165
try? await Task.sleep(nanoseconds: spawnTimeoutSeconds * 1_000_000_000)
7266
if process.isRunning {
7367
NSLog("CodeBurn: CLI subprocess timed out after %llus for %@ — terminating",
7468
spawnTimeoutSeconds, subcommand.joined(separator: " "))
75-
process.terminate()
69+
terminateWithEscalation(process)
7670
}
7771
}
7872
defer { timeoutTask.cancel() }
7973

80-
// If the caller cancels its Task (rapid period/provider tab clicks
81-
// cancel switchTask in AppStore), terminate the in-flight subprocess.
82-
// Without this the cancelled Task returns immediately but the spawned
83-
// CLI keeps running to completion, piling up zombie codeburn processes
84-
// on rapid UI interactions. We hold a strong reference to the Process
85-
// in the cancellation handler so the closure can find it even if the
86-
// surrounding scope has gone async.
74+
let outHandle = outPipe.fileHandleForReading
75+
let errHandle = errPipe.fileHandleForReading
8776
let (out, err) = await withTaskCancellationHandler {
88-
// Drain both pipes concurrently so a large stderr can't deadlock stdout
89-
// (the child blocks on write once the pipe buffer fills). `drain`
90-
// also enforces a byte cap.
91-
async let stdoutData = drain(outPipe.fileHandleForReading, limit: maxPayloadBytes)
92-
async let stderrData = drain(errPipe.fileHandleForReading, limit: maxStderrBytes)
77+
async let stdoutData = drain(outHandle, limit: maxPayloadBytes)
78+
async let stderrData = drain(errHandle, limit: maxStderrBytes)
9379
return await (stdoutData, stderrData)
9480
} onCancel: {
95-
if process.isRunning {
96-
process.terminate()
97-
}
81+
terminateWithEscalation(process)
9882
}
83+
try? outHandle.close()
84+
try? errHandle.close()
9985
process.waitUntilExit()
10086

10187
if out.count >= maxPayloadBytes {
@@ -106,22 +92,45 @@ struct DataClient {
10692
return ProcessResult(stdout: out, stderr: stderrString, exitCode: process.terminationStatus)
10793
}
10894

109-
/// Pulls bytes off a pipe until EOF or `limit`. Intentionally uses `availableData`, which
110-
/// returns empty on EOF -- no blocking once the child exits.
95+
/// Asks `process` to stop and force-kills it if it is still alive shortly after.
///
/// Does nothing when the process is not running. Otherwise it calls
/// `terminate()` and schedules a check 0.5 seconds later on a utility-QoS
/// global queue; if the process is still running at that point it is sent
/// `SIGKILL`.
///
/// - Parameter process: The subprocess to shut down.
private static func terminateWithEscalation(_ process: Process) {
    if !process.isRunning { return }

    process.terminate()

    let childPID = process.processIdentifier
    let escalationDeadline = DispatchTime.now() + 0.5
    let queue = DispatchQueue.global(qos: .utility)
    queue.asyncAfter(deadline: escalationDeadline) {
        // Only escalate when the polite request was not honoured in time.
        guard process.isRunning else { return }
        kill(childPID, SIGKILL)
    }
}
103+
111104
/// Reads bytes from `handle`'s file descriptor until end-of-file, until
/// `limit` bytes have been collected, until a non-retryable read error, or
/// until the current task is cancelled — whichever comes first.
///
/// The descriptor is switched to non-blocking mode so the loop can observe
/// task cancellation between reads instead of parking inside `read()`.
///
/// - Parameters:
///   - handle: File handle whose underlying descriptor is read.
///   - limit: Maximum number of bytes to return.
/// - Returns: The bytes read so far (possibly empty, never more than `limit`).
private static func drain(_ handle: FileHandle, limit: Int) async -> Data {
    let fd = handle.fileDescriptor
    let flags = Darwin.fcntl(fd, F_GETFL)
    if flags < 0 {
        NSLog("CodeBurn: fcntl F_GETFL failed on fd %d, drain may block", fd)
    } else if Darwin.fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0 {
        // Previously ignored: without O_NONBLOCK the read below can block.
        NSLog("CodeBurn: fcntl F_SETFL O_NONBLOCK failed on fd %d, drain may block", fd)
    }

    var buffer = Data()
    var chunk = [UInt8](repeating: 0, count: 65_536)

    while buffer.count < limit && !Task.isCancelled {
        let toRead = min(chunk.count, limit - buffer.count)
        // Capture errno in the same closure as read(), before any other call
        // gets a chance to overwrite it.
        let (n, readErrno) = chunk.withUnsafeMutableBytes { raw -> (Int, Int32) in
            let count = Darwin.read(fd, raw.baseAddress, toRead)
            return (count, count < 0 ? errno : 0)
        }

        if n > 0 {
            buffer.append(contentsOf: chunk[..<n])
        } else if n == 0 {
            // End of file.
            break
        } else if readErrno == EAGAIN || readErrno == EWOULDBLOCK {
            // Nothing available yet: back off for 5 ms, then poll again.
            try? await Task.sleep(nanoseconds: 5_000_000)
        } else if readErrno == EINTR {
            // Interrupted by a signal before any data was read; retry.
            continue
        } else {
            NSLog("CodeBurn: drain read() failed on fd %d: errno %d", fd, readErrno)
            break
        }
    }
    return buffer
}
127136
}

0 commit comments

Comments
 (0)