Skip to content

Commit 7671cf9

Browse files
anandgupta42 and claude committed
fix: escalating circuit breaker for doom loops in headless mode
When `doom_loop` permission is auto-accepted (headless mode or config `"allow"`), the per-tool repeat counter resets every 30 calls and loops run indefinitely. Observed: 10,943 `apply_patch` calls in one session.

Add escalating circuit breaker:
- 1st threshold (30 calls): ask permission (existing behavior)
- 2nd threshold (60 calls): inject non-synthetic warning the LLM sees, telling it to change approach
- 3rd threshold (90 calls): force-stop the session via `blocked = true`

Also:
- Add `if (blocked) break` after the switch to exit the stream loop immediately on force-stop (not just the switch)
- Add `escalation_level` to `doom_loop_detected` telemetry event for distinguishing ask/warn/stop in analytics

Closes #657

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0203b6a commit 7671cf9

2 files changed

Lines changed: 75 additions & 3 deletions

File tree

packages/opencode/src/altimate/telemetry/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ export namespace Telemetry {
243243
session_id: string
244244
tool_name: string
245245
repeat_count: number
246+
// altimate_change start — escalation level for distinguishing ask/warn/stop in analytics
247+
escalation_level?: number
248+
// altimate_change end
246249
}
247250
| {
248251
type: "environment_census"

packages/opencode/src/session/processor.ts

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,15 @@ export namespace SessionProcessor {
2727
// 30 catches pathological patterns while avoiding false positives for power users.
2828
const TOOL_REPEAT_THRESHOLD = 30
2929
// altimate_change end
30+
// altimate_change start — escalating circuit breaker for doom loops
31+
// When the repeat threshold is hit and auto-accepted (headless, config allow), the
32+
// counter resets and the loop continues indefinitely. Escalation levels:
33+
// 1st hit (30 calls): ask permission (existing behavior)
34+
// 2nd hit (60 calls): ask + inject a non-synthetic warning (visible to the model) telling it to change approach
35+
// 3rd hit (90 calls): force-stop the session — the model is stuck
36+
const DOOM_LOOP_WARN_ESCALATION = 2 // hits before injecting warning
37+
const DOOM_LOOP_STOP_ESCALATION = 3 // hits before force-stopping
38+
// altimate_change end
3039
const log = Log.create({ service: "session.processor" })
3140

3241
export type Info = Awaited<ReturnType<typeof create>>
@@ -42,6 +51,9 @@ export namespace SessionProcessor {
4251
// altimate_change start — per-tool call counter for varied-input loop detection
4352
const toolCallCounts: Record<string, number> = {}
4453
// altimate_change end
54+
// altimate_change start — escalation counter: how many times each tool has hit TOOL_REPEAT_THRESHOLD
55+
const toolLoopHits: Record<string, number> = {}
56+
// altimate_change end
4557
let snapshot: string | undefined
4658
let blocked = false
4759
let attempt = 0
@@ -201,20 +213,74 @@ export namespace SessionProcessor {
201213
})
202214
}
203215

204-
// altimate_change start — per-tool repeat counter (catches varied-input loops like todowrite 2,080x)
216+
// altimate_change start — per-tool repeat counter with escalating circuit breaker
205217
// Counter is scoped to the processor lifetime (create() call), so it accumulates
206218
// across multiple process() invocations within a session. This is intentional:
207219
// cross-turn accumulation catches slow-burn loops that stay under the threshold
208220
// per-turn but add up over the session.
209221
toolCallCounts[value.toolName] = (toolCallCounts[value.toolName] ?? 0) + 1
210222
if (toolCallCounts[value.toolName] >= TOOL_REPEAT_THRESHOLD) {
223+
toolLoopHits[value.toolName] = (toolLoopHits[value.toolName] ?? 0) + 1
224+
const hits = toolLoopHits[value.toolName]
225+
const totalCalls = hits * TOOL_REPEAT_THRESHOLD
226+
211227
Telemetry.track({
212228
type: "doom_loop_detected",
213229
timestamp: Date.now(),
214230
session_id: input.sessionID,
215231
tool_name: value.toolName,
216-
repeat_count: toolCallCounts[value.toolName],
232+
repeat_count: totalCalls,
233+
escalation_level: hits,
217234
})
235+
236+
// Escalation level 3+: force-stop — the model is irretrievably stuck
237+
if (hits >= DOOM_LOOP_STOP_ESCALATION) {
238+
log.warn("doom loop circuit breaker: force-stopping session", {
239+
tool: value.toolName,
240+
totalCalls,
241+
hits,
242+
sessionID: input.sessionID,
243+
})
244+
await Session.updatePart({
245+
id: PartID.ascending(),
246+
messageID: input.assistantMessage.id,
247+
sessionID: input.assistantMessage.sessionID,
248+
type: "text",
249+
synthetic: true,
250+
text:
251+
`⚠️ altimate-code: session stopped — \`${value.toolName}\` was called ${totalCalls}+ times, ` +
252+
`indicating the agent is stuck in a loop. Please start a new session with a revised prompt.`,
253+
time: { start: Date.now(), end: Date.now() },
254+
})
255+
blocked = true
256+
toolCallCounts[value.toolName] = 0
257+
break
258+
}
259+
260+
// Escalation level 2: warn the model via a non-synthetic message (so the LLM actually sees it)
261+
if (hits >= DOOM_LOOP_WARN_ESCALATION) {
262+
log.warn("doom loop escalation: injecting warning", {
263+
tool: value.toolName,
264+
totalCalls,
265+
hits,
266+
sessionID: input.sessionID,
267+
})
268+
await Session.updatePart({
269+
id: PartID.ascending(),
270+
messageID: input.assistantMessage.id,
271+
sessionID: input.assistantMessage.sessionID,
272+
type: "text",
273+
// synthetic: false so the LLM actually sees this warning and can course-correct
274+
text:
275+
`⚠️ altimate-code: \`${value.toolName}\` has been called ${totalCalls}+ times this session. ` +
276+
`You appear to be stuck in a loop. Stop repeating the same approach. ` +
277+
`Either try a fundamentally different strategy or explain to the user what is blocking you. ` +
278+
`The session will be force-stopped if this continues.`,
279+
time: { start: Date.now(), end: Date.now() },
280+
})
281+
}
282+
283+
// Escalation level 1: ask permission (existing behavior)
218284
const agent = await Agent.get(input.assistantMessage.agent)
219285
await PermissionNext.ask({
220286
permission: "doom_loop",
@@ -223,7 +289,7 @@ export namespace SessionProcessor {
223289
metadata: {
224290
tool: value.toolName,
225291
input: value.input,
226-
repeat_count: toolCallCounts[value.toolName],
292+
repeat_count: totalCalls,
227293
},
228294
always: [value.toolName],
229295
ruleset: agent.permission,
@@ -478,6 +544,9 @@ export namespace SessionProcessor {
478544
continue
479545
}
480546
if (needsCompaction) break
547+
// altimate_change start — exit stream loop immediately on doom loop force-stop
548+
if (blocked) break
549+
// altimate_change end
481550
}
482551
} catch (e: any) {
483552
log.error("process", {

0 commit comments

Comments
 (0)