Skip to content

Commit 31cda5e

Browse files
committed
fix(core): enforce step settlement ordering
1 parent 1b41b09 commit 31cda5e

11 files changed

Lines changed: 545 additions & 138 deletions

File tree

packages/core/src/session/compaction.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,32 +18,32 @@ const TOOL_OUTPUT_MAX_CHARS = 2_000
1818
const SUMMARY_OUTPUT_TOKENS = 4_096
1919
const SUMMARY_TEMPLATE = `Output exactly the Markdown structure shown inside <template> and keep the section order unchanged. Do not include the <template> tags in your response.
2020
<template>
21-
## Goal
21+
## Continuation Goal
2222
- [single-sentence task summary]
2323
24-
## Constraints & Preferences
24+
## Operating Constraints
2525
- [user constraints, preferences, specs, or "(none)"]
2626
2727
## Progress
28-
### Done
28+
### Completed
2929
- [completed work or "(none)"]
3030
31-
### In Progress
31+
### In Flight
3232
- [current work or "(none)"]
3333
3434
### Blocked
3535
- [blockers or "(none)"]
3636
37-
## Key Decisions
37+
## Decisions To Preserve
3838
- [decision and why, or "(none)"]
3939
40-
## Next Steps
40+
## Resume From Here
4141
- [ordered next actions or "(none)"]
4242
43-
## Critical Context
43+
## Context To Preserve
4444
- [important technical facts, errors, open questions, or "(none)"]
4545
46-
## Relevant Files
46+
## Working Files
4747
- [file or directory path: why it matters, or "(none)"]
4848
</template>
4949

packages/core/src/session/runner/llm.ts

Lines changed: 86 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import {
1111
type ProviderErrorEvent,
1212
} from "@opencode-ai/llm"
1313
import { SessionError } from "@opencode-ai/schema/session-error"
14-
import { Cause, Effect, Exit, FiberSet, Layer, Option, Semaphore, Stream } from "effect"
14+
import { Cause, Effect, Exit, Fiber, FiberSet, Layer, Option, Semaphore, Stream } from "effect"
1515
import { AgentV2 } from "../../agent"
1616
import { Config } from "../../config"
1717
import { Database } from "../../database/database"
@@ -148,9 +148,6 @@ const layer = Layer.effect(
148148
}
149149
})
150150

151-
const awaitToolFibers = (fibers: FiberSet.FiberSet<void, ToolOutputStore.Error | UserInterruptedError>) =>
152-
Effect.raceFirst(FiberSet.join(fibers), FiberSet.awaitEmpty(fibers))
153-
154151
// Match V1: dismissing a question halts the loop instead of becoming model-facing tool output.
155152
const isQuestionRejected = (cause: Cause.Cause<unknown>) =>
156153
cause.reasons.some((reason) => Cause.isDieReason(reason) && reason.defect instanceof QuestionV2.RejectedError)
@@ -188,6 +185,7 @@ const layer = Layer.effect(
188185
session.id,
189186
)
190187
const toolFibers = yield* FiberSet.make<void, ToolOutputStore.Error | UserInterruptedError>()
188+
const ownedToolFibers: Array<Fiber.Fiber<void, ToolOutputStore.Error | UserInterruptedError>> = []
191189
let needsContinuation = false
192190
let currentStep = step
193191
if (promotion) {
@@ -265,37 +263,39 @@ const layer = Layer.effect(
265263
}
266264
needsContinuation = true
267265
const assistantMessageID = yield* publisher.assistantMessageID(event.id)
268-
yield* Effect.uninterruptibleMask((restore) =>
269-
restore(
270-
toolMaterialization.settle({
271-
sessionID: session.id,
272-
agent: agent.id,
273-
assistantMessageID,
274-
call: event,
275-
}),
276-
).pipe(
277-
Effect.flatMap((settlement) =>
278-
publish(
279-
LLMEvent.toolResult({
280-
id: event.id,
281-
name: event.name,
282-
result: settlement.result,
283-
output: settlement.output,
284-
}),
285-
settlement.outputPaths ?? [],
286-
settlement.error,
287-
).pipe(
288-
Effect.andThen(
289-
settlement.error?.type === "permission.rejected"
290-
? serialized(publisher.failAssistant(settlement.error)).pipe(
291-
Effect.andThen(Effect.fail(new UserInterruptedError())),
292-
)
293-
: Effect.void,
266+
ownedToolFibers.push(
267+
yield* Effect.uninterruptibleMask((restore) =>
268+
restore(
269+
toolMaterialization.settle({
270+
sessionID: session.id,
271+
agent: agent.id,
272+
assistantMessageID,
273+
call: event,
274+
}),
275+
).pipe(
276+
Effect.flatMap((settlement) =>
277+
publish(
278+
LLMEvent.toolResult({
279+
id: event.id,
280+
name: event.name,
281+
result: settlement.result,
282+
output: settlement.output,
283+
}),
284+
settlement.outputPaths ?? [],
285+
settlement.error,
286+
).pipe(
287+
Effect.andThen(
288+
settlement.error?.type === "permission.rejected"
289+
? serialized(publisher.failAssistant(settlement.error)).pipe(
290+
Effect.andThen(Effect.fail(new UserInterruptedError())),
291+
)
292+
: Effect.void,
293+
),
294294
),
295295
),
296296
),
297-
),
298-
).pipe(FiberSet.run(toolFibers))
297+
).pipe(FiberSet.run(toolFibers)),
298+
)
299299
}),
300300
),
301301
Effect.ensuring(serialized(publisher.flush())),
@@ -345,8 +345,8 @@ const layer = Layer.effect(
345345
return { _tag: "RestartAfterOverflowCompaction", step: currentStep } as const
346346

347347
// An unrecovered held-back overflow becomes the step's durable provider error. A
348-
// thrown LLM failure fails hosted tool calls and the assistant unless a provider
349-
// error was already recorded from the stream.
348+
// thrown LLM failure records the assistant failure unless a provider error was
349+
// already recorded from the stream. Terminal publication waits for owned tools.
350350
if (overflowFailure) yield* publish(overflowFailure)
351351
const llmFailure = streamFailure instanceof LLMError ? streamFailure : undefined
352352
if (llmFailure && !publisher.hasProviderError()) {
@@ -363,39 +363,39 @@ const layer = Layer.effect(
363363
step: currentStep,
364364
})
365365
}
366-
yield* serialized(
367-
publisher.failUnsettledTools(
368-
{ type: "tool.result-missing", message: "Provider did not return a tool result" },
369-
true,
370-
),
371-
)
372366
yield* serialized(publisher.failAssistant(error))
373367
}
374368
// Provider error events only arrive from the stream, so the flag is final here.
375369
const providerFailed = publisher.hasProviderError()
376370

377-
// Settle tool fibers: an interrupted stream abandons unstarted tool work first.
371+
// Settle every owned tool fiber. FiberSet.join returns on the first failure, so retain
372+
// the individual fibers and await all exits before publishing the terminal step event.
378373
if (streamInterrupted) yield* FiberSet.clear(toolFibers)
379-
const settled = yield* restore(awaitToolFibers(toolFibers)).pipe(Effect.exit)
380-
const toolsInterrupted = settled._tag === "Failure" && Cause.hasInterrupts(settled.cause)
381-
const questionDismissed = settled._tag === "Failure" && isQuestionRejected(settled.cause)
382-
const settledError =
383-
settled._tag === "Failure" ? Option.getOrUndefined(Cause.findErrorOption(settled.cause)) : undefined
384-
const permissionRejected = settledError instanceof UserInterruptedError
374+
const settled = yield* restore(
375+
Effect.forEach(ownedToolFibers, Fiber.await, { concurrency: "unbounded" }),
376+
).pipe(Effect.exit)
377+
const settledCauses =
378+
settled._tag === "Failure"
379+
? [settled.cause]
380+
: settled.value.flatMap((exit) => (exit._tag === "Failure" ? [exit.cause] : []))
381+
const toolsInterrupted = settledCauses.some(Cause.hasInterrupts)
382+
const questionDismissed = settledCauses.some(isQuestionRejected)
383+
const permissionRejected = settledCauses.some(
384+
(cause) => Option.getOrUndefined(Cause.findErrorOption(cause)) instanceof UserInterruptedError,
385+
)
385386

386387
if (questionDismissed || permissionRejected || streamInterrupted || toolsInterrupted) {
387388
yield* FiberSet.clear(toolFibers)
388389
yield* serialized(publisher.failUnsettledTools({ type: "aborted", message: "Tool execution interrupted" }))
389390
yield* serialized(publisher.failAssistant({ type: "aborted", message: "Step interrupted" }))
390-
// Match V1: dismissing a question halts the loop like an interruption.
391-
if (questionDismissed || permissionRejected) return yield* new UserInterruptedError()
392391
}
393392
// A settled tool fiber failure is one of two things. A defect from a tool
394393
// implementation becomes a failed tool call the model can read, and the step still
395394
// settles so the model may recover. A typed infrastructure failure (tool output
396395
// could not be persisted) also fails the assistant and then fails the drain.
397-
const settledFailure =
398-
settled._tag === "Failure" && !toolsInterrupted && !permissionRejected ? settled.cause : undefined
396+
const settledFailure = settledCauses.find(
397+
(cause) => !Cause.hasInterrupts(cause) && !isQuestionRejected(cause) && !permissionRejected,
398+
)
399399
const infraError =
400400
settledFailure === undefined ? undefined : Option.getOrUndefined(Cause.findErrorOption(settledFailure))
401401
if (settledFailure !== undefined) {
@@ -405,26 +405,50 @@ const layer = Layer.effect(
405405
if (infraError !== undefined) yield* serialized(publisher.failAssistant(error))
406406
}
407407

408-
const stepSettlement = publisher.stepSettlement()
409-
const stepEndedCleanly =
410-
!streamInterrupted && !toolsInterrupted && infraError === undefined && !providerFailed
411-
if (stepSettlement && stepEndedCleanly) yield* publishStepEnd(stepSettlement)
412-
// A provider error orphans recorded local calls; a clean stream can still leave
413-
// hosted calls without results.
408+
// Fail unresolved calls before the terminal step event. Local calls have joined, so
409+
// these sweeps only close calls that could not produce a truthful settlement.
414410
if (providerFailed)
415411
yield* serialized(publisher.failUnsettledTools({ type: "aborted", message: "Tool execution interrupted" }))
416-
if (stream._tag === "Success" && !providerFailed)
412+
if (llmFailure && !providerFailed)
417413
yield* serialized(
418414
publisher.failUnsettledTools(
419-
{ type: "tool.result-missing", message: "Provider did not return a tool result" },
415+
{
416+
type: "tool.result-missing",
417+
message: "Provider did not return a tool result",
418+
},
420419
true,
421420
),
422421
)
422+
const hostedResultMissing =
423+
stream._tag === "Success" && !providerFailed
424+
? yield* serialized(
425+
publisher.failUnsettledTools(
426+
{ type: "tool.result-missing", message: "Provider did not return a tool result" },
427+
true,
428+
),
429+
)
430+
: false
431+
if (hostedResultMissing && !publisher.stepSettlement())
432+
yield* serialized(
433+
publisher.failAssistant({
434+
type: "tool.result-missing",
435+
message: "Provider did not return a tool result",
436+
}),
437+
)
423438

424-
if (stream._tag === "Failure") return yield* Effect.failCause(stream.cause)
425-
if (settled._tag === "Failure" && (toolsInterrupted || infraError !== undefined))
426-
return yield* Effect.failCause(settled.cause)
427439
const stepFailure = publisher.stepFailure()
440+
const stepSettlement = publisher.stepSettlement()
441+
const stepEndedCleanly =
442+
!streamInterrupted && !toolsInterrupted && infraError === undefined && !providerFailed && !stepFailure
443+
if (stepSettlement && stepEndedCleanly) yield* publishStepEnd(stepSettlement)
444+
if (stepFailure) yield* serialized(publisher.publishStepFailure())
445+
446+
if (stream._tag === "Failure") return yield* Effect.failCause(stream.cause)
447+
// Match V1: dismissing a question halts the loop like an interruption.
448+
if (questionDismissed || permissionRejected) return yield* new UserInterruptedError()
449+
if ((toolsInterrupted || infraError !== undefined) && settledFailure)
450+
return yield* Effect.failCause(settledFailure)
451+
if (toolsInterrupted && settled._tag === "Failure") return yield* Effect.failCause(settled.cause)
428452
if (stepFailure) return yield* new StepFailedError({ error: stepFailure })
429453
return {
430454
_tag: "Completed",

packages/core/src/session/runner/publish-llm-event.ts

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ export const createLLMEventPublisher = (events: EventV2.Interface, input: Input)
6868
>()
6969
let assistantMessageID = input.assistantMessageID
7070
let stepStarted = false
71-
let assistantFailed = false
71+
let stepFailed = false
7272
let providerFailed = false
7373
let retryEvidence = false
7474
let stepFailure: SessionError.Error | undefined
@@ -206,26 +206,32 @@ export const createLLMEventPublisher = (events: EventV2.Interface, input: Input)
206206
yield* flushFragments()
207207
})
208208

209-
const failAssistant = Effect.fnUntraced(function* (error: SessionError.Error) {
210-
if (assistantFailed) return
209+
const failAssistant = Effect.fnUntraced(function* (error: SessionError.Error, replace = false) {
211210
yield* flush()
211+
yield* startAssistant()
212+
if (replace || stepFailure === undefined) stepFailure = error
213+
})
214+
215+
const publishStepFailure = Effect.fnUntraced(function* () {
216+
if (stepFailed || stepFailure === undefined) return
212217
const assistantMessageID = yield* startAssistant()
213-
assistantFailed = true
214-
stepFailure = error
218+
stepFailed = true
215219
yield* events.publish(SessionEvent.Step.Failed, {
216220
sessionID: input.sessionID,
217221
assistantMessageID,
218-
error,
222+
error: stepFailure,
219223
})
220224
})
221225

222226
const failUnsettledTools = Effect.fn("SessionRunner.failUnsettledTools")(function* (
223227
error: SessionError.Error,
224228
hostedOnly = false,
225229
) {
230+
let failed = false
226231
for (const [callID, tool] of tools) {
227232
if (tool.settled || (hostedOnly && !tool.providerExecuted)) continue
228233
tool.settled = true
234+
failed = true
229235
yield* events.publish(SessionEvent.Tool.Failed, {
230236
sessionID: input.sessionID,
231237
assistantMessageID: tool.assistantMessageID,
@@ -234,6 +240,7 @@ export const createLLMEventPublisher = (events: EventV2.Interface, input: Input)
234240
executed: tool.providerExecuted,
235241
})
236242
}
243+
return failed
237244
})
238245

239246
const assistantMessageIDForTool = (callID: string) => {
@@ -396,7 +403,7 @@ export const createLLMEventPublisher = (events: EventV2.Interface, input: Input)
396403
if (stepSettlement) return yield* Effect.die(new Error("Duplicate step finish"))
397404
if (event.reason === "content-filter") {
398405
providerFailed = true
399-
yield* failAssistant({ type: "provider.content-filter", message: "Provider blocked the response" })
406+
yield* failAssistant({ type: "provider.content-filter", message: "Provider blocked the response" }, true)
400407
return
401408
}
402409
stepSettlement = { finish: event.reason, tokens: tokens(event.usage) }
@@ -405,7 +412,7 @@ export const createLLMEventPublisher = (events: EventV2.Interface, input: Input)
405412
return
406413
case "provider-error":
407414
providerFailed = true
408-
yield* failAssistant({ type: "provider.unknown", message: event.message })
415+
yield* failAssistant({ type: "provider.unknown", message: event.message }, true)
409416
return
410417
}
411418
})
@@ -414,6 +421,7 @@ export const createLLMEventPublisher = (events: EventV2.Interface, input: Input)
414421
publish,
415422
flush,
416423
failAssistant,
424+
publishStepFailure,
417425
failUnsettledTools,
418426
hasProviderError: () => providerFailed,
419427
hasRetryEvidence: () => retryEvidence,

packages/core/test/session-compaction.test.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,32 @@ test("compaction describes tool media without embedding base64", () => {
6868
expect(serialized).not.toContain(base64)
6969
})
7070

71+
test("compaction prompt requires the continuation checkpoint headings in order", () => {
72+
const prompt = SessionCompaction.buildPrompt({ context: ["Conversation history"] })
73+
expect(prompt.match(/^#{2,3} .+$/gm)).toEqual([
74+
"## Continuation Goal",
75+
"## Operating Constraints",
76+
"## Progress",
77+
"### Completed",
78+
"### In Flight",
79+
"### Blocked",
80+
"## Decisions To Preserve",
81+
"## Resume From Here",
82+
"## Context To Preserve",
83+
"## Working Files",
84+
])
85+
expect(prompt).toContain("single-sentence task summary")
86+
expect(prompt).toContain("user constraints, preferences, specs")
87+
expect(prompt).toContain("completed work")
88+
expect(prompt).toContain("current work")
89+
expect(prompt).toContain("blockers")
90+
expect(prompt).toContain("decision and why")
91+
expect(prompt).toContain("ordered next actions")
92+
expect(prompt).toContain("important technical facts, errors, open questions")
93+
expect(prompt).toContain("file or directory path: why it matters")
94+
expect(prompt).toContain("Keep every section, even when empty.")
95+
})
96+
7197
it.effect("manual compaction summarizes short context instead of no-op", () =>
7298
Effect.gen(function* () {
7399
requests = []

0 commit comments

Comments
 (0)