electric-sql · kevin-dp · Jun 24, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
diff --git a/.changeset/context-compaction.md b/.changeset/context-compaction.md
@@ -0,0 +1,26 @@
+---
+'@electric-ax/agents-runtime': patch
+'@electric-ax/agents-server-ui': patch
+---
+
+Context compaction for the agents runtime. Modelled on OpenAI Codex's
+summarization but adapted to the event-sourced timeline (a `context_inserted`
+checkpoint placed at a stored watermark, so reconstruction folds older messages
+into a summary):
+
+- A context-window usage gauge (cache-inclusive `context_input_tokens` +
+  `context_window` persisted per step) and `<token_budget>` notices injected at
+  25 / 50 / 75% usage.
+- Oversized tool-output truncation, and a synchronous mid-turn compaction floor
+  at the 90% hard ceiling (runs before every model step via the adapter's
+  `transformContext` hook).
+- Non-blocking background (turn-end) compaction that starts at 85%: a detached
+  summarization whose checkpoint is applied at the next turn's start, or
+  immediately if it finishes while idle. Each generation uses a
+  watermark-unique checkpoint id so a new run can't supersede a prior completed
+  one. Summarize calls are bounded by a hard timeout.
+- UI: a "Compacting…" indicator (blocking vs. background) and a collapsible
+  "Context compacted" entry in the conversation timeline.
+
+Thresholds are env-tunable (`ELECTRIC_AGENTS_COMPACT_CEILING`,
+`ELECTRIC_AGENTS_COMPACT_BG_CEILING`).
diff --git a/packages/agents-runtime/src/client.ts b/packages/agents-runtime/src/client.ts
@@ -46,6 +46,18 @@ export {
 // drift from the runtime dispatcher.
 export { isGoalCommandText, parseGoalCommand } from './goal-command'
 export { formatTokenCount } from './token-budget'
+export {
+  CONTEXT_USAGE_BACKGROUND_START,
+  CONTEXT_USAGE_HARD_CEILING,
+  computeContextUsage,
+  contextUsageLevel,
+  formatContextUsagePercent,
+} from './token-accountant'
+export type {
+  ContextUsage,
+  ContextUsageInput,
+  ContextUsageLevel,
+} from './token-accountant'
 export type { GoalCommand } from './goal-command'
 
 export type {

diff --git a/packages/agents-runtime/src/compaction-midturn.ts b/packages/agents-runtime/src/compaction-midturn.ts
@@ -0,0 +1,118 @@
+import { COMPACTION_CHECKPOINT_NAME } from './compaction'
+import type { CompactionStatus } from './compaction'
+
+/**
+ * A pi-agent message — loose shape; we only build and slice these.
+ *
+ * Nothing in this module inspects `content`; messages are passed through,
+ * sliced by position, or handed to the summarizer as-is.
+ */
+export type AgentMessageLike = {
+  /** Speaker role. The synthetic summary message built here uses `user`. */
+  role: string
+  /**
+   * Opaque payload. The summary message built here uses an array holding a
+   * single `{ type: 'text', text }` part.
+   */
+  content: unknown
+  /** Optional creation time; the summary message sets it from `Date.now()`. */
+  timestamp?: number
+}
+
+/** Collaborators and configuration injected into `createMidTurnCompactor`. */
+export interface MidTurnCompactorDeps {
+  /** Summarize the given (older) messages into a handoff summary string. */
+  summarize: (messages: Array<AgentMessageLike>) => Promise<string>
+  /**
+   * Persist the checkpoint lifecycle row (UI marker + future reconstruction).
+   *
+   * The compactor calls it with `running` and empty content before
+   * summarizing, then with `complete` and the summary on success, or with
+   * `failed` and empty content when summarization throws.
+   */
+  writeCheckpoint: (status: CompactionStatus, content: string) => void
+  /**
+   * Compaction fires at/above this fraction of the context window, i.e. once
+   * `currentTokens` reaches `ceiling * contextWindow`.
+   */
+  ceiling: number
+}
+
+/**
+ * Per-invocation input to a `CompactContextFn`.
+ *
+ * NOTE(review): the compactor remembers how many leading messages it has
+ * already summarized and slices `messages` by that count, so it presumably
+ * expects the full, uncompacted list on every call — confirm against the
+ * `transformContext` wiring.
+ */
+export interface CompactContextInput {
+  /** The context about to be sent to the model. */
+  messages: Array<AgentMessageLike>
+  /** Estimated tokens of the outgoing context (real last-step usage + tail). */
+  currentTokens: number
+  /** Context window size, in the same token unit as `currentTokens`. */
+  contextWindow: number
+}
+
+/**
+ * Mid-turn compaction hook. Resolves to the message list that should replace
+ * the outgoing context, or `null` to leave the context untouched.
+ */
+export type CompactContextFn = (
+  input: CompactContextInput
+) => Promise<Array<AgentMessageLike> | null>
+
+/**
+ * Wrap a summary in a synthetic `user` message. The summary text is enclosed
+ * in open/close tags named after `COMPACTION_CHECKPOINT_NAME`, each on its own
+ * line, and the message is stamped with the current time.
+ */
+export function buildCompactionSummaryMessage(
+  summary: string
+): AgentMessageLike {
+  const tag = COMPACTION_CHECKPOINT_NAME
+  const wrapped = [`<${tag}>`, summary, `</${tag}>`].join(`\n`)
+  const textPart = { type: `text`, text: wrapped }
+
+  return { role: `user`, content: [textPart], timestamp: Date.now() }
+}
+
+/**
+ * Build the per-turn mid-turn compaction hook for one agent run. The returned
+ * function is wired to pi-agent's `transformContext` — it runs before every
+ * model step. Once the estimated outgoing context crosses the ceiling it folds
+ * the WHOLE context into a summary and continues from `[summary, ...anything
+ * appended since]` (Codex-style — no verbatim pre-compaction tail).
+ *
+ * Summarizing everything is what keeps the persisted checkpoint sound: the
+ * `writeCheckpoint` wiring stamps it with `watermark = timeline head`, and a
+ * verbatim tail kept below that head would be dropped on the next turn's
+ * reconstruction yet excluded from the summary — silently losing context. By
+ * covering everything up to the head, summary and watermark agree.
+ *
+ * The summary is cached for the rest of the turn: later steps reuse it
+ * (returning the compacted view) instead of re-summarizing. Coverage is only
+ * extended once new messages appended this turn push back over the ceiling, and
+ * a re-summarization chains off the previous summary (prev summary + new
+ * middle) rather than re-reading the whole already-summarized bulk.
+ *
+ * A `contextWindow` that is zero, negative or NaN is treated as "window
+ * unknown" and never starts a new compaction: `ceiling * contextWindow` would
+ * otherwise be a threshold every step exceeds, forcing a summarize call each
+ * time the context grows.
+ *
+ * If summarization throws, a `failed` checkpoint is written and the hook falls
+ * back to whatever compaction it already had; the error itself is not
+ * propagated.
+ *
+ * @param deps Summarizer, checkpoint writer and ceiling fraction for this run.
+ * @returns The hook. It resolves to the compacted message list, or `null` to
+ *   leave the context untouched (no compaction needed/active).
+ */
+export function createMidTurnCompactor(
+  deps: MidTurnCompactorDeps
+): CompactContextFn {
+  // Latest summary of this turn and how many leading `messages` it covers.
+  let state: { summary: string; coveredCount: number } | null = null
+
+  // `[summary, ...messages not yet covered]`, or `null` before any compaction.
+  const compactedView = (
+    messages: Array<AgentMessageLike>
+  ): Array<AgentMessageLike> | null =>
+    state
+      ? [
+          buildCompactionSummaryMessage(state.summary),
+          ...messages.slice(state.coveredCount),
+        ]
+      : null
+
+  return async ({ messages, currentTokens, contextWindow }) => {
+    // `contextWindow > 0` is false for 0, negatives and NaN, so an unknown
+    // window can never register as "over the ceiling".
+    const overCeiling =
+      contextWindow > 0 && currentTokens >= deps.ceiling * contextWindow
+
+    // Under the ceiling: keep the compacted view sticky if we already compacted
+    // this turn, otherwise leave the context untouched.
+    if (!overCeiling) return compactedView(messages)
+
+    // Fold the ENTIRE current context (everything maps to timeline items at or
+    // below the head the checkpoint will store), so summary and watermark agree.
+    const coveredCount = messages.length
+
+    // Nothing new to fold beyond the existing summary's coverage.
+    if (coveredCount <= 0) return compactedView(messages)
+    if (state && coveredCount <= state.coveredCount)
+      return compactedView(messages)
+
+    // Chain off the previous summary so we don't re-summarize the whole bulk.
+    const toSummarize = state
+      ? [
+          buildCompactionSummaryMessage(state.summary),
+          ...messages.slice(state.coveredCount, coveredCount),
+        ]
+      : messages.slice(0, coveredCount)
+
+    deps.writeCheckpoint(`running`, ``)
+    try {
+      const summary = await deps.summarize(toSummarize)
+      deps.writeCheckpoint(`complete`, summary)
+      // Only commit the new coverage once the checkpoint has been written.
+      state = { summary, coveredCount }
+      return compactedView(messages)
+    } catch {
+      deps.writeCheckpoint(`failed`, ``)
+      // Fall back to any compaction we already had (or leave untouched).
+      return compactedView(messages)
+    }
+  }
+}
diff --git a/packages/agents-runtime/src/compaction-summarize.ts b/packages/agents-runtime/src/compaction-summarize.ts
@@ -0,0 +1,132 @@
+import { completeSimple } from '@mariozechner/pi-ai'
+import { resolvePiModel, toAgentHistory } from './pi-adapter'
+import {
+  COMPACTION_SUMMARIZATION_PROMPT,
+  COMPACTION_SUMMARY_PREFIX,
+} from './compaction'
+import type { LLMMessage, SummarizeCompleteFn } from './types'
+
+export type { SummarizeCompleteFn }
+
+/** `maxTokens` passed to the model call when the caller does not supply one. */
+const DEFAULT_SUMMARY_MAX_TOKENS = 2048
+
+/**
+ * Hard deadline for a single summarization request.
+ *
+ * pi-ai's anthropic provider applies a client-side timeout (and abort) ONLY
+ * when the caller passes `timeoutMs`/`signal`, and it never retries. Background
+ * compaction fires this call CONCURRENTLY with the agent's own streaming turn on
+ * the same (OAuth) token; if that concurrent stream stalls, an unbounded call
+ * hangs forever — wedging the pending slot and blocking all future attempts.
+ * Bounding it turns a stall into a failure the caller can retry next turn-end.
+ */
+const DEFAULT_SUMMARY_TIMEOUT_MS = 120_000
+
+/**
+ * Options shared by every summarization entry point in this module; the
+ * messages to summarize are supplied separately by each entry point.
+ */
+interface SummarizeCoreInput {
+  /**
+   * Model to summarize with, resolved through `resolvePiModel`. Callers are
+   * expected to pass the conversation's own model: a cheaper, small-window
+   * model would overflow on a near-full context — the whole reason we are
+   * compacting.
+   */
+  model: string | object
+  /** Provider forwarded to `resolvePiModel`; omitted from the call when unset. */
+  provider?: string
+  /** API key forwarded to the model call; omitted from the options when unset. */
+  apiKey?: string
+  /** Output-token cap; defaults to {@link DEFAULT_SUMMARY_MAX_TOKENS}. */
+  maxTokens?: number
+  /** Hard deadline for the model call; defaults to {@link DEFAULT_SUMMARY_TIMEOUT_MS}. */
+  timeoutMs?: number
+  /** Completion function to use instead of pi-ai's `completeSimple`. */
+  complete?: SummarizeCompleteFn
+}
+
+/**
+ * Core summarization over already-converted history messages (pi-agent's
+ * `AgentMessage[]` shape). Appends Codex's summarization prompt, calls the
+ * model, and prefixes the result. Both the LLMMessage path and the mid-turn
+ * AgentMessage path funnel through here.
+ *
+ * @param historyMessages Conversation to summarize; sent ahead of the
+ *   summarization prompt, unchanged.
+ * @param input Model selection, credentials and limits for the call.
+ * @returns `COMPACTION_SUMMARY_PREFIX`, a newline, then the text of the
+ *   response's first text block.
+ * @throws If the call exceeds `timeoutMs`, if the completion function throws
+ *   or rejects, or if the first text block is missing or blank.
+ */
+async function summarizeConverted(
+  historyMessages: ReadonlyArray<unknown>,
+  input: SummarizeCoreInput
+): Promise<string> {
+  const complete =
+    input.complete ?? (completeSimple as unknown as SummarizeCompleteFn)
+  const model = resolvePiModel({
+    model: input.model as never,
+    ...(input.provider && { provider: input.provider as never }),
+  })
+  const context = {
+    messages: [
+      ...historyMessages,
+      {
+        role: `user`,
+        content: COMPACTION_SUMMARIZATION_PROMPT,
+        timestamp: Date.now(),
+      },
+    ],
+  }
+
+  // Bound the call: pass `timeoutMs`/`signal` (which the anthropic provider
+  // honours) AND race against a hard timer, so a stalled stream that ignores the
+  // abort still rejects rather than hanging the background slot forever.
+  const timeoutMs = input.timeoutMs ?? DEFAULT_SUMMARY_TIMEOUT_MS
+  const controller = new AbortController()
+  let timer: ReturnType<typeof setTimeout> | undefined
+  const timeout = new Promise<never>((_, reject) => {
+    timer = setTimeout(() => {
+      controller.abort()
+      reject(new Error(`[compaction] summarize timed out after ${timeoutMs}ms`))
+    }, timeoutMs)
+  })
+
+  let res: Awaited<ReturnType<SummarizeCompleteFn>>
+  try {
+    // Start the call INSIDE the try. The timer is already armed, so if
+    // `complete` throws synchronously the `finally` below must still clear it;
+    // otherwise it would later reject `timeout` with nobody awaiting it (an
+    // unhandled rejection) and keep the event loop alive for `timeoutMs`.
+    const call = complete(model, context, {
+      maxTokens: input.maxTokens ?? DEFAULT_SUMMARY_MAX_TOKENS,
+      ...(input.apiKey && { apiKey: input.apiKey }),
+      signal: controller.signal,
+      timeoutMs,
+    })
+    // If the timeout wins the race, `call` rejects later (aborted) — swallow it
+    // so the loser doesn't surface as an unhandled rejection.
+    call.catch(() => {})
+
+    res = await Promise.race([call, timeout])
+  } finally {
+    if (timer !== undefined) clearTimeout(timer)
+  }
+
+  const textBlock = res.content.find((block) => block.type === `text`)
+  const text = textBlock && `text` in textBlock ? (textBlock.text ?? ``) : ``
+  if (text.trim().length === 0) {
+    throw new Error(
+      `[compaction] empty summary (stopReason=${res.stopReason ?? `none`} error=${res.errorMessage ?? `none`})`
+    )
+  }
+
+  return `${COMPACTION_SUMMARY_PREFIX}\n${text}`
+}
+
+/**
+ * Produce a compaction handoff summary from a conversation in LLMMessage form.
+ *
+ * The messages are converted with `toAgentHistory` and then handed to the
+ * shared summarization core. Uses the conversation's own model by default: a
+ * cheaper, small-window model would overflow on a near-full context — the
+ * whole reason we are compacting.
+ */
+export async function summarizeMessages(
+  input: SummarizeCoreInput & { messages: ReadonlyArray<LLMMessage> }
+): Promise<string> {
+  // `toAgentHistory` receives a fresh copy, never the caller's readonly array.
+  const mutableCopy = Array.from(input.messages)
+  const history = toAgentHistory(mutableCopy)
+  return summarizeConverted(history, input)
+}
+
+/**
+ * Produce a compaction handoff summary from messages that are already in
+ * `AgentMessage[]` form (what `transformContext` hands us mid-turn). Identical
+ * to `summarizeMessages` except that no LLMMessage→Agent conversion is done.
+ */
+export async function summarizeAgentMessages(
+  input: SummarizeCoreInput & { messages: ReadonlyArray<unknown> }
+): Promise<string> {
+  const { messages } = input
+  return summarizeConverted(messages, input)
+}
diff --git a/packages/agents-runtime/src/compaction.ts b/packages/agents-runtime/src/compaction.ts
@@ -0,0 +1,69 @@
+/**
+ * Context compaction.
+ *
+ * When the conversation approaches the context window, it is summarized into a
+ * durable "checkpoint" — a `context_inserted` row tagged `kind: "compaction"` —
+ * and the messages it summarizes are dropped from the reconstructed history. The
+ * checkpoint carries a watermark: reconstruction hides everything up to it and
+ * emits the summary in their place (see `timelineMessages`).
+ */
+
+/** `attrs.kind` marking a `context_inserted` row as a compaction checkpoint. */
+export const COMPACTION_CHECKPOINT_KIND = `compaction`
+
+/** `name` (and thus the rendered tag) for a compaction checkpoint entry. */
+export const COMPACTION_CHECKPOINT_NAME = `compaction_summary`
+
+/**
+ * Stable id for the (single, self-superseding) compaction checkpoint entry.
+ *
+ * NOTE(review): the changeset for this feature describes a watermark-unique
+ * checkpoint id per generation, which reads differently from "single,
+ * self-superseding". How ids are derived from this constant is not visible
+ * here — confirm this description against the call sites.
+ */
+export const COMPACTION_CHECKPOINT_ID = `compaction`
+
+/**
+ * Lifecycle of a compaction checkpoint, carried in `attrs.status`:
+ * - `running`  — summarization in flight (UI shows a live "Compacting…" entry)
+ * - `complete` — summary ready; acts as the timeline watermark
+ * - `failed`   — summarization failed; turn proceeded uncompacted
+ */
+export type CompactionStatus = `running` | `complete` | `failed`
+
+/**
+ * True when the attrs of a `context_inserted` row carry the compaction
+ * checkpoint `kind`, whatever the checkpoint's status. Missing attrs are never
+ * a checkpoint.
+ */
+export function isCompactionCheckpointAttrs(
+  attrs: Record<string, string | number | boolean> | undefined
+): boolean {
+  if (!attrs) return false
+  return attrs.kind === COMPACTION_CHECKPOINT_KIND
+}
+
+/**
+ * True only for a *completed* compaction checkpoint — the one state that acts
+ * as the reconstruction watermark. A `running` (or crashed) checkpoint must
+ * never hide history, so any other status yields `false`.
+ */
+export function isCompleteCompactionCheckpointAttrs(
+  attrs: Record<string, string | number | boolean> | undefined
+): boolean {
+  if (!attrs) return false
+
+  const isCheckpoint = attrs.kind === COMPACTION_CHECKPOINT_KIND
+  const isDone = attrs.status === `complete`
+  return isCheckpoint && isDone
+}
+
+/**
+ * Summarization prompt, reused verbatim from OpenAI Codex. Appended as a user
+ * message after the conversation being compacted.
+ */
+export const COMPACTION_SUMMARIZATION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.
+
+Include:
+- Current progress and key decisions made
+- Important context, constraints, or user preferences
+- What remains to be done (clear next steps)
+- Any critical data, examples, or references needed to continue
+
+Be concise, structured, and focused on helping the next LLM seamlessly continue the work.`
+
+/**
+ * Prefix prepended to the produced summary when it is reinserted as the
+ * checkpoint, reused verbatim from Codex.
+ */
+export const COMPACTION_SUMMARY_PREFIX = `Another language model started to solve this problem and produced a summary of its thinking process. You also have access to the state of the tools that were used by that language model. Use this to build on the work that has already been done and avoid duplicating work. Here is the summary produced by the other language model, use the information in this summary to assist with your own analysis:`