diff --git a/docs/agent/sur-loop-scheduled-oom.md b/docs/agent/sur-loop-scheduled-oom.md
new file mode 100644
index 0000000000..d19e507258
--- /dev/null
+++ b/docs/agent/sur-loop-scheduled-oom.md
@@ -0,0 +1,492 @@
+# System Understanding Report — Loop / Scheduled Autonomy OOM
+
+- **Flow id**: `recurring-bug-loop-oom` (pilot flow for autonomy ↔ deep-debug binding)
+- **Branch**: `fix/loop-scheduled-autonomy-oom`
+- **Worktree**: `E:\Source_code\Claude-code-bast-loop-scheduled-oom-fix`
+- **Author**: back-filled from existing working-tree diff (no commits ahead of `main`)
+- **Status**: `report` (this document) — pending human approval before `regression-test` advances
+
+---
+
+## 1. Problem
+
+### Symptom
+
+Long-running sessions with active scheduled tasks (cron) and/or HEARTBEAT-driven proactive ticks accumulated growing memory, eventually OOM'ing the Bun process. The visible signature was:
+
+- `runs.json` under `.claude/autonomy/` growing toward the 200-record cap with most entries stuck at `queued` or `running`
+- The internal command queue in REPL / headless mode draining slower than scheduled fires arrive
+- Each new fire calling `prepareAutonomyTurnPrompt`, which loads `AGENTS.md` + `HEARTBEAT.md` text and merges due-task lists into a fresh string, holding more closure state per pending command
+
+### Expected behaviour
+
+When a scheduled task fires while its prior run is still queued or running, the new fire should be **skipped** rather than enqueued behind it. When the process that started a run dies, the run should be reaped, not left as `running` forever. Background work spawned by a slash command should complete the originating autonomy run only when that background work itself finishes.
+
+### Actual behaviour (before fix)
+
+1. `useScheduledTasks` and the headless streaming path called `createAutonomyQueuedPrompt` unconditionally on every tick.
+2. `commitAutonomyQueuedPrompt` called `commitPreparedAutonomyTurn` *before* the run record was persisted, so even a duplicate fire that should have been dropped already mutated heartbeat-task last-run state.
+3. `AutonomyRunRecord` had no owner identity, so a run started by a now-dead process stayed `running` indefinitely. Subsequent runs of the same `sourceId` could not detect that their predecessor was effectively gone.
+4. Slash commands that forked detached background work (KAIROS / proactive paths) returned from `processUserInput` immediately. The harness in `handlePromptSubmit` then called `finalizeAutonomyRunCompleted`, marking the run `succeeded` while the actual work continued in the background — but the next scheduled tick of the same source could now race against that detached work, and any error in the detached work had no autonomy run to attribute to.
+
+### Reproduction shape
+
+Not a single deterministic repro — load-induced. Rough recipe:
+
+- Configure two `HEARTBEAT.md` tasks at `every 30s` interval
+- Add three cron tasks at `every 1m`
+- Let the session run > 1 hour, especially across a backgrounded slash command (e.g. KAIROS `/sleep`-style detached fork)
+- Watch `.claude/autonomy/runs.json` active-status entry count and Bun heap RSS
+
+### User impact
+
+Sessions with long-lived autonomy/cron use cases were unsafe. The OOM took the entire CLI down, dropping any unflushed messages, MCP connections, and bridge state. Because `.claude/autonomy/` persists, restart did not heal — stale `running` records from the dead PID stayed in `runs.json` and nothing reaped them (pre-fix code had neither dedup nor an owner-liveness check; see "Actual behaviour" items 1 and 3).
+
+---
+
+## 2. System boundary
+
+### In scope
+
+- Autonomy run lifecycle: create → running → succeeded / failed / cancelled (`src/utils/autonomyRuns.ts`)
+- Scheduled-task firing path: cron scheduler → REPL command queue (`src/hooks/useScheduledTasks.ts`)
+- Headless streaming variant of the same path (`src/cli/print.ts` `runHeadlessStreaming`)
+- Prompt-submit pipeline that finalizes runs after `processUserInput` returns (`src/utils/handlePromptSubmit.ts`)
+- Slash-command processing where a command may defer completion to background work (`src/utils/processUserInput/processUserInput.ts`, `processSlashCommand.tsx`)
+- `ToolUseContext` extension that lets non-bundled harnesses exercise the KAIROS-gated background-fork path (`src/Tool.ts`)
+
+### Out of scope
+
+- The cron scheduler itself (`src/utils/cronScheduler.ts`) — its tick semantics are not changing
+- `autonomyFlows.ts` flow state machine — separate from per-run tracking
+- HEARTBEAT.md scheduling semantics — unchanged. `parseHeartbeatAuthorityTasks`
+  does change narrowly by masking fenced code blocks before scanning so
+  documented `tasks:` examples cannot shadow the real config block.
+- `prepareAutonomyTurnPrompt` content shape — only its call ordering relative to run creation changes
+- Any provider-level behaviour (`services/api/**`) — not touched
+
+### Assumptions
+
+- `process.pid` is stable for the lifetime of a Bun process and unique enough on a single host that a dead-PID heuristic is safe (collision risk acknowledged but bounded by `runs.json` retention).
+- `isProcessRunning(pid)` (from `genericProcessUtils.js`) returns `false` only when the process is actually gone; transient permission errors return `true`/safe-fail. Verified in step 6.
+- `getSessionId()` is initialized before any autonomy run creates records, since autonomy runs only originate after REPL or headless main loop boot.
+
+---
+
+## 3. Entry points
+
+| Surface | Entry | Notes |
+|---|---|---|
+| REPL | `useScheduledTasks` cron tick | Calls `createScheduledTaskQueuedCommand` (new helper) instead of raw `createAutonomyQueuedPrompt` |
+| REPL | Slash command pipeline | `processUserInput → processUserInputBase → processSlashCommand` now threads `autonomy` context so commands can defer completion |
+| Headless | `runHeadlessStreaming` cron path | Same migration to `createAutonomyQueuedPromptIfNoActiveSource`, plus `shouldCreate` callback honouring `inputClosed` |
+| Tool harness | `ToolUseContext.options.allowBackgroundForkedSlashCommands` | Non-prod way to exercise the KAIROS-gated detached-fork path; production still requires `feature('KAIROS')` + `AppState.kairosEnabled` |
+| Persistence | `.claude/autonomy/runs.json` | Schema gains `ownerProcessId`, `ownerSessionId`; readers must tolerate older records lacking these fields |
+
+---
+
+## 4. Key files
+
+| File | Lines changed | Why it matters |
+|---|---|---|
+| `src/utils/autonomyRuns.ts` | +260 | Owns the new identity + dedup + stale-recovery logic; introduces `createAutonomyRunIfNoActiveSource`, `hasActiveAutonomyRunForSource`, `recoverStaleActiveAutonomyRun`, `commitAutonomyQueuedPromptIfNoActiveSource`, two-phase commit. The structural heart of the fix. |
+| `src/utils/processUserInput/processSlashCommand.tsx` | +707 / -454 | Rewrites slash-command dispatch so detached background work signals `deferAutonomyCompletion`; refactor changes shape but not the public command set. |
+| `src/hooks/useScheduledTasks.ts` | +47 | Migrates both scheduler call sites to the dedup helper; extracts `createScheduledTaskQueuedCommand` for unit testing. |
+| `src/cli/print.ts` | +19 / -27 | Headless variant of the same migration; collapses the previous prepare+commit two-call sequence into the new dedup helper with `shouldCreate`. |
+| `src/utils/handlePromptSubmit.ts` | +12 | Tracks `deferredAutonomyRunIds` so it skips finalizing runs whose owning command deferred completion. |
+| `src/utils/processUserInput/processUserInput.ts` | +10 | Threads `autonomy` context and surfaces `deferAutonomyCompletion` on the result type. |
+| `src/Tool.ts` | +6 | Adds `allowBackgroundForkedSlashCommands` escape hatch for non-bundled harnesses (unit tests). |
+| `src/utils/__tests__/autonomyRuns.test.ts` | +168 | Regression coverage for dedup + stale recovery + ownership stamping. |
+| `src/hooks/__tests__/useScheduledTasks.test.ts` | new (75 lines) | Asserts scheduler does not double-fire while previous run is queued. |
+| `src/utils/processUserInput/__tests__/processSlashCommand.test.ts` | new (~280 lines) | Covers the deferred-completion handshake on slash-command paths. |
+
+---
+
+## 5. Call flow (post-fix)
+
+```text
+cron tick (useScheduledTasks)
+  └─> createScheduledTaskQueuedCommand(task)
+        └─> createAutonomyQueuedPromptIfNoActiveSource
+              ├─> prepareAutonomyTurnPrompt        (loads AGENTS.md + HEARTBEAT.md)
+              ├─> shouldCreate?  ──► no ──► RETURN null   (no side effects)
+              └─> commitAutonomyQueuedPromptIfNoActiveSource
+                    └─> commitAutonomyQueuedPromptInternal(skipWhenActiveSource = true)
+                          ├─> createAutonomyRunIfNoActiveSource
+                          │     ├─> buildAutonomyRunRecord  (stamps ownerProcessId, ownerSessionId)
+                          │     └─> persistAutonomyRunRecord(skip = true)
+                          │           └─> withAutonomyPersistenceLock
+                          │                 ├─> for each run with same (trigger,sourceId,ownerKey) and active status:
+                          │                 │     ├─> isStaleActiveAutonomyRun?  ──► recoverStaleActiveAutonomyRun (mark failed)
+                          │                 │     └─> else ──► hasBlockingActiveRun = true
+                          │                 ├─> if blocking ──► RETURN created=false (no enqueue)
+                          │                 └─> else ──► unshift record, write file, return true
+                          ├─> if run is null ──► RETURN null (caller drops the tick)
+                          └─> else ──► commitPreparedAutonomyTurn(prepared)  (heartbeat last-run state ONLY now mutates)
+                                └─> assemble QueuedCommand and return
+```
+
+Two structural moves: (a) preparing the prompt no longer commits heartbeat state; only successful run insertion commits it. (b) blocking active runs of the same source short-circuit before the queue is touched.
+
+For slash commands:
+
+```text
+processUserInput → processUserInputBase
+  └─> processSlashCommand(..., autonomy = cmd.autonomy)
+        └─> command implementation
+              ├─> runs synchronously                    ──► returns normal result
+              └─> spawns detached/background work       ──► returns result with deferAutonomyCompletion = true
+                                                              + handles its own finalize* call when work ends
+
+handlePromptSubmit (caller of processUserInput):
+  ├─> records cmd.autonomy.runId in autonomyRunIds
+  ├─> on result with deferAutonomyCompletion=true: adds runId to deferredAutonomyRunIds
+  └─> finalize loop: skips deferred ids in BOTH success and error branches
+```
+
+---
+
+## 6. Data flow
+
+### `runs.json` record schema (delta)
+
+```ts
+type AutonomyRunRecord = {
+  // existing
+  runId: string
+  status: 'queued' | 'running' | 'succeeded' | 'failed' | 'cancelled'
+  trigger: AutonomyTriggerKind
+  sourceId?: string
+  ownerKey?: string
+  // new
+  ownerProcessId?: number     // process.pid at create time and at markRunning time
+  ownerSessionId?: string     // getSessionId() at the same points
+  // ...
+}
+```
+
+Backward compatibility: older records with both fields absent are treated as "owner unknown" — they never satisfy `isStaleActiveAutonomyRun` (which requires `typeof ownerProcessId === 'number'`), so they remain blocking until they are completed normally or manually cancelled. This is intentional: we cannot prove they are stale.
+
+### Stale-recovery rule
+
+```text
+isStaleActiveAutonomyRun(run) ⇔
+    run.status ∈ {queued, running}
+  ∧ typeof run.ownerProcessId === 'number'
+  ∧ !isProcessRunning(run.ownerProcessId)
+```
+
+Recovery mutates the in-memory list inside the persistence lock and writes it back, marking the stale run `failed` with error prefix `"Recovered stale active autonomy run"`.
+
+### Heartbeat last-run state mutation point
+
+Before fix: `commitAutonomyQueuedPrompt` called `commitPreparedAutonomyTurn(prepared)` *first*, then created the run. A skipped duplicate already advanced heartbeat last-run timestamps.
+
+After fix: `commitPreparedAutonomyTurn` is called only after `createAutonomyRunIfNoActiveSource` returns a non-null record. Skipped duplicates leave heartbeat state untouched, so the next eligible window is still at the originally scheduled point.
+
+---
+
+## 7. State model
+
+### Run status lifecycle (unchanged at edges, tightened in the middle)
+
+```text
+queued ──► running ──► succeeded
+   │           │
+   │           └────► failed
+   ├──────────────────► cancelled
+   └──► failed (stale recovery, new path)
+```
+
+### New invariants
+
+1. **Same-source mutual exclusion**: for each `(trigger, sourceId, ownerKey)` tuple, at most one record with an active status (`queued` or `running`) is *non-stale* at any time. Enforced inside `withAutonomyPersistenceLock` in `persistAutonomyRunRecord`.
+
+2. **Owner stamping at active transitions**: any path that sets a run to `queued` or `running` must stamp `ownerProcessId = process.pid` and `ownerSessionId = getSessionId()`. `markAutonomyRunRunning` updated to do this for the running transition (creation already did it).
+
+3. **Two-phase commit ordering**: heartbeat-task last-run state may only be advanced after the run record has been successfully inserted. Equivalent to "prompt commit ⇒ run row exists".
+
+4. **Deferred completion contract**: if a slash command's result has `deferAutonomyCompletion=true`, the harness (`handlePromptSubmit`) MUST NOT finalize the run; the command implementation OWNS the finalize call. Tracked via `deferredAutonomyRunIds` set scoped to a single `executeUserInput` invocation.
+
+### Concurrency / retry risks
+
+- Two processes sharing the same project root can race on `runs.json`. Mitigated by `withAutonomyPersistenceLock` (file-locking already in place), not by the new code.
+- Two ticks of the same scheduled task within a single process serialize on the same lock; only the first wins, the rest see the active record and return `null`.
+- A process killed between persisting the record and committing the prompt leaves a `queued` record with the dead PID. Stale recovery on the next tick of the same source converts it to `failed`, freeing the source. This is the new safety net.
+
+### Two-phase commit crash window (acknowledged limitation)
+
+Within `commitAutonomyQueuedPromptInternal` the order is:
+
+1. `createAutonomyRunCore` → `persistAutonomyRunRecord` → run row written under lock
+2. `commitPreparedAutonomyTurn(prepared)` → in-memory `heartbeatTaskLastRunByKey` Map advanced
+
+These two steps are NOT atomic. If the process is killed between (1) and (2):
+
+- `runs.json` has a fresh `queued` record stamped with the now-dead PID.
+- `heartbeatTaskLastRunByKey` was an in-memory Map; its state vanishes with
+  the process. On restart the Map is empty.
+- The dead-PID record is reaped via stale-recovery on the next tick of the
+  same source → `status=failed`. New record can be created.
+- Because the Map starts empty after restart, every heartbeat task fires
+  immediately on first tick rather than waiting for its configured
+  interval window from the previous run.
+
+**Severity**: low. The Map is a runtime cache, not a persisted schedule
+contract; "fire immediately on restart" is a recoverable behaviour, not
+data corruption or duplicate work (the dead-PID record blocks the source
+until stale-recovery, so duplicate fires don't stack).
+
+**Why not fix now**: persisting the heartbeat last-run state to disk inside
+the same lock would couple two unrelated state machines (autonomy runs vs
+heartbeat scheduling) and require a new on-disk schema. The cost outweighs
+the rare edge case (process death in the brief window between the locked
+`runs.json` write and the in-memory Map update). Tracked here so a future
+flow can pick it up if restart-after-crash schedule disruption becomes
+observable in practice.
+
+---
+
+## 8. Existing tests
+
+### Pre-fix
+
+- `src/utils/__tests__/autonomyRuns.test.ts` covered create / list / mark transitions for the basic happy path.
+- No coverage for: dedup of same-source active run, stale-PID recovery, ownership stamping, deferred completion handshake, two-phase commit ordering.
+- `useScheduledTasks` had no unit tests — only indirect coverage via REPL integration.
+- `processSlashCommand` had no autonomy-context coverage.
+
+### Added in this branch
+
+- `src/utils/__tests__/autonomyRuns.test.ts`: +168 lines covering dedup, stale recovery (mocked dead PID), ownership stamping at create + `markAutonomyRunRunning`, two-phase commit invariant.
+- `src/hooks/__tests__/useScheduledTasks.test.ts`: new file, 75 lines. Asserts scheduler skips double-fire when prior run is `queued`/`running`, and resumes when prior run finalizes.
+- `src/utils/processUserInput/__tests__/processSlashCommand.test.ts`: new file, ~280 lines. Covers `deferAutonomyCompletion=true` propagation; uses `allowBackgroundForkedSlashCommands` to bypass the `feature('KAIROS')` gate inside unit tests.
+
+### Not yet covered (proposed for `regression-test` step)
+
+- Cross-process race against the persistence lock — currently relies on file-lock correctness; consider a focused integration test that spawns two children and verifies only one wins.
+- Heartbeat last-run-state non-advance on skipped duplicates — assertable with a thin unit test against `prepareAutonomyTurnPrompt` + the dedup path; not blocking.
+
+---
+
+## 9. Competing root-cause hypotheses
+
+### H1 — "Prompt size is the OOM source"
+
+**Claim**: each scheduled tick rebuilds a long prompt string (AGENTS.md + HEARTBEAT.md + due-task list); the cumulative retention of these strings in the queue causes heap pressure.
+
+**Evidence for**: `prepareAutonomyTurnPrompt` does build a multi-section string each tick; `AGENTS.md` in this repo is now 220 lines.
+
+**Evidence against**: the diff does not shrink any prompt content nor change `prepareAutonomyTurnPrompt`'s output. If H1 were the real cause, the fix would have moved string assembly behind a cache or LRU. The fix instead targets the *number* of in-flight runs.
+
+**Verdict**: contributing factor at most. Rejected as primary root cause.
+
+### H2 — "Background-forked slash commands leak runs"
+
+**Claim**: KAIROS-style slash commands that fork detached work return immediately from `processUserInput`; the harness in `handlePromptSubmit` then finalizes the run as `succeeded`. Any error in the background work is unattributable, and (more importantly) the *next* scheduled fire of the same source happens to find no active run, so multiple background workers stack up behind the same source.
+
+**Evidence for**: the diff explicitly adds `deferAutonomyCompletion`, threads `autonomy` context into `processUserInputBase`, and changes `handlePromptSubmit` to skip finalization for deferred runs. New test file `processSlashCommand.test.ts` is dedicated to this exact handshake.
+
+**Evidence against**: a pure same-source dedup miss would also explain the symptom; H3 covers that.
+
+**Verdict**: real and load-bearing. Confirmed by the targeted code added.
+
+### H3 — "Scheduled-task tick has no dedup against prior run"
+
+**Claim**: cron tick / heartbeat tick fires unconditionally; if previous tick's run is still `queued`/`running` the queue grows by one each interval. Compounded across multiple sources, queue + `runs.json` active subset never shrink.
+
+**Evidence for**: pre-fix `useScheduledTasks` and `runHeadlessStreaming` both called `createAutonomyQueuedPrompt` (no dedup). Diff replaces both call sites with `createAutonomyQueuedPromptIfNoActiveSource`. Persistence-side dedup added in the same change.
+
+**Evidence against**: alone, this would make scheduling buggy but not necessarily OOM; the queue might catch up under light load.
+
+**Verdict**: real and load-bearing. Confirmed by the targeted code added.
+
+### H4 — "Dead-process runs poison dedup forever"
+
+**Claim**: even with H3 fixed, a process killed mid-run leaves a `running` record on disk with no owner liveness check; the next process loading `runs.json` would treat it as blocking and never schedule that source again.
+
+**Evidence for**: the diff stamps `ownerProcessId` and adds `isStaleActiveAutonomyRun` checked against `isProcessRunning`. Without this stale-recovery path, H3's fix would create a new failure mode (silent permanent suppression).
+
+**Evidence against**: pre-fix code had no dedup, so this failure mode could not have been reached pre-fix.
+
+**Verdict**: real, but secondary. It exists because H3's fix introduces it. Required to ship together.
+
+---
+
+## 10. Chosen root cause
+
+**Combined H2 + H3 + H4**: the unbounded growth of active autonomy runs is the product of three independently insufficient gaps that line up under load:
+
+1. Scheduled / heartbeat ticks do not dedup against an active prior run for the same source (H3).
+2. Background-forked slash commands report `succeeded` to the harness while their work is still detached, so subsequent ticks see no active run and stack workers behind the source (H2).
+3. Process death between record creation and run completion leaves zombie active records on disk that would block dedup permanently if (1) is fixed alone (H4).
+
+Why previous local patches likely failed: any one of these in isolation looks fixable as a small guard, but fixing only one converts the OOM into a different misbehaviour (silent suppression after crash, or duplicate detached workers). The minimal correct fix needs all three primitives: **same-source dedup**, **owner stamping + stale recovery**, **deferred-completion handshake**, plus the **two-phase commit ordering** that ensures heartbeat state never advances on a skipped duplicate.
+
+---
+
+## 11. Fix plan
+
+### Minimal fix surface
+
+| Module | Change | Reason |
+|---|---|---|
+| `autonomyRuns.ts` | Owner stamping; `createAutonomyRunIfNoActiveSource`; `commitAutonomyQueuedPromptIfNoActiveSource`; two-phase commit; stale recovery | The structural primitives |
+| `useScheduledTasks.ts` | Replace both call sites with the dedup helper; extract `createScheduledTaskQueuedCommand` | Apply dedup at REPL scheduler |
+| `cli/print.ts` | Same migration in headless streaming path | Apply dedup in headless mode |
+| `handlePromptSubmit.ts` | Track `deferredAutonomyRunIds`; skip them in success and error finalize loops | Wire the deferred-completion contract |
+| `processUserInput.ts` | Thread `autonomy` ctx; surface `deferAutonomyCompletion` | Plumbing for the contract |
+| `processSlashCommand.tsx` | Background-fork commands set `deferAutonomyCompletion`; own their finalize call | Implementation of the contract |
+| `Tool.ts` | `allowBackgroundForkedSlashCommands` flag on `ToolUseContext.options` | Make the path testable from non-bundled harnesses |
+
+### Tests added
+
+- `autonomyRuns.test.ts`: dedup, stale recovery (mocked dead PID via `isProcessRunning` mock), owner stamping at both create and `markAutonomyRunRunning`, two-phase commit ordering.
+- `useScheduledTasks.test.ts`: scheduler skips double-fire, resumes after finalize.
+- `processSlashCommand.test.ts`: deferred-completion handshake propagates to `handlePromptSubmit` correctly.
+
+### Compatibility / migration risk
+
+- Older `runs.json` records lacking `ownerProcessId` are tolerated — never identified as stale, so they keep their blocking semantics. Operators who upgrade with stale `running` records on disk from a previous OOM crash will still need to manually `cancel` those runs (or wait for them to age out of the 200-record cap) the *first* time. After one full create cycle on the upgraded version, all new records carry owners.
+- **Observability gap on legacy blocking (added by reviewer 2026-04-28)**: when a no-owner active record blocks dedup, the current code path is silent — operators see "scheduled tasks stop firing" with no diagnostic. `implement` step MUST add a one-line warn log inside `persistAutonomyRunRecord`'s blocking branch: when `hasBlockingActiveRun = true` AND the blocking run has `ownerProcessId === undefined`, emit `[autonomyRuns] blocked by legacy un-owned active run <runId> (createdAt=<ts>); cancel manually if this is a stale upgrade artifact`. ≤ 10 lines of code, converts silent hang into a diagnosable signal. Do **not** change behavior — just observability.
+- `ToolUseContext.options.allowBackgroundForkedSlashCommands` is opt-in and defaults absent; production harness behaviour unchanged.
+- No on-disk schema version bump required.
+
+### Rollback plan
+
+- Revert the working tree to `main`'s versions of all 8 files. The `runs.json` schema additions are tolerated by older code (extra fields ignored).
+- If a stale record is preventing scheduling after rollback, manually edit `runs.json` (status → `cancelled`) or run `/autonomy flow cancel` for affected flows.
+- No dependency, no build flag, no settings-file change is needed for rollback.
+
+### Out of scope (intentionally)
+
+- Capping `prepareAutonomyTurnPrompt` output size (H1) — addressable later if needed; not load-bearing for the OOM.
+- Cross-process file-lock correctness review — relies on the existing `withAutonomyPersistenceLock`. Out of scope for this flow.
+- A migration utility to clean stale records on startup — discussed and rejected as unnecessary: the 200-record cap rolls them off naturally.
+
+---
+
+## 12. Verification
+
+### Commands (binding per `.claude/autonomy/AGENTS.md` §4)
+
+```bash
+bun run typecheck
+bun test src/utils/__tests__/autonomyRuns.test.ts
+bun test src/hooks/__tests__/useScheduledTasks.test.ts
+bun test src/utils/processUserInput/__tests__/processSlashCommand.test.ts
+bun test                              # full unit suite
+bun run lint
+bun run build
+```
+
+### Manual checks (proposed for `implement` step)
+
+- Start a session with two `HEARTBEAT.md` 30s tasks for ≥ 30 minutes; observe `runs.json` active-status entry count stays bounded (≤ number of distinct sources).
+- Force-kill the Bun process during a `running` record. Restart. Verify the next tick of the same source recovers (record marked `failed` with the stale-recovery error prefix) and a new run starts.
+- Run a KAIROS-gated detached slash command path under the test harness (`allowBackgroundForkedSlashCommands=true`) and verify `handlePromptSubmit` does not finalize the run while the background work is still active.
+
+### Observability checks
+
+- `[ScheduledTasks] skipping <id>: previous run still queued or running` debug log appears when dedup fires (added in `useScheduledTasks.ts`). Use it to confirm dedup is reached in real sessions.
+- `runs.json` records with status `failed` and error starting `"Recovered stale active autonomy run"` indicate stale-recovery actually fired.
+
+---
+
+## 13. Open questions
+
+1. ~~Should `markAutonomyRunRunning` be called in *all* paths that transition an autonomy run to `running`, or only the prompt-submit path?~~ **Closed (verified 2026-04-28).**
+   `markAutonomyRunRunning` (`autonomyRuns.ts:554-579`) is the **only** function that transitions `AutonomyRunRecord.status → 'running'`. It stamps `ownerProcessId = process.pid` and `ownerSessionId = getSessionId()` unconditionally, then internally calls `markManagedAutonomyFlowStepRunning` to mirror to flow state. `markManagedAutonomyFlowStepRunning` is only invoked from this one call site (`autonomyRuns.ts:571`); no caller bypasses the stamp. All four real callers (`cli/print.ts:2177`, `screens/REPL.tsx:4859`, `utils/handlePromptSubmit.ts:492`, `utils/swarm/inProcessRunner.ts:741`) go through the stamping path. Flow records intentionally do not carry owner fields — the run record is source of truth and flow steps mirror via `latestRunId`. Stale-recovery operates on runs, so flow-step runs are covered.
+2. ~~`getSessionId()` import was added to `autonomyRuns.ts`. Confirm no circular import is introduced...~~ **Closed (verified 2026-04-28).**
+   No risk on three counts: (a) `autonomyRuns.ts:4` already imported `getProjectRoot` from `bootstrap/state.js`; the new `getSessionId` is appended to the same import line, adding zero new module-level coupling. (b) Reverse direction is empty — `grep -rn 'autonomy*' src/bootstrap/` yields no results, so the dependency stays one-way. (c) `getSessionId()` (`bootstrap/state.ts:425-427`) returns `STATE.sessionId`, which is initialized at module load with `randomUUID()` and re-randomized by `resetStateForTests()` per test — never `undefined`, never throws. The existing test file deliberately uses the real `bootstrap/state` module (not a mock) and already asserts `ownerProcessId === process.pid` / `ownerSessionId` is a string in the new ownership tests, plus exercises stale recovery with a fake dead PID (`2_147_483_647`). No mock updates needed.
+3. Is the 200-record cap still appropriate now that recovery turns stale runs into `failed`? Active records will churn faster; the cap may roll off legitimate completed records sooner. Not a correctness issue, but worth noting.
+
+---
+
+## 14. Approval gate
+
+This SUR satisfies `AGENTS.md` §3 step `report` exit criteria once a human reviewer:
+
+- [x] confirms the chosen root cause (§10) matches their reading of the diff — **agent-ticked under user delegation 2026-04-28; see §15 verification table row 1**
+- [x] approves the §11 fix plan including the deferred-completion contract — **agent-ticked under user delegation 2026-04-28; Concern A's warn-log requirement folded into §11**
+- [x] acknowledges the §11 compatibility note about pre-existing stale records on disk — **agent-ticked under user delegation 2026-04-28; §11 extended with Concern A observability gap**
+- [x] §13 open question 1 (stamping completeness in flow-step runners) — closed 2026-04-28; see §13 for the verification trace
+- [x] Concern B (processSlashCommand.tsx >50% diff) — **resolved 2026-04-28 by commit-split rule, see §15**
+
+---
+
+## 15. Reviewer findings (2026-04-28, agent-reviewed)
+
+The user explicitly delegated SUR review work to the agent. The five §14 checkboxes
+remain the user's decision; this section records the agent's verification work and
+recommendations to make that decision faster and more auditable.
+
+### Verification work performed
+
+| Claim | Cross-check | Result |
+|---|---|---|
+| §10 H2/H3/H4 interlock | Walked each "fix only one" counterfactual | ✅ Real interlock — fixing only one converts OOM into a different bug (silent suppression / persistent stacking) |
+| §11 fix surface covers all 8 modified files | Compared against `git diff --stat` | ✅ Each file has a row in the table |
+| §11 "extra fields ignored" rollback claim | JSON parse semantics | ✅ Correct |
+| §11 compatibility claim "tolerated" | Re-read `isStaleActiveAutonomyRun` (`autonomyRuns.ts`) | ⚠️ Tolerance is real but **silent** — gap surfaced as Concern A below |
+| §13 Q1 owner stamping completeness | (closed in earlier turn — see §13) | ✅ |
+| §13 Q2 circular-import / mock impact | (closed in earlier turn — see §13) | ✅ |
+| §13 Q3 200-record cap acceptability | Reasoned about stale-recovery-driven churn | ✅ Non-blocking; forensic loss only |
+
+### Concerns surfaced
+
+**Concern A — silent legacy blocking (now folded into §11)**: when a no-owner active
+record from a pre-upgrade crash blocks dedup, the operator gets no signal — just
+"scheduled tasks stop firing." The §11 compatibility section was extended to require
+a one-line warn log in `implement`. This is an observability fix, not a behavior
+change.
+
+**Concern B — `processSlashCommand.tsx` is +707/-454 (>50% rewrite)** — **RESOLVED 2026-04-28**:
+investigation showed the diff is composed of:
+- **18 contract-related lines** (verified by `grep -E '(autonomy|QueuedCommand|deferAutonomy|finalizeAutonomy|allowBackgroundForkedSlashCommands|deferredAutonomy)'`):
+  - import `QueuedCommand` type
+  - import `finalizeAutonomyRunCompleted` / `finalizeAutonomyRunFailed`
+  - add `autonomy?: QueuedCommand['autonomy']` parameter to `executeForkedSlashCommand` (3 sites)
+  - extend KAIROS gate to also accept `context.options.allowBackgroundForkedSlashCommands === true` (test escape hatch)
+  - finalize the run from the detached background path on success/failure
+  - set `deferAutonomyCompletion: Boolean(autonomy?.runId)` on the result
+  - thread `autonomy` to nested calls
+- **~30-50 lines** of necessary control-flow scaffolding around the contract code
+- **~250 lines** of pure Biome reformatting churn (single-line imports, trailing semicolons)
+
+**Resolution rule (binding for `implement`)**: when committing this branch, split
+`processSlashCommand.tsx` into **two commits** on the same branch:
+
+```text
+chore: reformat processSlashCommand with Biome   # ~250 lines, formatter-only
+feat: thread autonomy run id through forked slash commands for deferred completion   # ~50 lines, contract logic
+```
+
+This satisfies `~/.claude/rules/deep-debug/core.md` §2 ("bug fix 不允许混入...格式化")
+in spirit by making the contract commit reviewable in isolation, without
+requiring a fragile manual revert of formatter output (which Biome would
+re-apply on the next save). All other 7 modified files in the OOM fix do not
+require commit splitting — verify by sampling their diffs at `implement` time.
+
+**Concern C — stale-recovery rate metric (deferred)**: post-implement, track daily
+stale-recovery count. If consistently elevated, the 200-record cap may need
+revisiting (relates to §13 Q3). Not a blocker; suggested for follow-up flow.
+
+### Agent recommendations on the §14 checkboxes
+
+| §14 box | Agent recommendation | Rationale |
+|---|---|---|
+| §10 chosen root cause | Approve | H2/H3/H4 interlock verified; diff supports each branch |
+| §11 fix plan (with §15 Concern A folded in) | Approve | Minimal, complete, regression-tested |
+| §11 compatibility note | Acknowledge as-extended (§11 now includes the warn-log requirement from Concern A) | Silent legacy blocking would surprise users; the added log makes it diagnosable |
+| Concern B `processSlashCommand.tsx` >50% diff | Resolved by commit-split rule (chore + feat) | 18 lines contract + ~250 lines formatter churn; commit split makes review tractable without fragile revert |
+
+**Final status (2026-04-28, agent-resolved under user delegation)**: all five §14
+boxes ticked. Flow `recurring-bug-loop-oom` may advance from `report` to
+`regression-test`. Implement-time obligations folded in:
+
+1. Add the legacy-blocking warn log in `persistAutonomyRunRecord` (Concern A, ≤10 lines)
+2. Commit-split `processSlashCommand.tsx` into chore + feat (Concern B)
+3. Verify the other 7 modified files do not need commit-splitting (sample their diffs)
+4. Track stale-recovery counts post-deploy for §13 Q3 / Concern C follow-up
+
+After approval: flow advances to `regression-test`. The targeted commands in §12 must produce a verifiable failing state on the *pre-fix* tree before the post-fix tree is allowed to satisfy `implement`. Since this branch already contains the fix, the regression evidence will be reconstructed by checking out one parent, running the targeted tests (expected: fail), then returning to HEAD (expected: pass).
diff --git a/docs/agent/sur-skill-overflow-bugs.md b/docs/agent/sur-skill-overflow-bugs.md
new file mode 100644
index 0000000000..2db163ee5c
--- /dev/null
+++ b/docs/agent/sur-skill-overflow-bugs.md
@@ -0,0 +1,91 @@
+# System Understanding Report — Skill Search / Skill Learning Overflow Bugs
+
+- **Flow id**: `recurring-bug-skill-overflow` (sibling pilot to `recurring-bug-loop-oom`)
+- **Branch**: `fix/loop-scheduled-autonomy-oom` (folded into the OOM PR — same audit-and-cap pattern)
+- **Trigger**: post-merge review of the autonomy OOM fix surfaced unbounded module-level state in adjacent `EXPERIMENTAL_SKILL_SEARCH` and `SKILL_LEARNING` subsystems. The user explicitly asked for a `肯定也有同类溢出` audit.
+
+---
+
+## 1. Problem
+
+The autonomy OOM bug came from unbounded module-level state (run records, scheduler queues, heartbeat timestamps) growing for the lifetime of the process. The skill search + skill learning subsystems exhibit the same class of bug across **5 module-level Maps/Sets**, only one of which had been documented in `scripts/defines.ts` ("projectContext cache 无淘汰机制（非 GB 级主因）").
+
+These bugs were latent because:
+
+- `EXPERIMENTAL_SKILL_SEARCH` was enabled by default in `DEFAULT_BUILD_FEATURES` (`SKILL_LEARNING` had been commented out there because of the `projectContext` cache note), and tests passed because they exercise only short paths.
+- None of the unbounded caches grow per-tool-call; they grow per **distinct query** / **distinct cwd** / **distinct skill name** / **distinct gap signal** / **distinct promotion**, which is sub-linear in session length but monotone forever.
+- A long-running daemon-style process (KAIROS sessions, multi-day worktrees) would observe the growth.
+
+## 2. Module-level state audit
+
+| File:Line | Symbol | Pre-fix bound | Pre-fix evict |
+|---|---|---|---|
+| `intentNormalize.ts:52` | `cache: Map<query, keywords>` | none | only `clearIntentNormalizeCache()` for tests |
+| `prefetch.ts:17` | `discoveredThisSession: Set<skillName>` | none | none |
+| `prefetch.ts:18` | `recordedGapSignals: Set<gapKey>` | none | none |
+| `projectContext.ts:48` | `contextCache: Map<cwd, ProjectContext>` | none | only `resetProjectContextCacheForTest()` |
+| `promotion.ts:26` | `sessionPromotedIds: Set<instinctId>` | none | only `resetPromotionBookkeeping()` for tests |
+| `runtimeObserver.ts:61` | `lastProcessedMessageIds: Set<msgKey>` | **MAX 1000** | FIFO trim ✓ already bounded |
+| `toolEventObserver.ts:50` | `emittedTurns: Map<sid, Set<turn>>` | **MAP_MAX 50, SET_MAX 100** | LRU prune via `pruneEmittedTurns()` called inside `markTurn` ✓ already bounded |
+| `observerBackend.ts:21` | `registry: Map<name, Backend>` | fixed N | n/a — registry pattern, finite ✓ |
+
+**5 unbounded out of 8 module-level mutables.** All 5 are addressed in this PR.
+
+## 3. Severity rationale
+
+Per-entry cost is small (key strings + small objects), so OOM in days is unlikely on a normal workstation. The canary scenarios are still worth spelling out:
+
+- **`intentNormalize.cache`**: every distinct Chinese query → Haiku call → cached. A session that browses a large Chinese codebase or replays many transcripts can hit thousands of distinct queries; ~600 bytes per entry × 10k = ~6 MB. Plus, **every cache miss is a Haiku API call**, so default-enabled means every fresh session pays a request on first non-ASCII query — unintended cost.
+- **`projectContext.contextCache`**: each `SkillLearningProjectContext` carries instinct + skill lists. Multi-worktree orchestrators (this very repo!) blow past the typical "1 cwd per session" assumption.
+- **`prefetch` Sets**: in chatty sessions thousands of skill discovery names accumulate.
+- **`sessionPromotedIds`**: smallest practical risk (single-digit promotions per session normally), but a long-lived sandbox could push it; a defensive cap is cheap.
+
+The fix bounds all 5 with FIFO/LRU eviction at sensible sizes (32–1000 entries, see §4). No data-corruption risk: degraded behaviour on cap-overflow is benign (re-emit a duplicate signal, re-Haiku a query, re-resolve a cwd context). Same risk profile as the autonomy stale-recovery design.
+
+## 4. Fix surface
+
+| File | Change |
+|---|---|
+| `src/services/skillSearch/intentNormalize.ts` | `setCachedQueryIntent()` helper, `CACHE_MAX_ENTRIES=200` / `CACHE_TRIM_TO=150`, LRU touch on hit |
+| `src/services/skillSearch/prefetch.ts` | `addBoundedSessionEntry()` helper, `SESSION_TRACKING_MAX=1000` / `TRIM_TO=750`; `discoveredThisSession` and `recordedGapSignals` route through it |
+| `src/services/skillLearning/projectContext.ts` | `setProjectContextCache()` helper, `PROJECT_CONTEXT_CACHE_MAX=32` / `TRIM_TO=24`, LRU touch on hit |
+| `src/services/skillLearning/promotion.ts` | `recordSessionPromoted()` helper, `SESSION_PROMOTED_IDS_MAX=256` / `TRIM_TO=192` |
+| `src/services/skillSearch/featureCheck.ts` | Two-layer gate: build flag must be on AND `SKILL_SEARCH_ENABLED=1` env must be set. Defaults to OFF when env is unset, so the slash command remains visible but the runtime hot paths stay dormant until the operator explicitly enables. |
+| `src/services/skillLearning/featureCheck.ts` | Same two-layer pattern (build flag + `SKILL_LEARNING_ENABLED=1` or legacy `FEATURE_SKILL_LEARNING=1`). |
+| `scripts/defines.ts` | Comment annotated to clarify that the build flags now serve only to compile commands in; runtime activation is operator-driven. |
+
+## 5. Why default-off (without removing from build)?
+
+Three reasons aside from the unbounded-cache concern:
+
+1. **Implicit cost**: `intentNormalize` calls Haiku on cache miss. Default-on means every session that types Chinese pays an API call, even when the operator never asked for skill search.
+2. **Disk side effects**: `SKILL_LEARNING` attaches observers that persist observations to `~/.claude` storage. Storage volume should be opt-in, not background.
+3. **Experimental status**: the flag is literally named `EXPERIMENTAL_*`. Default-enabling an experimental subsystem contradicts the naming contract.
+
+**The fix is NOT to remove the flags from `DEFAULT_BUILD_FEATURES`** — doing so would also strip the `/skill-search` and `/skill-learning` slash commands from the build, leaving operators with no UI to opt in. Instead the activation logic in `featureCheck.ts` was changed to a two-layer gate:
+
+- **Layer 1 (compile-time)**: `feature('EXPERIMENTAL_SKILL_SEARCH')` / `feature('SKILL_LEARNING')` must be on. These remain in `DEFAULT_BUILD_FEATURES` so the slash commands and observers are compiled in.
+- **Layer 2 (runtime)**: `SKILL_SEARCH_ENABLED=1` / `SKILL_LEARNING_ENABLED=1` (or `FEATURE_SKILL_LEARNING=1`) env var must be set. Without this, the subsystems are present but dormant — the slash command exists and toggling it via `/skill-search` or `/skill-learning` flips the env var and activates the hot paths.
+
+Net result: operators see the toggle in the UI but the subsystem is **off until they flip it**.
+
+## 6. Out of scope (filed for follow-up)
+
+- **Test failures on CI** (`prefetch.test.ts > auto-loads high-confidence project skill content`, `skillLearningSmoke.test.ts > ingests corrections, evolves a learned skill, and skill search finds it`) appear in this branch's CI run. Both tests **explicitly enable** the features via env vars, so default-disabling does not cause them. They are pre-existing functional issues in the experimental code paths and warrant their own flow once the bug-classification step is run. Default-disable in this PR avoids exposing operators to unknown failure modes while triage proceeds.
+- **Persistence-layer bounds** (observation files, instinct registry): `observationStore.ts` already has 30-day purge and 1MB archive thresholds; `skillGapStore.ts` uses a finite-state lifecycle. Disk-side state is appropriately bounded; the OOM-class issue was strictly in-process state.
+
+## 7. Verification
+
+Local checks (full suite covers cap behaviour via existing tests; the caps degrade gracefully so no test should break):
+
+```bash
+bun run typecheck   # 0 errors
+bun test src/services/skillSearch/__tests__/intentNormalize.test.ts
+bun test src/services/skillSearch/__tests__/prefetch.extractQuery.test.ts
+bun test src/services/skillLearning/__tests__/projectContext.test.ts
+bun test src/services/skillLearning/__tests__/promotion.test.ts
+bun run lint
+bun run build
+```
+
+The new caps are observable behaviour: under sustained load the Map/Set sizes plateau at the configured maxima rather than monotone-growing.
diff --git a/docs/internals/autonomy-jira.md b/docs/internals/autonomy-jira.md
new file mode 100644
index 0000000000..5593fdcf9c
--- /dev/null
+++ b/docs/internals/autonomy-jira.md
@@ -0,0 +1,314 @@
+# Autonomy Reliability Jira Drafts
+
+These tickets are based on the call-chain audit of `/autonomy`, proactive
+ticks, HEARTBEAT managed flows, cron scheduling, command queue consumption,
+and daemon process supervision.
+
+## AUT-001: Preserve autonomy lifecycle when queued commands are consumed mid-turn
+
+Type: Bug
+Priority: P0
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+`query.ts` can drain queued prompt/task-notification commands as attachments
+during an active turn. Autonomy prompts consumed this way were removed from the
+in-memory queue without marking the persisted run as running/completed/failed,
+so managed flows could stay stuck in `queued` and never advance.
+
+Evidence:
+- `src/query.ts` drains queued commands via `getCommandsByMaxPriority()`.
+- `src/query.ts` removes consumed commands from the queue.
+- Lifecycle updates existed only in the normal queued-submit path
+  `src/utils/handlePromptSubmit.ts` and headless `src/cli/print.ts`.
+
+Acceptance criteria:
+- Mid-turn consumed autonomy commands mark runs `running`.
+- Normal query completion finalizes consumed runs and queues next managed-flow
+  steps.
+- Query errors or abort terminal reasons mark consumed runs failed.
+- Stale/cancelled autonomy commands are removed from the in-memory queue
+  without being sent to the model.
+- Regression tests cover stale command filtering and managed-flow advancement.
+
+## AUT-002: Make autonomy run lifecycle transitions terminal-safe
+
+Type: Bug
+Priority: P0
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+Run lifecycle helpers rewrote status unconditionally. A stale in-memory command
+could mark a cancelled/completed/failed run back to `running`, causing a
+cancelled flow to execute or a terminal flow to be rewritten.
+
+Evidence:
+- `markAutonomyRunRunning`, `markAutonomyRunCompleted`,
+  `markAutonomyRunFailed`, and `markAutonomyRunCancelled` updated records
+  without checking current status.
+- External CLI cancel cannot remove queued commands living inside another
+  process, so stale commands are a realistic input.
+
+Acceptance criteria:
+- `queued -> running/completed/failed/cancelled` remains allowed.
+- `running -> completed/failed/cancelled` remains allowed.
+- Any terminal status rejects later lifecycle updates.
+- Rejected transitions do not update managed-flow step state.
+- Regression tests cover stale lifecycle calls after cancellation.
+
+## AUT-003: Prevent proactive and scheduled-task async fire failures from becoming invisible
+
+Type: Bug
+Priority: P1
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+Proactive tick and cron fire callbacks launch detached async work. Failures in
+prompt preparation or queue insertion could surface as unhandled rejections or
+be lost from diagnostics. In one-shot cron paths, the scheduler has already
+decided the task fired.
+
+Evidence:
+- `src/proactive/useProactive.ts` used a detached async IIFE without catch.
+- `src/cli/print.ts` proactive and cron paths also detached async work.
+- `src/hooks/useScheduledTasks.ts` cron callbacks detached async work.
+
+Acceptance criteria:
+- Detached proactive/cron fire work has explicit error logging.
+- REPL proactive tick generation is non-reentrant.
+- Tick generation stops queueing after hook unmount.
+
+## AUT-004: Bound long-running daemon restart timers during shutdown
+
+Type: Bug
+Priority: P1
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+The daemon supervisor scheduled worker restarts with `setTimeout()` but did
+not store, clear, or `unref()` the timer. Shutdown during backoff could keep
+the supervisor alive until the timer fired, forcing the stop path toward
+SIGKILL.
+
+Evidence:
+- `src/daemon/main.ts` scheduled restart timers directly in the worker exit
+  handler.
+- Shutdown only signaled child processes and did not clear restart timers.
+
+Acceptance criteria:
+- Worker restart timers are tracked per worker.
+- Shutdown clears any pending restart timers.
+- Restart and force-kill grace timers do not keep the supervisor alive alone.
+
+## AUT-005: Release autonomy persistence lock bookkeeping after each chain
+
+Type: Bug
+Priority: P1
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+`withAutonomyPersistenceLock` stored a chained promise in its map but compared
+the map value against the raw current promise during cleanup. That condition
+never matched, so root-level lock bookkeeping could accumulate in long-lived
+processes that touch many workspaces.
+
+Evidence:
+- `src/utils/autonomyPersistence.ts` stored `previous.then(() => current)`.
+- Cleanup compared `persistenceLocks.get(key) === current`.
+
+Acceptance criteria:
+- The stored chained promise is the value used for cleanup comparison.
+- Existing serialization behavior for same-root calls remains unchanged.
+- Tests directly assert same-root lock bookkeeping returns to zero after both
+  success and failure.
+
+## AUT-006: Add active-record protection before persistence truncation
+
+Type: Reliability
+Priority: P2
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+Autonomy runs and flows are capped by latest-created/updated order only.
+Under high churn, active `queued` or `running` records can be truncated before
+completion, which removes recovery evidence and can break managed-flow
+advancement.
+
+Evidence:
+- `src/utils/autonomyRuns.ts` keeps the latest 200 runs by `createdAt`.
+- `src/utils/autonomyFlows.ts` keeps the latest 100 flows by `updatedAt`.
+
+Acceptance criteria:
+- Active records are retained before completed historical records are trimmed.
+- Tests cover trimming with more than the configured cap and active records
+  near the tail.
+
+## AUT-007: Treat provider API-error responses as failed autonomy turns
+
+Type: Bug
+Priority: P0
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+Third-party provider adapters can convert provider failures into synthetic
+assistant API-error messages instead of throwing. `query.ts` treated
+`isApiErrorMessage` terminal responses as `completed`, so an autonomy command
+that had already been consumed as a queued attachment could be marked
+completed and advance its managed flow even though the provider call failed.
+
+Evidence:
+- `src/services/api/openai/index.ts`, `src/services/api/gemini/index.ts`, and
+  `src/services/api/grok/index.ts` yield `createAssistantAPIErrorMessage()` on
+  adapter errors.
+- `src/query.ts` skipped stop hooks for API-error assistant messages but
+  returned `reason: 'completed'`.
+- Top-level autonomy finalization used terminal completion to decide whether
+  to mark consumed runs completed or failed.
+
+Acceptance criteria:
+- Provider API-error assistant messages terminate the query with
+  `reason: 'model_error'`.
+- Any consumed autonomy run is marked failed rather than completed.
+- Managed flows do not advance to the next step after provider API errors.
+- A regression test simulates provider error after a queued autonomy attachment
+  has been consumed.
+
+## AUT-008: Finalize consumed autonomy runs on async-generator close
+
+Type: Bug
+Priority: P0
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+`query()` is an async generator. When its consumer calls `.return()` or breaks
+out of iteration, JavaScript executes `finally` blocks and skips code after the
+`try/finally`. The previous autonomy finalization ran after the `finally`, so
+queued autonomy commands that had already been claimed as `running` could stay
+persisted as `running` forever if the REPL/SDK consumer closed the generator.
+
+Evidence:
+- Claimed run IDs were collected during queued attachment injection.
+- Completion/failure finalization happened only after `yield* queryLoop(...)`
+  returned normally or threw.
+- Claude cross-validation flagged this as a durable run/flow leak.
+
+Acceptance criteria:
+- Consumed autonomy runs are finalized from a `finally` path.
+- Normal completion marks consumed runs completed and enqueues next managed
+  flow steps.
+- Provider/model errors mark consumed runs failed.
+- Generator close and user abort terminals mark consumed runs cancelled.
+- A regression test closes the generator after a queued autonomy attachment and
+  verifies the run/flow are cancelled, not left running.
+
+## AUT-009: Claim queued autonomy runs before attachment injection
+
+Type: Bug
+Priority: P0
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+The query loop filtered stale queued autonomy commands before attachment
+generation, but it did not claim runs as `running` until after attachments were
+already yielded. A concurrent cancellation between those steps could still send
+a cancelled prompt into the model context.
+
+Evidence:
+- `partitionConsumableQueuedAutonomyCommands()` only checked persisted status.
+- `markAutonomyRunRunning()` previously ran after `getAttachmentMessages()`.
+- Reviewer cross-validation identified the check-then-act race.
+
+Acceptance criteria:
+- Query claims queued autonomy runs before passing commands to attachment
+  generation.
+- Only successfully claimed commands are injected as queued-command
+  attachments.
+- Failed claims are treated as stale and removed from the in-memory queue.
+- Claiming reads persisted run state once per turn rather than once per
+  command.
+
+## AUT-010: Cancel proactive and cron runs dropped before enqueue
+
+Type: Bug
+Priority: P1
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+`/proactive` and scheduled-task producers persist autonomy runs before
+returning queue commands. If the component is disposed or headless input closes
+after persistence but before enqueue, the queued run is left on disk with no
+in-memory command to consume it.
+
+Evidence:
+- `createProactiveAutonomyCommands()` commits runs before returning commands.
+- `commitAutonomyQueuedPrompt()` persists scheduled-task runs before callers
+  enqueue them.
+- Callers checked `disposed` / `inputClosed` after command creation and could
+  return without terminalizing the run.
+
+Acceptance criteria:
+- Proactive hook cancellation checks run both before commit and after command
+  creation.
+- Headless proactive and cron paths cancel any already-created command that is
+  dropped due to input close.
+- REPL scheduled-task cleanup cancels already-created commands when unmounted.
+- A regression test verifies a proactive command created but dropped before
+  enqueue is marked cancelled.
+
+## AUT-011: Replace query transition `any` stubs with typed contracts
+
+Type: Test/Type Safety
+Priority: P2
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+`src/query/transitions.ts` defined both `Terminal` and `Continue` as `any`.
+That allowed new terminal reasons such as `model_error` and continuation
+reasons such as `collapse_drain_retry` to drift without compiler checks.
+
+Evidence:
+- Claude cross-validation flagged the `Terminal = any` contract as a remaining
+  issue.
+- Tightening the type immediately caught that
+  `collapse_drain_retry.committed` is a `number`, not a `boolean`.
+
+Acceptance criteria:
+- `Terminal` is a concrete union of query terminal reasons.
+- `Continue` is a concrete union of continuation reasons and payloads.
+- `bun run typecheck` validates all query return sites against that contract.
+
+## AUT-012: Avoid provider test settings-module mock pollution
+
+Type: Test Reliability
+Priority: P2
+Status: Draft
+Patch status: Implemented in `fix/autonomy-lifecycle`.
+
+Problem:
+The provider tests previously mocked `settings.js`. A minimal mock broke other
+tests that imported additional settings exports in the same Bun process; the
+expanded mock avoided the failure but over-coupled the provider test to
+unrelated settings internals.
+
+Evidence:
+- Full test runs observed cross-file settings mock pollution.
+- `src/utils/model/providers.ts` only needs the real `getInitialSettings()`
+  behavior.
+
+Acceptance criteria:
+- Provider tests do not mock `settings.js`.
+- `modelType` precedence is exercised through an injected settings snapshot,
+  leaving global bootstrap state untouched.
+- Provider tests pass when run alongside permissions tests and the provider
+  matrix.
diff --git a/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts b/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts
index eb9b726c82..b3640822f4 100644
--- a/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts
+++ b/packages/builtin-tools/src/tools/RemoteTriggerTool/__tests__/RemoteTriggerTool.test.ts
@@ -1,19 +1,8 @@
 import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
-import { mkdir, readFile, rm } from 'fs/promises'
-import { tmpdir } from 'os'
-import { join } from 'path'
-import {
-  resetStateForTests,
-  setOriginalCwd,
-  setProjectRoot,
-} from 'src/bootstrap/state.js'
-import { logMock } from '../../../../../../tests/mocks/log'
-import { debugMock } from '../../../../../../tests/mocks/debug'
+import { authMock } from '../../../../../../tests/mocks/auth'
 
 let requestStatus = 200
-
-mock.module('src/utils/log.ts', logMock)
-mock.module('src/utils/debug.ts', debugMock)
+const auditRecords: Record<string, unknown>[] = []
 
 mock.module('axios', () => ({
   default: {
@@ -24,20 +13,12 @@ mock.module('axios', () => ({
   },
 }))
 
-mock.module('src/utils/auth.js', () => ({
-  checkAndRefreshOAuthTokenIfNeeded: async () => {},
-  getClaudeAIOAuthTokens: () => ({ accessToken: 'token' }),
-}))
+mock.module('src/utils/auth.js', authMock)
 
 mock.module('src/services/oauth/client.js', () => ({
   getOrganizationUUID: async () => 'org',
 }))
 
-mock.module('src/constants/oauth.js', () => ({
-  getOauthConfig: () => ({ BASE_API_URL: 'https://example.test' }),
-  fileSuffixForOauthConfig: () => '',
-}))
-
 mock.module('src/services/analytics/growthbook.js', () => ({
   getFeatureValue_CACHED_MAY_BE_STALE: () => true,
 }))
@@ -46,40 +27,41 @@ mock.module('src/services/policyLimits/index.js', () => ({
   isPolicyAllowed: () => true,
 }))
 
-mock.module('bun:bundle', () => ({
-  feature: () => false,
-}))
-
-let cwd = ''
-let previousCwd = ''
-let auditRecords: Array<Record<string, unknown>> = []
+// Narrow mock for the side-effectful entries in `src/constants/oauth.js`.
+// Pure data exports (ALL_OAUTH_SCOPES, CLAUDE_AI_*_SCOPE, etc.) come from
+// the real module and are not mocked, per the test policy that constants
+// modules without side effects should not be replaced wholesale.
+mock.module('src/constants/oauth.js', () => {
+  const actual = require('../../../../../../src/constants/oauth.js')
+  return {
+    ...actual,
+    fileSuffixForOauthConfig: () => '',
+    getOauthConfig: () => ({ BASE_API_URL: 'https://example.test' }),
+    MCP_CLIENT_METADATA_URL: 'https://example.test/oauth/metadata',
+  }
+})
 
 mock.module('src/utils/remoteTriggerAudit.js', () => ({
-  appendRemoteTriggerAuditRecord: async (record: Record<string, unknown>) => {
-    const full = { ...record, auditId: record.auditId ?? 'test-audit-id', createdAt: Date.now() }
-    auditRecords.push(full)
-    return full
+  appendRemoteTriggerAuditRecord: async (
+    record: Record<string, unknown>,
+  ) => {
+    const fullRecord = {
+      auditId: `audit-${auditRecords.length + 1}`,
+      createdAt: Date.now(),
+      ...record,
+    }
+    auditRecords.push(fullRecord)
+    return fullRecord
   },
-  resolveRemoteTriggerAuditPath: () => join(cwd, '.claude', 'remote-trigger-audit.jsonl'),
 }))
 
-beforeEach(async () => {
+beforeEach(() => {
   requestStatus = 200
-  auditRecords = []
-  previousCwd = process.cwd()
-  cwd = join(tmpdir(), `remote-trigger-tool-${Date.now()}-${Math.random().toString(16).slice(2)}`)
-  await mkdir(cwd, { recursive: true })
-  await mkdir(join(cwd, '.claude'), { recursive: true })
-  process.chdir(cwd)
-  resetStateForTests()
-  setOriginalCwd(cwd)
-  setProjectRoot(cwd)
+  auditRecords.length = 0
 })
 
-afterEach(async () => {
-  resetStateForTests()
-  process.chdir(previousCwd)
-  await rm(cwd, { recursive: true, force: true })
+afterEach(() => {
+  auditRecords.length = 0
 })
 
 describe('RemoteTriggerTool audit', () => {
@@ -91,10 +73,14 @@ describe('RemoteTriggerTool audit', () => {
     )
 
     expect(result.data.audit_id).toBeString()
+    expect(result.data.audit_id).toBe('audit-1')
     expect(auditRecords).toHaveLength(1)
-    expect(auditRecords[0].action).toBe('run')
-    expect(auditRecords[0].triggerId).toBe('trigger-1')
-    expect(auditRecords[0].ok).toBe(true)
+    expect(auditRecords[0]).toMatchObject({
+      action: 'run',
+      triggerId: 'trigger-1',
+      ok: true,
+      status: 200,
+    })
   })
 
   test('writes an audit record before rethrowing validation failures', async () => {
@@ -108,8 +94,10 @@ describe('RemoteTriggerTool audit', () => {
     ).rejects.toThrow('run requires trigger_id')
 
     expect(auditRecords).toHaveLength(1)
-    expect(auditRecords[0].action).toBe('run')
-    expect(auditRecords[0].ok).toBe(false)
-    expect(auditRecords[0].error).toBe('run requires trigger_id')
+    expect(auditRecords[0]).toMatchObject({
+      action: 'run',
+      ok: false,
+      error: 'run requires trigger_id',
+    })
   })
 })
diff --git a/packages/color-diff-napi/src/index.ts b/packages/color-diff-napi/src/index.ts
index 9fe5240ede..692728e2a9 100644
--- a/packages/color-diff-napi/src/index.ts
+++ b/packages/color-diff-napi/src/index.ts
@@ -18,76 +18,19 @@
  */
 
 import { diffArrays } from 'diff'
-// Import the minimal highlight.js core (no languages) instead of the full
-// bundle that loads 190+ grammars (~5-15MB). Individual languages are
-// imported statically below and registered on the core instance. Static
-// imports work in Bun --compile mode (only createRequire fails).
-import hljs from 'highlight.js/lib/core'
+import hljs from 'highlight.js'
 import { basename, extname } from 'path'
 
-// --- Register commonly-used languages (~25 instead of 190+) ---
-import langBash from 'highlight.js/lib/languages/bash'
-import langC from 'highlight.js/lib/languages/c'
-import langCmake from 'highlight.js/lib/languages/cmake'
-import langCpp from 'highlight.js/lib/languages/cpp'
-import langCsharp from 'highlight.js/lib/languages/csharp'
-import langCss from 'highlight.js/lib/languages/css'
-import langDiff from 'highlight.js/lib/languages/diff'
-import langDockerfile from 'highlight.js/lib/languages/dockerfile'
-import langGo from 'highlight.js/lib/languages/go'
-import langGraphQL from 'highlight.js/lib/languages/graphql'
-import langJava from 'highlight.js/lib/languages/java'
-import langJavaScript from 'highlight.js/lib/languages/javascript'
-import langJson from 'highlight.js/lib/languages/json'
-import langKotlin from 'highlight.js/lib/languages/kotlin'
-import langMakefile from 'highlight.js/lib/languages/makefile'
-import langMarkdown from 'highlight.js/lib/languages/markdown'
-import langPerl from 'highlight.js/lib/languages/perl'
-import langPhp from 'highlight.js/lib/languages/php'
-import langPython from 'highlight.js/lib/languages/python'
-import langRuby from 'highlight.js/lib/languages/ruby'
-import langRust from 'highlight.js/lib/languages/rust'
-import langShell from 'highlight.js/lib/languages/shell'
-import langSql from 'highlight.js/lib/languages/sql'
-import langTypeScript from 'highlight.js/lib/languages/typescript'
-import langXml from 'highlight.js/lib/languages/xml'
-import langYaml from 'highlight.js/lib/languages/yaml'
-
-hljs.registerLanguage('bash', langBash)
-hljs.registerLanguage('c', langC)
-hljs.registerLanguage('cmake', langCmake)
-hljs.registerLanguage('cpp', langCpp)
-hljs.registerLanguage('csharp', langCsharp)
-hljs.registerLanguage('css', langCss)
-hljs.registerLanguage('diff', langDiff)
-hljs.registerLanguage('dockerfile', langDockerfile)
-hljs.registerLanguage('go', langGo)
-hljs.registerLanguage('graphql', langGraphQL)
-hljs.registerLanguage('java', langJava)
-hljs.registerLanguage('javascript', langJavaScript)
-hljs.registerLanguage('json', langJson)
-hljs.registerLanguage('kotlin', langKotlin)
-hljs.registerLanguage('makefile', langMakefile)
-hljs.registerLanguage('markdown', langMarkdown)
-hljs.registerLanguage('perl', langPerl)
-hljs.registerLanguage('php', langPhp)
-hljs.registerLanguage('python', langPython)
-hljs.registerLanguage('ruby', langRuby)
-hljs.registerLanguage('rust', langRust)
-hljs.registerLanguage('shell', langShell)
-hljs.registerLanguage('sql', langSql)
-hljs.registerLanguage('typescript', langTypeScript)
-hljs.registerLanguage('xml', langXml)
-hljs.registerLanguage('yaml', langYaml)
-// JavaScript grammar also handles .mjs/.cjs extensions
-// TypeScript grammar also handles .tsx via auto-detection
-
+// Static import — createRequire(import.meta.url) fails in Bun --compile mode
+// because the resolved path points to the internal bunfs binary path where
+// node_modules cannot be found. A top-level import ensures the module is
+// bundled and accessible at runtime.
 type HLJSApi = typeof hljs
 let cachedHljs: HLJSApi | null = null
 function hljsApi(): HLJSApi {
   if (cachedHljs) return cachedHljs
-  // highlight.js/lib/core uses `export =` (CJS). Under bun/ESM the interop
-  // wraps it in .default; under node CJS the module IS the API. Check at runtime.
+  // highlight.js uses `export =` (CJS). Under bun/ESM the interop wraps it
+  // in .default; under node CJS the module IS the API. Check at runtime.
   const mod = hljs as HLJSApi & { default?: HLJSApi }
   cachedHljs = 'default' in mod && mod.default ? mod.default : mod
   return cachedHljs!
diff --git a/scripts/defines.ts b/scripts/defines.ts
index 1cff1337a6..7c482f31cf 100644
--- a/scripts/defines.ts
+++ b/scripts/defines.ts
@@ -66,9 +66,16 @@ export const DEFAULT_BUILD_FEATURES = [
     'COMMIT_ATTRIBUTION',          // Git 提交归属追踪（记录 AI 辅助贡献）
     // Server mode (claude server / claude open)
     'DIRECT_CONNECT',              // 直连模式（claude server / claude open）
-    // Skill search & learning
-    'EXPERIMENTAL_SKILL_SEARCH',   // 实验性技能搜索（DiscoverSkills）
-    // 'SKILL_LEARNING',              // projectContext cache 无淘汰机制（非 GB 级主因）
+    // Skill search & learning — feature flags compiled in (so the slash
+    // commands /skill-* etc. exist), but the runtime "enabled" toggle
+    // defaults to OFF (see featureCheck.ts). Operators turn on via the
+    // slash-command toggle or env vars (SKILL_SEARCH_ENABLED=1,
+    // SKILL_LEARNING_ENABLED=1). Rationale: bounded caches added on
+    // this branch (see docs/agent/sur-skill-overflow-bugs.md) close the
+    // overflow risk, but Haiku-on-first-Chinese-query and disk-side
+    // observation accumulation remain operator-discretion concerns.
+    'EXPERIMENTAL_SKILL_SEARCH',
+    'SKILL_LEARNING',
     // P3: poor mode
     'POOR',                        // 穷鬼模式，跳过 extract_memories/prompt_suggestion 减少消耗
     // Team Memory
diff --git a/src/Tool.ts b/src/Tool.ts
index c8c7a98956..6008807511 100644
--- a/src/Tool.ts
+++ b/src/Tool.ts
@@ -178,6 +178,19 @@ export type ToolUseContext = {
     querySource?: QuerySource
     /** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
     refreshTools?: () => Tools
+    /**
+     * @internal TEST-ONLY ESCAPE HATCH. MUST remain undefined in production.
+     *
+     * Allows non-bundled unit-test harnesses to exercise the background
+     * forked slash command path that production assistant mode gates behind
+     * `feature('KAIROS')`. Still requires `AppState.kairosEnabled`. This
+     * field is constructed in-process by trusted application code only;
+     * no external surface (MCP, plugin, slash command, network) writes to
+     * `ToolUseContext.options`. Setting this to true would bypass the
+     * KAIROS feature flag, so `processSlashCommand` rejects it unless
+     * `NODE_ENV=test`.
+     */
+    allowBackgroundForkedSlashCommands?: boolean
   }
   abortController: AbortController
   readFileState: FileStateCache
diff --git a/src/__tests__/handlePromptSubmit.test.ts b/src/__tests__/handlePromptSubmit.test.ts
index 7fa2f663d2..1c0cca36fb 100644
--- a/src/__tests__/handlePromptSubmit.test.ts
+++ b/src/__tests__/handlePromptSubmit.test.ts
@@ -1,8 +1,18 @@
-import { beforeEach, describe, expect, mock, test } from 'bun:test'
+import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
 import { createAbortController } from '../utils/abortController'
 import { QueryGuard } from '../utils/QueryGuard'
 import { handlePromptSubmit } from '../utils/handlePromptSubmit'
-import { getCommandQueue, resetCommandQueue } from '../utils/messageQueueManager'
+import {
+  getCommandQueue,
+  resetCommandQueue,
+} from '../utils/messageQueueManager'
+import { cleanupTempDir, createTempDir } from '../../tests/mocks/file-system'
+import {
+  createAutonomyQueuedPrompt,
+  markAutonomyRunCancelled,
+} from '../utils/autonomyRuns'
+
+let tempDirs: string[] = []
 
 function createBaseParams() {
   const queryGuard = new QueryGuard()
@@ -28,11 +38,9 @@ function createBaseParams() {
     commands: [],
     setUserInputOnProcessing: mock((_prompt?: string) => {}),
     setAbortController: mock((_abortController: AbortController | null) => {}),
-    onQuery: mock(
-      async () => undefined,
-    ) as unknown as (
+    onQuery: mock(async () => true) as unknown as (
       ...args: unknown[]
-    ) => Promise<void>,
+    ) => Promise<boolean>,
     setAppState: mock((_updater: unknown) => {}),
   }
 }
@@ -40,6 +48,13 @@ function createBaseParams() {
 describe('handlePromptSubmit', () => {
   beforeEach(() => {
     resetCommandQueue()
+    tempDirs = []
+  })
+
+  afterEach(async () => {
+    for (const tempDir of tempDirs) {
+      await cleanupTempDir(tempDir)
+    }
   })
 
   test('aborts the current turn when only cancel-interrupt tools are running', async () => {
@@ -118,4 +133,34 @@ describe('handlePromptSubmit', () => {
       bridgeOrigin: true,
     })
   })
+
+  test('skips stale autonomy commands in the idle queued path', async () => {
+    const params = createBaseParams()
+    const abortController = createAbortController()
+    const tempDir = await createTempDir('handle-prompt-autonomy-')
+    tempDirs.push(tempDir)
+    const command = await createAutonomyQueuedPrompt({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    await markAutonomyRunCancelled(command!.autonomy!.runId, tempDir)
+
+    await handlePromptSubmit({
+      ...params,
+      input: '',
+      mode: 'prompt',
+      pastedContents: {},
+      abortController,
+      streamMode: 'normal' as any,
+      hasInterruptibleToolInProgress: false,
+      isExternalLoading: false,
+      queuedCommands: [command!],
+    })
+
+    expect(params.getToolUseContext).not.toHaveBeenCalled()
+    expect(params.onQuery).not.toHaveBeenCalled()
+  })
 })
diff --git a/src/__tests__/queryAutonomyProviderBoundary.test.ts b/src/__tests__/queryAutonomyProviderBoundary.test.ts
new file mode 100644
index 0000000000..5da040c13b
--- /dev/null
+++ b/src/__tests__/queryAutonomyProviderBoundary.test.ts
@@ -0,0 +1,337 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { randomUUID } from 'crypto'
+import {
+  resetStateForTests,
+  setCwdState,
+  setOriginalCwd,
+  setProjectRoot,
+} from '../bootstrap/state'
+import { query } from '../query'
+import { getEmptyToolPermissionContext } from '../Tool'
+import type { AssistantMessage } from '../types/message'
+import { asSystemPrompt } from '../utils/systemPromptType'
+import {
+  createAssistantAPIErrorMessage,
+  createUserMessage,
+} from '../utils/messages'
+import { cleanupTempDir, createTempDir } from '../../tests/mocks/file-system'
+import {
+  enqueue,
+  getCommandsByMaxPriority,
+  resetCommandQueue,
+} from '../utils/messageQueueManager'
+import { getAutonomyFlowById, listAutonomyFlows } from '../utils/autonomyFlows'
+import {
+  getAutonomyRunById,
+  startManagedAutonomyFlowFromHeartbeatTask,
+} from '../utils/autonomyRuns'
+
+let tempDir = ''
+let originalProcessCwd = ''
+
+beforeEach(async () => {
+  originalProcessCwd = process.cwd()
+  tempDir = await createTempDir('query-autonomy-provider-boundary-')
+  resetStateForTests()
+  resetCommandQueue()
+  setOriginalCwd(tempDir)
+  setCwdState(tempDir)
+  setProjectRoot(tempDir)
+})
+
+afterEach(async () => {
+  resetStateForTests()
+  resetCommandQueue()
+  if (originalProcessCwd) {
+    process.chdir(originalProcessCwd)
+  }
+  if (tempDir) {
+    let lastError: unknown
+    for (let attempt = 0; attempt < 20; attempt++) {
+      try {
+        await cleanupTempDir(tempDir)
+        lastError = undefined
+        break
+      } catch (error) {
+        lastError = error
+        await new Promise(resolve => setTimeout(resolve, 100))
+      }
+    }
+    if (lastError) {
+      throw lastError
+    }
+  }
+})
+
+function createToolUseAssistantMessage(): AssistantMessage {
+  return {
+    type: 'assistant',
+    uuid: randomUUID(),
+    timestamp: new Date().toISOString(),
+    requestId: undefined,
+    message: {
+      id: 'msg_tool_use',
+      type: 'message',
+      role: 'assistant',
+      model: 'test-model',
+      stop_reason: 'tool_use',
+      stop_sequence: null,
+      usage: {
+        input_tokens: 1,
+        output_tokens: 1,
+        cache_creation_input_tokens: 0,
+        cache_read_input_tokens: 0,
+      },
+      content: [
+        {
+          type: 'tool_use',
+          id: 'toolu_provider_boundary',
+          name: 'MissingBoundaryTool',
+          input: {},
+        },
+      ],
+    },
+  } as unknown as AssistantMessage
+}
+
+function createToolUseContext(): any {
+  let inProgressToolUseIds = new Set<string>()
+  let responseLength = 0
+  let appState = {
+    toolPermissionContext: getEmptyToolPermissionContext(),
+    fastMode: false,
+    mcp: {
+      tools: [],
+      clients: [],
+    },
+    effortValue: undefined,
+    advisorModel: undefined,
+    sessionHooks: new Map(),
+  }
+
+  return {
+    options: {
+      commands: [],
+      debug: false,
+      mainLoopModel: 'claude-sonnet-4-5-20250929',
+      tools: [],
+      verbose: false,
+      thinkingConfig: { type: 'disabled' },
+      mcpClients: [],
+      mcpResources: {},
+      isNonInteractiveSession: true,
+      agentDefinitions: {
+        activeAgents: [],
+        allowedAgentTypes: [],
+      },
+    },
+    abortController: new AbortController(),
+    readFileState: new Map(),
+    getAppState: () => appState,
+    setAppState: (updater: (state: any) => any) => {
+      appState = updater(appState as never)
+    },
+    setInProgressToolUseIDs: (updater: (state: Set<string>) => Set<string>) => {
+      inProgressToolUseIds = updater(inProgressToolUseIds)
+    },
+    setResponseLength: (updater: (state: number) => number) => {
+      responseLength = updater(responseLength)
+    },
+    updateFileHistoryState: () => {},
+    updateAttributionState: () => {},
+    messages: [],
+  } as any
+}
+
+describe('query autonomy/provider boundary', () => {
+  test('provider api-error messages fail a consumed autonomy run instead of advancing the flow', async () => {
+    const previousDisableAttachments =
+      process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS
+    process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS = '1'
+    try {
+      const command = await startManagedAutonomyFlowFromHeartbeatTask({
+        task: {
+          name: 'provider-boundary',
+          interval: '1h',
+          prompt: 'Exercise provider boundary',
+          steps: [
+            { name: 'first', prompt: 'First provider-boundary step' },
+            { name: 'second', prompt: 'Second provider-boundary step' },
+          ],
+        },
+        rootDir: tempDir,
+        currentDir: tempDir,
+        priority: 'next',
+      })
+      expect(command).not.toBeNull()
+      enqueue(command!)
+
+      const toolUseContext = createToolUseContext()
+
+      let callCount = 0
+      const deps = {
+        uuid: () => 'query-chain-id',
+        microcompact: async (messages: unknown[]) => ({ messages }),
+        autocompact: async () => ({
+          compactionResult: undefined,
+          consecutiveFailures: 0,
+        }),
+        callModel: async function* () {
+          callCount += 1
+          if (callCount === 1) {
+            yield createToolUseAssistantMessage()
+            return
+          }
+          yield createAssistantAPIErrorMessage({
+            content: 'API Error: provider unavailable',
+            apiError: 'api_error',
+            error: new Error('provider unavailable') as never,
+          })
+        },
+      }
+
+      const emitted: any[] = []
+      const generator = query({
+        messages: [
+          createUserMessage({
+            content: 'start provider-boundary test',
+          }),
+        ],
+        systemPrompt: asSystemPrompt([]),
+        userContext: {},
+        systemContext: {},
+        canUseTool: async (_tool, input) => ({
+          behavior: 'allow',
+          updatedInput: input,
+        }),
+        toolUseContext,
+        querySource: 'sdk',
+        maxTurns: 3,
+        deps: deps as never,
+      })
+      let next = await generator.next()
+      while (!next.done) {
+        emitted.push(next.value)
+        next = await generator.next()
+      }
+
+      const [flow] = await listAutonomyFlows(tempDir)
+      const finalFlow = await getAutonomyFlowById(flow!.flowId, tempDir)
+      const run = await getAutonomyRunById(command!.autonomy!.runId, tempDir)
+
+      expect(next.value.reason).toBe('model_error')
+      expect(callCount).toBe(2)
+      expect(
+        emitted.some(
+          message =>
+            message.type === 'attachment' &&
+            message.attachment.type === 'queued_command',
+        ),
+      ).toBe(true)
+      expect(run!.status).toBe('failed')
+      expect(run!.error).toBe('provider api_error')
+      expect(finalFlow!.status).toBe('failed')
+      expect(finalFlow!.stateJson!.steps.map(step => step.status)).toEqual([
+        'failed',
+        'pending',
+      ])
+      expect(getCommandsByMaxPriority('later')).toHaveLength(0)
+    } finally {
+      if (previousDisableAttachments === undefined) {
+        delete process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS
+      } else {
+        process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS = previousDisableAttachments
+      }
+    }
+  })
+
+  test('generator return cancels a consumed autonomy run instead of leaving it running', async () => {
+    const previousDisableAttachments =
+      process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS
+    process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS = '1'
+    try {
+      const command = await startManagedAutonomyFlowFromHeartbeatTask({
+        task: {
+          name: 'return-boundary',
+          interval: '1h',
+          prompt: 'Exercise generator return boundary',
+          steps: [
+            { name: 'first', prompt: 'First return-boundary step' },
+            { name: 'second', prompt: 'Second return-boundary step' },
+          ],
+        },
+        rootDir: tempDir,
+        currentDir: tempDir,
+        priority: 'next',
+      })
+      expect(command).not.toBeNull()
+      enqueue(command!)
+
+      const toolUseContext = createToolUseContext()
+      const deps = {
+        uuid: () => 'query-chain-id',
+        microcompact: async (messages: unknown[]) => ({ messages }),
+        autocompact: async () => ({
+          compactionResult: undefined,
+          consecutiveFailures: 0,
+        }),
+        callModel: async function* () {
+          yield createToolUseAssistantMessage()
+        },
+      }
+
+      const generator = query({
+        messages: [
+          createUserMessage({
+            content: 'start return-boundary test',
+          }),
+        ],
+        systemPrompt: asSystemPrompt([]),
+        userContext: {},
+        systemContext: {},
+        canUseTool: async (_tool, input) => ({
+          behavior: 'allow',
+          updatedInput: input,
+        }),
+        toolUseContext,
+        querySource: 'sdk',
+        maxTurns: 3,
+        deps: deps as never,
+      })
+
+      let sawQueuedAttachment = false
+      let next = await generator.next()
+      while (!next.done) {
+        const message = next.value as any
+        if (
+          message.type === 'attachment' &&
+          message.attachment.type === 'queued_command'
+        ) {
+          sawQueuedAttachment = true
+          await generator.return(undefined as never)
+          break
+        }
+        next = await generator.next()
+      }
+
+      const [flow] = await listAutonomyFlows(tempDir)
+      const finalFlow = await getAutonomyFlowById(flow!.flowId, tempDir)
+      const run = await getAutonomyRunById(command!.autonomy!.runId, tempDir)
+
+      expect(sawQueuedAttachment).toBe(true)
+      expect(run!.status).toBe('cancelled')
+      expect(finalFlow!.status).toBe('cancelled')
+      expect(finalFlow!.stateJson!.steps.map(step => step.status)).toEqual([
+        'cancelled',
+        'cancelled',
+      ])
+      expect(getCommandsByMaxPriority('later')).toHaveLength(0)
+    } finally {
+      if (previousDisableAttachments === undefined) {
+        delete process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS
+      } else {
+        process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS = previousDisableAttachments
+      }
+    }
+  })
+})
diff --git a/src/cli/print.ts b/src/cli/print.ts
index c4e8c45697..8b0aedc462 100644
--- a/src/cli/print.ts
+++ b/src/cli/print.ts
@@ -321,16 +321,15 @@ import {
 } from 'src/utils/queryProfiler.js'
 import { asSessionId } from 'src/types/ids.js'
 import {
-  commitAutonomyQueuedPrompt,
-  createAutonomyQueuedPrompt,
+  createAutonomyQueuedPromptIfNoActiveSource,
   createProactiveAutonomyCommands,
-  finalizeAutonomyRunCompleted,
-  finalizeAutonomyRunFailed,
-  markAutonomyRunCompleted,
   markAutonomyRunFailed,
-  markAutonomyRunRunning,
 } from 'src/utils/autonomyRuns.js'
-import { prepareAutonomyTurnPrompt } from 'src/utils/autonomyAuthority.js'
+import {
+  cancelQueuedAutonomyCommands,
+  claimConsumableQueuedAutonomyCommands,
+  finalizeAutonomyCommandsForTurn,
+} from 'src/utils/autonomyQueueLifecycle.js'
 import { jsonStringify } from '../utils/slowOperations.js'
 import { skillChangeDetector } from '../utils/skills/skillChangeDetector.js'
 import { getCommands, clearCommandsCache } from '../commands.js'
@@ -1865,17 +1864,26 @@ function runHeadlessStreaming(
                 currentDir: cwd(),
                 shouldCreate: () => !inputClosed,
               })
+              if (inputClosed) {
+                await cancelQueuedAutonomyCommands({ commands })
+                return
+              }
               for (const command of commands) {
-                if (inputClosed) {
-                  return
-                }
                 enqueue({
                   ...command,
                   uuid: randomUUID(),
                 })
               }
               void run()
-            })()
+            })().catch(error => {
+              logError(error)
+              logForDebugging(
+                `[Proactive] failed to create headless tick: ${error}`,
+                {
+                  level: 'error',
+                },
+              )
+            })
           }, 0)
         }
       : undefined
@@ -1971,17 +1979,24 @@ function runHeadlessStreaming(
           // Non-prompt commands (task-notification, orphaned-permission) carry
           // side effects or orphanedPermission state, so they process singly.
           // Prompt commands greedily collect followers with matching workload.
-          const batch: QueuedCommand[] = [command]
+          let batch: QueuedCommand[] = [command]
           if (command.mode === 'prompt') {
             while (canBatchWith(command, peek(isMainThread))) {
               batch.push(dequeue(isMainThread)!)
             }
-            if (batch.length > 1) {
-              command = {
-                ...command,
-                value: joinPromptValues(batch.map(c => c.value)),
-                uuid: batch.findLast(c => c.uuid)?.uuid ?? command.uuid,
-              }
+          }
+          const queuedAutonomyClaim =
+            await claimConsumableQueuedAutonomyCommands(batch)
+          batch = queuedAutonomyClaim.attachmentCommands
+          if (batch.length === 0) {
+            continue
+          }
+          command = batch[0]!
+          if (command.mode === 'prompt' && batch.length > 1) {
+            command = {
+              ...command,
+              value: joinPromptValues(batch.map(c => c.value)),
+              uuid: batch.findLast(c => c.uuid)?.uuid ?? command.uuid,
             }
           }
           const batchUuids = batch.map(c => c.uuid).filter(u => u !== undefined)
@@ -2120,9 +2135,7 @@ function runHeadlessStreaming(
           }
 
           const input = command.value
-          const autonomyRunIds = batch
-            .map(item => item.autonomy?.runId)
-            .filter((runId): runId is string => Boolean(runId))
+          const claimedAutonomyCommands = queuedAutonomyClaim.claimedCommands
 
           if (structuredIO instanceof RemoteIO && command.mode === 'prompt') {
             logEvent('tengu_bridge_message_received', {
@@ -2172,9 +2185,6 @@ function runHeadlessStreaming(
           // const-capture: TS loses `while ((command = dequeue()))` narrowing
           // inside the closure.
           const cmd = command
-          for (const runId of autonomyRunIds) {
-            await markAutonomyRunRunning(runId)
-          }
           let lastResultIsError = false
           try {
             await runWithWorkload(
@@ -2286,35 +2296,39 @@ function runHeadlessStreaming(
               },
             ) // end runWithWorkload
             if (lastResultIsError) {
-              for (const runId of autonomyRunIds) {
-                await finalizeAutonomyRunFailed({
-                  runId,
-                  error: 'ask() returned an error result',
-                })
-              }
+              await finalizeAutonomyCommandsForTurn({
+                commands: claimedAutonomyCommands,
+                outcome: {
+                  type: 'failed',
+                  message: 'ask() returned an error result',
+                },
+                currentDir: cwd(),
+                priority: 'later',
+                workload: cmd.workload ?? options.workload,
+              })
             } else {
-              for (const runId of autonomyRunIds) {
-                const nextCommands = await finalizeAutonomyRunCompleted({
-                  runId,
-                  currentDir: cwd(),
-                  priority: 'later',
-                  workload: cmd.workload ?? options.workload,
+              const nextCommands = await finalizeAutonomyCommandsForTurn({
+                commands: claimedAutonomyCommands,
+                outcome: { type: 'completed' },
+                currentDir: cwd(),
+                priority: 'later',
+                workload: cmd.workload ?? options.workload,
+              })
+              for (const nextCommand of nextCommands) {
+                enqueue({
+                  ...nextCommand,
+                  uuid: randomUUID(),
                 })
-                for (const nextCommand of nextCommands) {
-                  enqueue({
-                    ...nextCommand,
-                    uuid: randomUUID(),
-                  })
-                }
               }
             }
           } catch (error) {
-            for (const runId of autonomyRunIds) {
-              await finalizeAutonomyRunFailed({
-                runId,
-                error: String(error),
-              })
-            }
+            await finalizeAutonomyCommandsForTurn({
+              commands: claimedAutonomyCommands,
+              outcome: { type: 'failed', error },
+              currentDir: cwd(),
+              priority: 'later',
+              workload: cmd.workload ?? options.workload,
+            })
             throw error
           }
 
@@ -2805,72 +2819,90 @@ function runHeadlessStreaming(
   let cronScheduler: import('../utils/cronScheduler.js').CronScheduler | null =
     null
   if (cronGate.isKairosCronEnabled()) {
+    // Shared dedup-claim → input-close-recheck → onSuccess pipeline for the
+    // three cron entry points (legacy onFire, onFireTask agent, onFireTask
+    // non-agent). Centralizing the cancel-on-late-shutdown contract here keeps
+    // the three branches from drifting on what happens between claim and
+    // dispatch. onSuccess receives the claimed QueuedCommand and decides
+    // whether to enqueue it (normal path) or mark the run failed (agent path).
+    const dispatchHeadlessCronCommand = (params: {
+      basePrompt: string
+      sourceId: string
+      sourceLabel: string
+      logSuffix: string
+      onSuccess: (command: QueuedCommand) => void | Promise<void>
+    }): void => {
+      if (inputClosed) return
+      void (async () => {
+        const command = await createAutonomyQueuedPromptIfNoActiveSource({
+          basePrompt: params.basePrompt,
+          trigger: 'scheduled-task',
+          currentDir: cwd(),
+          sourceId: params.sourceId,
+          sourceLabel: params.sourceLabel,
+          workload: WORKLOAD_CRON,
+          shouldCreate: () => !inputClosed,
+        })
+        if (!command) return
+        if (inputClosed) {
+          await cancelQueuedAutonomyCommands({ commands: [command] })
+          return
+        }
+        await params.onSuccess(command)
+      })().catch(error => {
+        logError(error)
+        logForDebugging(
+          `[ScheduledTasks] failed to enqueue headless task${params.logSuffix}: ${error}`,
+          { level: 'error' },
+        )
+      })
+    }
+
+    const enqueueAndRun = (command: QueuedCommand): void => {
+      enqueue({
+        ...command,
+        uuid: randomUUID(),
+      })
+      void run()
+    }
+
     cronScheduler = cronSchedulerModule.createCronScheduler({
       onFire: prompt => {
-        if (inputClosed) return
-        void (async () => {
-          const prepared = await prepareAutonomyTurnPrompt({
-            basePrompt: prompt,
-            trigger: 'scheduled-task',
-            currentDir: cwd(),
-          })
-          if (inputClosed) return
-          const command = await commitAutonomyQueuedPrompt({
-            prepared,
-            currentDir: cwd(),
-            workload: WORKLOAD_CRON,
-          })
-          if (inputClosed) return
-          enqueue({
-            ...command,
-            uuid: randomUUID(),
-          })
-          void run()
-        })()
+        // Legacy KAIROS-style entries: the prompt text is what uniquely
+        // identifies the cron entry, so it doubles as both source id and
+        // source label for dedup.
+        dispatchHeadlessCronCommand({
+          basePrompt: prompt,
+          sourceId: prompt,
+          sourceLabel: prompt,
+          logSuffix: '',
+          onSuccess: enqueueAndRun,
+        })
       },
       onFireTask: task => {
-        if (inputClosed) return
-        void (async () => {
-          if (task.agentId) {
-            const prepared = await prepareAutonomyTurnPrompt({
-              basePrompt: task.prompt,
-              trigger: 'scheduled-task',
-              currentDir: cwd(),
-            })
-            if (inputClosed) return
-            const command = await commitAutonomyQueuedPrompt({
-              prepared,
-              currentDir: cwd(),
-              sourceId: task.id,
-              sourceLabel: task.prompt,
-              workload: WORKLOAD_CRON,
-            })
-            await markAutonomyRunFailed(
-              command.autonomy!.runId,
-              `No teammate runtime available for scheduled task owner ${task.agentId} in headless mode.`,
-            )
-            return
-          }
-          const prepared = await prepareAutonomyTurnPrompt({
+        if (task.agentId) {
+          dispatchHeadlessCronCommand({
             basePrompt: task.prompt,
-            trigger: 'scheduled-task',
-            currentDir: cwd(),
-          })
-          if (inputClosed) return
-          const command = await commitAutonomyQueuedPrompt({
-            prepared,
-            currentDir: cwd(),
             sourceId: task.id,
             sourceLabel: task.prompt,
-            workload: WORKLOAD_CRON,
-          })
-          if (inputClosed) return
-          enqueue({
-            ...command,
-            uuid: randomUUID(),
+            logSuffix: ` ${task.id}`,
+            onSuccess: async command => {
+              await markAutonomyRunFailed(
+                command.autonomy!.runId,
+                `No teammate runtime available for scheduled task owner ${task.agentId} in headless mode.`,
+                command.autonomy!.rootDir,
+              )
+            },
           })
-          void run()
-        })()
+          return
+        }
+        dispatchHeadlessCronCommand({
+          basePrompt: task.prompt,
+          sourceId: task.id,
+          sourceLabel: task.prompt,
+          logSuffix: ` ${task.id}`,
+          onSuccess: enqueueAndRun,
+        })
       },
       isLoading: () => running || inputClosed,
       getJitterConfig: cronJitterConfigModule?.getCronJitterConfig,
diff --git a/src/commands/skill-learning/index.ts b/src/commands/skill-learning/index.ts
index a5afb655d1..6fff9c5276 100644
--- a/src/commands/skill-learning/index.ts
+++ b/src/commands/skill-learning/index.ts
@@ -1,5 +1,5 @@
 import type { Command } from '../../commands.js'
-import { isSkillLearningEnabled } from '../../services/skillLearning/featureCheck.js'
+import { isSkillLearningCompiledIn } from '../../services/skillLearning/featureCheck.js'
 
 const skillLearning = {
   type: 'local-jsx',
@@ -7,7 +7,10 @@ const skillLearning = {
   description: 'Manage skill learning (observe, analyze, evolve)',
   argumentHint:
     '[start|stop|about|status|ingest|evolve|export|import|prune|promote|projects]',
-  isEnabled: () => isSkillLearningEnabled(),
+  // The slash command is visible whenever the subsystem is compiled in.
+  // Whether the runtime feature is actually doing work is a separate
+  // concern controlled by `/skill-learning start` (see featureCheck.ts).
+  isEnabled: () => isSkillLearningCompiledIn(),
   isHidden: false,
   load: () => import('./skillPanel.js'),
 } satisfies Command
diff --git a/src/commands/skill-search/index.ts b/src/commands/skill-search/index.ts
index e3c35aea07..814a6af137 100644
--- a/src/commands/skill-search/index.ts
+++ b/src/commands/skill-search/index.ts
@@ -1,10 +1,14 @@
 import type { Command } from '../../commands.js'
+import { isSkillSearchCompiledIn } from '../../services/skillSearch/featureCheck.js'
 
 const skillSearch = {
   type: 'local-jsx',
   name: 'skill-search',
   description: 'Control automatic skill matching during conversations',
   argumentHint: '[start|stop|about|status]',
+  // Visible whenever the subsystem is compiled in (build flag); runtime
+  // activation is separate and operator-controlled via /skill-search start.
+  isEnabled: () => isSkillSearchCompiledIn(),
   isHidden: false,
   load: () => import('./skillSearchPanel.js'),
 } satisfies Command
diff --git a/src/daemon/main.ts b/src/daemon/main.ts
index 513103e9ae..0d3855ddb2 100644
--- a/src/daemon/main.ts
+++ b/src/daemon/main.ts
@@ -30,6 +30,7 @@ interface WorkerState {
   failureCount: number
   parked: boolean
   lastStartTime: number
+  restartTimer: ReturnType<typeof setTimeout> | null
 }
 
 /**
@@ -241,6 +242,7 @@ async function runSupervisor(args: string[]): Promise<void> {
       failureCount: 0,
       parked: false,
       lastStartTime: 0,
+      restartTimer: null,
     },
   ]
 
@@ -261,6 +263,10 @@ async function runSupervisor(args: string[]): Promise<void> {
     controller.abort()
     removeDaemonState()
     for (const w of workers) {
+      if (w.restartTimer) {
+        clearTimeout(w.restartTimer)
+        w.restartTimer = null
+      }
       if (w.process && !w.process.killed) {
         w.process.kill('SIGTERM')
       }
@@ -288,22 +294,30 @@ async function runSupervisor(args: string[]): Promise<void> {
   // Wait for all workers to exit
   await Promise.all(
     workers
-      .filter(w => w.process && !w.process.killed)
+      .filter(w => w.process && w.process.exitCode === null)
       .map(
         w =>
           new Promise<void>(resolve => {
-            if (!w.process) {
+            // A child killed by a signal keeps exitCode === null and sets
+            // signalCode instead, so both must be checked for liveness.
+            const child = w.process
+            if (!child || child.exitCode !== null || child.signalCode) {
               resolve()
               return
             }
-            w.process.on('exit', () => resolve())
+            let killTimer: ReturnType<typeof setTimeout> | null = null
+            child.on('exit', () => {
+              if (killTimer) clearTimeout(killTimer)
+              resolve()
+            })
             // Force kill after grace period
-            setTimeout(() => {
-              if (w.process && !w.process.killed) {
+            killTimer = setTimeout(() => {
+              if (w.process && child.exitCode === null && !child.signalCode) {
                 w.process.kill('SIGKILL')
               }
               resolve()
             }, 30_000)
+            killTimer.unref?.()
           }),
       ),
   )
@@ -398,11 +412,13 @@ function spawnWorker(
       `[daemon] worker '${worker.kind}' exited (code=${code}, signal=${sig}), restarting in ${worker.backoffMs}ms`,
     )
 
-    setTimeout(() => {
+    worker.restartTimer = setTimeout(() => {
+      worker.restartTimer = null
       if (!signal.aborted && !worker.parked) {
         spawnWorker(worker, dir, config, signal)
       }
     }, worker.backoffMs)
+    worker.restartTimer.unref?.()
 
     // Exponential backoff
     worker.backoffMs = Math.min(
diff --git a/src/entrypoints/cli.tsx b/src/entrypoints/cli.tsx
index 844c4d7109..a535f35683 100644
--- a/src/entrypoints/cli.tsx
+++ b/src/entrypoints/cli.tsx
@@ -255,6 +255,29 @@ async function main(): Promise<void> {
     return
   }
 
+  // Fast-path for `claude autonomy ...`: state inspection/management commands
+  // do not need the full interactive CLI bootstrap. The full Commander path
+  // imports main.tsx and runs root preAction initialization before the autonomy
+  // action; under coverage/CI that leaves unrelated handles around simple
+  // state-only subprocess calls.
+  if (args[0] === 'autonomy') {
+    profileCheckpoint('cli_autonomy_path')
+    const { getAutonomyCommandText } = await import(
+      '../cli/handlers/autonomy.js'
+    )
+    const text = await getAutonomyCommandText(args.slice(1).join(' '))
+    await new Promise<void>((resolve, reject) => {
+      process.stdout.write(`${text}\n`, error => {
+        if (error) {
+          reject(error)
+          return
+        }
+        resolve()
+      })
+    })
+    process.exit(0)
+  }
+
   // Fast-path for `--bg`/`--background` shortcut → daemon bg.
   if (
     feature('BG_SESSIONS') &&
@@ -398,4 +421,4 @@ async function main(): Promise<void> {
 }
 
 // eslint-disable-next-line custom-rules/no-top-level-side-effects
-void main()
+await main()
diff --git a/src/hooks/__tests__/useScheduledTasks.test.ts b/src/hooks/__tests__/useScheduledTasks.test.ts
new file mode 100644
index 0000000000..ce6b1f966a
--- /dev/null
+++ b/src/hooks/__tests__/useScheduledTasks.test.ts
@@ -0,0 +1,80 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import {
+  resetStateForTests,
+  setCwdState,
+  setOriginalCwd,
+  setProjectRoot,
+} from '../../bootstrap/state'
+import { createScheduledTaskQueuedCommand } from '../useScheduledTasks'
+import {
+  listAutonomyRuns,
+  markAutonomyRunCompleted,
+} from '../../utils/autonomyRuns'
+import { resetAutonomyAuthorityForTests } from '../../utils/autonomyAuthority'
+import { cleanupTempDir, createTempDir } from '../../../tests/mocks/file-system'
+
+let tempDir = ''
+// Every test gets a fresh temp dir used as original cwd, project root and cwd.
+beforeEach(async () => {
+  tempDir = await createTempDir('scheduled-tasks-')
+  resetStateForTests()
+  resetAutonomyAuthorityForTests()
+  setOriginalCwd(tempDir)
+  setProjectRoot(tempDir)
+  setCwdState(tempDir)
+})
+// Resets the same shared state, then hands the temp dir (if set) to cleanupTempDir.
+afterEach(async () => {
+  resetStateForTests()
+  resetAutonomyAuthorityForTests()
+  if (tempDir) {
+    await cleanupTempDir(tempDir)
+  }
+})
+
+describe('createScheduledTaskQueuedCommand', () => {
+  function createCommandForTest(task: { id: string; prompt: string }) {
+    return createScheduledTaskQueuedCommand(task, {
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+  }
+  // createCommandForTest pins rootDir/currentDir to the per-test temp dir.
+  test('skips a scheduled task when the same source already has an active run', async () => {
+    const task = {
+      id: 'cron-1',
+      prompt: '/loop review the repository',
+    }
+    // Fire the same task twice without completing the first run in between.
+    const first = await createCommandForTest(task)
+    const second = await createCommandForTest(task)
+    const runs = await listAutonomyRuns(tempDir)
+    // Only the first fire yields a command; exactly one queued run is recorded.
+    expect(first).not.toBeNull()
+    expect(second).toBeNull()
+    expect(runs).toHaveLength(1)
+    expect(runs[0]).toMatchObject({
+      trigger: 'scheduled-task',
+      status: 'queued',
+      sourceId: 'cron-1',
+    })
+  })
+
+  test('allows a scheduled task after the previous same-source run completes', async () => {
+    const task = {
+      id: 'cron-1',
+      prompt: '/loop review the repository',
+    }
+
+    const first = await createCommandForTest(task)
+    expect(first?.autonomy?.runId).toBeDefined()
+    // Mark the first run completed, then fire the same task id again.
+    await markAutonomyRunCompleted(first!.autonomy!.runId, tempDir, 100)
+    const second = await createCommandForTest(task)
+    const runs = await listAutonomyRuns(tempDir)
+    // The second fire is accepted: one completed run plus one newly queued run.
+    expect(second).not.toBeNull()
+    expect(runs).toHaveLength(2)
+    expect(runs.map(run => run.status).sort()).toEqual(['completed', 'queued'])
+  })
+})
diff --git a/src/hooks/useReplBridge.tsx b/src/hooks/useReplBridge.tsx
index df9669e2e0..fb05c1c948 100644
--- a/src/hooks/useReplBridge.tsx
+++ b/src/hooks/useReplBridge.tsx
@@ -189,12 +189,6 @@ export function useReplBridge(
       }
 
       let cancelled = false
-      // Map of pending bridge permission response handlers, keyed by request_id.
-      // Defined at useEffect scope so the cleanup function can clear it on unmount.
-      const pendingPermissionHandlers = new Map<
-        string,
-        (response: BridgePermissionResponse) => void
-      >()
       // Capture messages.length now so we don't re-send initial messages
       // through writeMessages after the bridge connects.
       const initialMessageCount = messages.length
@@ -467,6 +461,13 @@ export function useReplBridge(
             }
           }
 
+          // Map of pending bridge permission response handlers, keyed by request_id.
+          // Each entry is an onResponse handler waiting for CCR to reply.
+          const pendingPermissionHandlers = new Map<
+            string,
+            (response: BridgePermissionResponse) => void
+          >()
+
           // Dispatch incoming control_response messages to registered handlers
           function handlePermissionResponse(msg: SDKControlResponse): void {
             const requestId = msg.response?.request_id
@@ -817,10 +818,6 @@ export function useReplBridge(
 
       return () => {
         cancelled = true
-        // Release all pending permission handlers so their closures (which
-        // may capture React state/setters) can be GC'd immediately rather
-        // than waiting for the entire useEffect closure to become unreachable.
-        pendingPermissionHandlers.clear()
         clearTimeout(failureTimeoutRef.current)
         failureTimeoutRef.current = undefined
         if (handleRef.current) {
diff --git a/src/hooks/useScheduledTasks.ts b/src/hooks/useScheduledTasks.ts
index e1dadbe851..5013270b2b 100644
--- a/src/hooks/useScheduledTasks.ts
+++ b/src/hooks/useScheduledTasks.ts
@@ -10,13 +10,18 @@ import type { Message } from '../types/message.js'
 import { getCwd } from '../utils/cwd.js'
 import { getCronJitterConfig } from '../utils/cronJitterConfig.js'
 import { createCronScheduler } from '../utils/cronScheduler.js'
-import { removeCronTasks } from '../utils/cronTasks.js'
-import { createAutonomyQueuedPrompt } from '../utils/autonomyRuns.js'
-import { markAutonomyRunFailed } from '../utils/autonomyRuns.js'
+import { removeCronTasks, type CronTask } from '../utils/cronTasks.js'
+import {
+  createAutonomyQueuedPrompt,
+  createAutonomyQueuedPromptIfNoActiveSource,
+  markAutonomyRunCancelled,
+  markAutonomyRunFailed,
+} from '../utils/autonomyRuns.js'
 import { logForDebugging } from '../utils/debug.js'
 import { enqueuePendingNotification } from '../utils/messageQueueManager.js'
 import { createScheduledTaskFireMessage } from '../utils/messages.js'
 import { WORKLOAD_CRON } from '../utils/workloadContext.js'
+import type { QueuedCommand } from '../types/textInputTypes.js'
 
 type Props = {
   isLoading: boolean
@@ -32,6 +37,32 @@ type Props = {
   setMessages: React.Dispatch<React.SetStateAction<Message[]>>
 }
 
+/** Builds the queued command for a fired cron task, logging when none results. */
+export async function createScheduledTaskQueuedCommand(
+  task: Pick<CronTask, 'id' | 'prompt'>,
+  options?: {
+    rootDir?: string
+    currentDir?: string
+    shouldCreate?: () => boolean
+  },
+): Promise<QueuedCommand | null> {
+  const queued = await createAutonomyQueuedPromptIfNoActiveSource({
+    basePrompt: task.prompt,
+    trigger: 'scheduled-task',
+    rootDir: options?.rootDir,
+    currentDir: options?.currentDir ?? getCwd(),
+    sourceId: task.id,
+    sourceLabel: task.prompt,
+    workload: WORKLOAD_CRON,
+    shouldCreate: options?.shouldCreate,
+  })
+  if (queued) return queued
+  // The helper produced no command: log the skip and pass its result through.
+  const skipNote = `[ScheduledTasks] skipping ${task.id}: previous run still queued or running`
+  logForDebugging(skipNote)
+  return queued
+}
+
 /**
  * REPL wrapper for the cron scheduler. Mounts the scheduler once and tears
  * it down on unmount. Fired prompts go into the command queue as 'later'
@@ -71,16 +102,25 @@ export function useScheduledTasks({
     // forward isMeta, so their messages remain visible in the
     // transcript. This is acceptable since normal mode is not the
     // primary use case for scheduled tasks.
+    let disposed = false
     const enqueueForLead = async (prompt: string) => {
       const command = await createAutonomyQueuedPrompt({
         basePrompt: prompt,
         trigger: 'scheduled-task',
         currentDir: getCwd(),
         workload: WORKLOAD_CRON,
+        shouldCreate: () => !disposed,
       })
       if (!command) {
         return
       }
+      if (disposed) {
+        await markAutonomyRunCancelled(
+          command.autonomy!.runId,
+          command.autonomy!.rootDir,
+        )
+        return
+      }
       enqueuePendingNotification(command)
     }
 
@@ -90,7 +130,12 @@ export function useScheduledTasks({
       // which is populated from disk at scheduler startup — this path only
       // handles team-lead durable crons.
       onFire: prompt => {
-        void enqueueForLead(prompt)
+        void enqueueForLead(prompt).catch(error =>
+          logForDebugging(
+            `[ScheduledTasks] failed to enqueue missed task prompt: ${error}`,
+            { level: 'error' },
+          ),
+        )
       },
       // Normal fires receive the full CronTask so we can route by agentId.
       onFireTask: task => {
@@ -101,22 +146,26 @@ export function useScheduledTasks({
               store.getState().tasks,
             )
             if (teammate && !isTerminalTaskStatus(teammate.status)) {
-              const command = await createAutonomyQueuedPrompt({
-                basePrompt: task.prompt,
-                trigger: 'scheduled-task',
-                currentDir: getCwd(),
-                sourceId: task.id,
-                sourceLabel: task.prompt,
-                workload: WORKLOAD_CRON,
-              })
+              const command = await createScheduledTaskQueuedCommand(
+                task,
+                { shouldCreate: () => !disposed },
+              )
               if (!command) {
                 return
               }
+              if (disposed) {
+                await markAutonomyRunCancelled(
+                  command.autonomy!.runId,
+                  command.autonomy!.rootDir,
+                )
+                return
+              }
               const injected = injectUserMessageToTeammate(
                 teammate.id,
                 command.value as string,
                 {
                   autonomyRunId: command.autonomy?.runId,
+                  autonomyRootDir: command.autonomy?.rootDir,
                   origin: command.origin,
                 },
                 setAppState,
@@ -125,6 +174,7 @@ export function useScheduledTasks({
                 await markAutonomyRunFailed(
                   command.autonomy.runId,
                   `Teammate ${task.agentId} exited before the scheduled message could be delivered.`,
+                  command.autonomy.rootDir,
                 )
               }
               return
@@ -139,24 +189,32 @@ export function useScheduledTasks({
             return
           }
 
-          const command = await createAutonomyQueuedPrompt({
-            basePrompt: task.prompt,
-            trigger: 'scheduled-task',
-            currentDir: getCwd(),
-            sourceId: task.id,
-            sourceLabel: task.prompt,
-            workload: WORKLOAD_CRON,
-          })
+          const command = await createScheduledTaskQueuedCommand(
+            task,
+            { shouldCreate: () => !disposed },
+          )
           if (!command) {
             return
           }
+          if (disposed) {
+            await markAutonomyRunCancelled(
+              command.autonomy!.runId,
+              command.autonomy!.rootDir,
+            )
+            return
+          }
 
           const msg = createScheduledTaskFireMessage(
             `Running scheduled task (${formatCronFireTime(new Date())})`,
           )
           setMessages(prev => [...prev, msg])
           enqueuePendingNotification(command)
-        })()
+        })().catch(error =>
+          logForDebugging(
+            `[ScheduledTasks] failed to enqueue task ${task.id}: ${error}`,
+            { level: 'error' },
+          ),
+        )
       },
       isLoading: () => isLoadingRef.current,
       assistantMode,
@@ -164,7 +222,10 @@ export function useScheduledTasks({
       isKilled: () => !isKairosCronEnabled(),
     })
     scheduler.start()
-    return () => scheduler.stop()
+    return () => {
+      disposed = true
+      scheduler.stop()
+    }
     // assistantMode is stable for the session lifetime; store/setAppState are
     // stable refs from useSyncExternalStore; setMessages is a stable useCallback.
     // eslint-disable-next-line react-hooks/exhaustive-deps
diff --git a/src/proactive/useProactive.ts b/src/proactive/useProactive.ts
index aa79ef7584..2853725fa5 100644
--- a/src/proactive/useProactive.ts
+++ b/src/proactive/useProactive.ts
@@ -9,7 +9,9 @@ import { useEffect, useRef } from 'react'
 import type { QueuedCommand } from '../types/textInputTypes.js'
 import { TICK_TAG } from '../constants/xml.js'
 import { getCwd } from '../utils/cwd.js'
+import { cancelQueuedAutonomyCommands } from '../utils/autonomyQueueLifecycle.js'
 import { createProactiveAutonomyCommands } from '../utils/autonomyRuns.js'
+import { logForDebugging } from '../utils/debug.js'
 import {
   isProactiveActive,
   isProactivePaused,
@@ -38,6 +40,8 @@ export function useProactive(opts: UseProactiveOpts): void {
     if (!isProactiveActive()) return
 
     let timer: ReturnType<typeof setTimeout> | null = null
+    let disposed = false
+    let generating = false
 
     function scheduleTick(): void {
       const nextTs = Date.now() + TICK_INTERVAL_MS
@@ -66,25 +70,51 @@ export function useProactive(opts: UseProactiveOpts): void {
           isLoading ||
           isInPlanMode ||
           hasActiveLocalJsxUI ||
-          queuedCommandsLength > 0
+          queuedCommandsLength > 0 ||
+          generating
         ) {
           scheduleTick()
           return
         }
 
+        generating = true
         void (async () => {
           const commands = await createProactiveAutonomyCommands({
             basePrompt: `<${TICK_TAG}>${new Date().toLocaleTimeString()}</${TICK_TAG}>`,
             currentDir: getCwd(),
+            shouldCreate: () => !disposed,
           })
-          for (const command of commands) {
-            // Always queue proactive turns. This avoids races where the prompt
-            // is built asynchronously, a user turn starts meanwhile, and a
-            // direct-submit path would silently drop the autonomy turn after
-            // consuming its heartbeat due-state.
-            optsRef.current.onQueueTick(command)
+          if (disposed) {
+            await cancelQueuedAutonomyCommands({ commands })
+            return
+          }
+          const queuedCommands: QueuedCommand[] = []
+          try {
+            for (const command of commands) {
+              // Always queue proactive turns. This avoids races where the prompt
+              // is built asynchronously, a user turn starts meanwhile, and a
+              // direct-submit path would silently drop the autonomy turn after
+              // consuming its heartbeat due-state.
+              optsRef.current.onQueueTick(command)
+              queuedCommands.push(command)
+            }
+          } catch (error) {
+            await cancelQueuedAutonomyCommands({
+              commands: commands.filter(
+                command => !queuedCommands.includes(command),
+              ),
+            })
+            throw error
           }
         })()
+          .catch(error =>
+            logForDebugging(`[Proactive] failed to create tick: ${error}`, {
+              level: 'error',
+            }),
+          )
+          .finally(() => {
+            generating = false
+          })
 
         // Schedule next tick
         scheduleTick()
@@ -94,6 +124,7 @@ export function useProactive(opts: UseProactiveOpts): void {
     scheduleTick()
 
     return () => {
+      disposed = true
       if (timer !== null) {
         clearTimeout(timer)
         timer = null
diff --git a/src/query.ts b/src/query.ts
index fc7830727a..b7bce909c1 100644
--- a/src/query.ts
+++ b/src/query.ts
@@ -71,10 +71,16 @@ const jobClassifier = feature('TEMPLATES')
   : null
 /* eslint-enable @typescript-eslint/no-require-imports */
 import {
+  enqueue,
   remove as removeFromQueue,
   getCommandsByMaxPriority,
   isSlashCommand,
 } from './utils/messageQueueManager.js'
+import {
+  type AutonomyTurnOutcome,
+  claimConsumableQueuedAutonomyCommands,
+  finalizeAutonomyCommandsForTurn,
+} from './utils/autonomyQueueLifecycle.js'
 import { notifyCommandLifecycle } from './utils/commandLifecycle.js'
 import { headlessProfilerCheckpoint } from './utils/headlessProfiler.js'
 import {
@@ -92,6 +98,7 @@ import { SLEEP_TOOL_NAME } from '@claude-code-best/builtin-tools/tools/SleepTool
 import { executePostSamplingHooks } from './utils/hooks/postSamplingHooks.js'
 import { executeStopFailureHooks } from './utils/hooks.js'
 import type { QuerySource } from './constants/querySource.js'
+import type { QueuedCommand } from './types/textInputTypes.js'
 import { createDumpPromptsFetch } from './services/api/dumpPrompts.js'
 import { StreamingToolExecutor } from './services/tools/StreamingToolExecutor.js'
 import { queryCheckpoint } from './utils/queryProfiler.js'
@@ -111,7 +118,11 @@ import {
 } from './bootstrap/state.js'
 import { createBudgetTracker, checkTokenBudget } from './query/tokenBudget.js'
 import { count } from './utils/array.js'
-import { createTrace, endTrace, isLangfuseEnabled } from './services/langfuse/index.js'
+import {
+  createTrace,
+  endTrace,
+  isLangfuseEnabled,
+} from './services/langfuse/index.js'
 import { getAPIProvider } from './utils/model/providers.js'
 
 /* eslint-disable @typescript-eslint/no-require-imports */
@@ -129,7 +140,11 @@ function* yieldMissingToolResultBlocks(
 ) {
   for (const assistantMessage of assistantMessages) {
     // Extract all tool use blocks from this assistant message
-    const toolUseBlocks = (Array.isArray(assistantMessage.message?.content) ? assistantMessage.message.content : []).filter(
+    const toolUseBlocks = (
+      Array.isArray(assistantMessage.message?.content)
+        ? assistantMessage.message.content
+        : []
+    ).filter(
       (content: { type: string }) => content.type === 'tool_use',
     ) as ToolUseBlock[]
 
@@ -181,6 +196,33 @@ function isWithheldMaxOutputTokens(
   return msg?.type === 'assistant' && msg.apiError === 'max_output_tokens'
 }
 
+// Translates how a query turn ended into the AutonomyTurnOutcome passed to
+// finalizeAutonomyCommandsForTurn; a thrown error outranks any terminal value.
+function getAutonomyTurnOutcome(params: {
+  terminal?: Terminal
+  thrownError?: unknown
+}): AutonomyTurnOutcome {
+  const { terminal, thrownError } = params
+  if (thrownError !== undefined) {
+    return { type: 'failed', error: thrownError }
+  }
+
+  const reason = terminal?.reason
+  if (reason === 'completed') return { type: 'completed' }
+  if (
+    reason === undefined ||
+    reason === 'aborted_streaming' ||
+    reason === 'aborted_tools'
+  ) {
+    return { type: 'cancelled' }
+  }
+  if (reason === 'model_error') {
+    return { type: 'failed', error: terminal.error }
+  }
+  const message = `query ended without successful completion: ${reason}`
+  return { type: 'failed', message }
+}
+
 export type QueryParams = {
   messages: Message[]
   systemPrompt: SystemPrompt
@@ -230,6 +272,7 @@ export async function* query(
   Terminal
 > {
   const consumedCommandUuids: string[] = []
+  const consumedAutonomyCommands: QueuedCommand[] = []
 
   // Create Langfuse trace for this query turn (no-op if not configured).
   // When called as a sub-agent, langfuseTrace is already set by runAgent()
@@ -238,8 +281,9 @@ export async function* query(
   logForDebugging(
     `[query] ownsTrace=${ownsTrace} incoming langfuseTrace=${params.toolUseContext.langfuseTrace ? 'present' : 'null/undefined'} isLangfuseEnabled=${isLangfuseEnabled()}`,
   )
-  const langfuseTrace = params.toolUseContext.langfuseTrace
-    ?? (isLangfuseEnabled()
+  const langfuseTrace =
+    params.toolUseContext.langfuseTrace ??
+    (isLangfuseEnabled()
       ? createTrace({
           sessionId: getSessionId(),
           model: params.toolUseContext.options.mainLoopModel,
@@ -258,9 +302,34 @@ export async function* query(
     : params
 
   let terminal: Terminal | undefined
+  let didThrow = false
+  let thrownError: unknown
   try {
-    terminal = yield* queryLoop(paramsWithTrace, consumedCommandUuids)
+    terminal = yield* queryLoop(
+      paramsWithTrace,
+      consumedCommandUuids,
+      consumedAutonomyCommands,
+    )
+  } catch (error) {
+    didThrow = true
+    thrownError = error
+    throw error
   } finally {
+    await finalizeAutonomyCommandsForTurn({
+      commands: consumedAutonomyCommands,
+      outcome: getAutonomyTurnOutcome({
+        terminal,
+        ...(didThrow ? { thrownError } : {}),
+      }),
+      priority: 'later',
+    })
+      .then(nextCommands => {
+        for (const command of nextCommands) {
+          enqueue(command)
+        }
+      })
+      .catch(logError)
+
     // Only end the trace if we created it — sub-agents own their traces
     if (ownsTrace) {
       const isAborted =
@@ -283,6 +352,7 @@ export async function* query(
 async function* queryLoop(
   params: QueryParams,
   consumedCommandUuids: string[],
+  consumedAutonomyCommands: QueuedCommand[],
 ): AsyncGenerator<
   | StreamEvent
   | RequestStartEvent
@@ -790,7 +860,14 @@ async function* queryLoop(
             let yieldMessage: typeof message = message
             if (message.type === 'assistant') {
               const assistantMsg = message as AssistantMessage
-              const contentArr = Array.isArray(assistantMsg.message?.content) ? assistantMsg.message.content as unknown as Array<{ type: string; input?: unknown; name?: string; [key: string]: unknown }> : []
+              const contentArr = Array.isArray(assistantMsg.message?.content)
+                ? (assistantMsg.message.content as unknown as Array<{
+                    type: string
+                    input?: unknown
+                    name?: string
+                    [key: string]: unknown
+                  }>)
+                : []
               let clonedContent: typeof contentArr | undefined
               for (let i = 0; i < contentArr.length; i++) {
                 const block = contentArr[i]!
@@ -826,7 +903,10 @@ async function* queryLoop(
               if (clonedContent) {
                 yieldMessage = {
                   ...message,
-                  message: { ...(assistantMsg.message ?? {}), content: clonedContent },
+                  message: {
+                    ...(assistantMsg.message ?? {}),
+                    content: clonedContent,
+                  },
                 } as typeof message
               }
             }
@@ -872,7 +952,11 @@ async function* queryLoop(
               const assistantMessage = message as AssistantMessage
               assistantMessages.push(assistantMessage)
 
-              const msgToolUseBlocks = (Array.isArray(assistantMessage.message?.content) ? assistantMessage.message.content : []).filter(
+              const msgToolUseBlocks = (
+                Array.isArray(assistantMessage.message?.content)
+                  ? assistantMessage.message.content
+                  : []
+              ).filter(
                 (content: { type: string }) => content.type === 'tool_use',
               ) as ToolUseBlock[]
               if (msgToolUseBlocks.length > 0) {
@@ -1005,7 +1089,10 @@ async function* queryLoop(
       logEvent('tengu_query_error', {
         assistantMessages: assistantMessages.length,
         toolUses: assistantMessages.flatMap(_ =>
-          (Array.isArray(_.message?.content) ? _.message.content as Array<{ type: string }> : []).filter(content => content.type === 'tool_use'),
+          (Array.isArray(_.message?.content)
+            ? (_.message.content as Array<{ type: string }>)
+            : []
+          ).filter(content => content.type === 'tool_use'),
         ).length,
 
         queryChainId: queryChainIdForAnalytics,
@@ -1307,7 +1394,10 @@ async function* queryLoop(
       // error → hook blocking → retry → error → …
       if (lastMessage?.isApiErrorMessage) {
         void executeStopFailureHooks(lastMessage, toolUseContext)
-        return { reason: 'completed' }
+        return {
+          reason: 'model_error',
+          error: lastMessage.error ?? lastMessage.apiError ?? 'api_error',
+        }
       }
 
       const stopHookResult = yield* handleStopHooks(
@@ -1408,7 +1498,6 @@ async function* queryLoop(
 
     queryCheckpoint('query_tool_execution_start')
 
-
     if (streamingToolExecutor) {
       logEvent('tengu_streaming_tool_execution_used', {
         tool_count: toolUseBlocks.length,
@@ -1468,9 +1557,14 @@ async function* queryLoop(
       const lastAssistantMessage = assistantMessages.at(-1)
       let lastAssistantText: string | undefined
       if (lastAssistantMessage) {
-        const textBlocks = (Array.isArray(lastAssistantMessage.message?.content) ? lastAssistantMessage.message.content as Array<{ type: string; text?: string }> : []).filter(
-          block => block.type === 'text',
-        )
+        const textBlocks = (
+          Array.isArray(lastAssistantMessage.message?.content)
+            ? (lastAssistantMessage.message.content as Array<{
+                type: string
+                text?: string
+              }>)
+            : []
+        ).filter(block => block.type === 'text')
         if (textBlocks.length > 0) {
           const lastTextBlock = textBlocks.at(-1)
           if (lastTextBlock && 'text' in lastTextBlock) {
@@ -1622,12 +1716,32 @@ async function* queryLoop(
       // user prompts, even if someone stamps an agentId on one.
       return cmd.mode === 'task-notification' && cmd.agentId === currentAgentId
     })
+    const queuedAutonomyClaim = await claimConsumableQueuedAutonomyCommands(
+      queuedCommandsSnapshot,
+    )
+    if (queuedAutonomyClaim.staleCommands.length > 0) {
+      removeFromQueue(queuedAutonomyClaim.staleCommands)
+    }
+
+    const claimedConsumedCommands = queuedAutonomyClaim.claimedCommands.filter(
+      cmd => cmd.mode === 'prompt' || cmd.mode === 'task-notification',
+    )
+    if (claimedConsumedCommands.length > 0) {
+      consumedAutonomyCommands.push(...claimedConsumedCommands)
+      for (const cmd of claimedConsumedCommands) {
+        if (cmd.uuid) {
+          consumedCommandUuids.push(cmd.uuid)
+          notifyCommandLifecycle(cmd.uuid, 'started')
+        }
+      }
+      removeFromQueue(claimedConsumedCommands)
+    }
 
     for await (const attachment of getAttachmentMessages(
       null,
       updatedToolUseContext,
       null,
-      queuedCommandsSnapshot,
+      queuedAutonomyClaim.attachmentCommands,
       [...messagesForQuery, ...assistantMessages, ...toolResults],
       querySource,
     )) {
@@ -1659,7 +1773,6 @@ async function* queryLoop(
       pendingMemoryPrefetch.consumedOnIteration = turnCount - 1
     }
 
-
     // Inject prefetched skill discovery. collectSkillDiscoveryPrefetch emits
     // hidden_by_main_turn — true when the prefetch resolved before this point
     // (should be >98% at AKI@250ms / Haiku@573ms vs turn durations of 2-30s).
@@ -1675,8 +1788,11 @@ async function* queryLoop(
 
     // Remove only commands that were actually consumed as attachments.
     // Prompt and task-notification commands are converted to attachments above.
-    const consumedCommands = queuedCommandsSnapshot.filter(
-      cmd => cmd.mode === 'prompt' || cmd.mode === 'task-notification',
+    const claimedCommandSet = new Set(claimedConsumedCommands)
+    const consumedCommands = queuedAutonomyClaim.attachmentCommands.filter(
+      cmd =>
+        (cmd.mode === 'prompt' || cmd.mode === 'task-notification') &&
+        !claimedCommandSet.has(cmd),
     )
     if (consumedCommands.length > 0) {
       for (const cmd of consumedCommands) {
diff --git a/src/query/transitions.ts b/src/query/transitions.ts
index f8fe515514..ba2fa8b401 100644
--- a/src/query/transitions.ts
+++ b/src/query/transitions.ts
@@ -1,3 +1,20 @@
-// Auto-generated stub — replace with real implementation
-export type Terminal = any;
-export type Continue = any;
+export type Terminal = // return value of query()/queryLoop(); `reason` tags each variant
+  | { reason: 'completed' }
+  | { reason: 'blocking_limit' }
+  | { reason: 'image_error' }
+  | { reason: 'model_error'; error?: unknown } // error: optional detail about the failure
+  | { reason: 'aborted_streaming' }
+  | { reason: 'aborted_tools' }
+  | { reason: 'prompt_too_long' }
+  | { reason: 'stop_hook_prevented' }
+  | { reason: 'hook_stopped' }
+  | { reason: 'max_turns'; turnCount: number } // NOTE(review): turnCount presumably turns used — confirm
+// Continue: second `reason`-tagged union. NOTE(review): presumably why the loop iterates again — confirm.
+export type Continue =
+  | { reason: 'collapse_drain_retry'; committed: number }
+  | { reason: 'reactive_compact_retry' }
+  | { reason: 'max_output_tokens_escalate' }
+  | { reason: 'max_output_tokens_recovery'; attempt: number }
+  | { reason: 'stop_hook_blocking' }
+  | { reason: 'token_budget_continuation' }
+  | { reason: 'next_turn' }
diff --git a/src/screens/REPL.tsx b/src/screens/REPL.tsx
index 28e4132d84..fe26e38cfb 100644
--- a/src/screens/REPL.tsx
+++ b/src/screens/REPL.tsx
@@ -79,10 +79,9 @@ import { isEnvTruthy } from '../utils/envUtils.js';
 import { formatTokens, truncateToWidth } from '../utils/format.js';
 import { consumeEarlyInput } from '../utils/earlyInput.js';
 import {
-  finalizeAutonomyRunCompleted,
-  finalizeAutonomyRunFailed,
-  markAutonomyRunRunning,
-} from '../utils/autonomyRuns.js';
+  claimConsumableQueuedAutonomyCommands,
+  finalizeAutonomyCommandsForTurn,
+} from '../utils/autonomyQueueLifecycle.js';
 
 import { setMemberActive } from '../utils/swarm/teamHelpers.js';
 import {
@@ -3054,18 +3053,19 @@ export function REPL({
               setMessages(old => {
                 const postBoundary = getMessagesAfterCompactBoundary(old, {
                   includeSnipped: true,
-                })
+                });
                 // Hard cap: keep at most 500 messages in fullscreen scrollback
                 // to prevent unbounded memory growth in multi-day sessions.
                 // normalizeMessages/applyGrouping are O(n), and Ink fiber
                 // trees cost ~250KB RSS per message. Without this cap,
                 // scrollback after several compactions can reach thousands
                 // of messages (observed: 13k+, 1GB+ heap).
-                const MAX_FULLSCREEN_SCROLLBACK = 500
-                const kept = postBoundary.length > MAX_FULLSCREEN_SCROLLBACK
-                  ? postBoundary.slice(-MAX_FULLSCREEN_SCROLLBACK)
-                  : postBoundary
-                return [...kept, newMessage]
+                const MAX_FULLSCREEN_SCROLLBACK = 500;
+                const kept =
+                  postBoundary.length > MAX_FULLSCREEN_SCROLLBACK
+                    ? postBoundary.slice(-MAX_FULLSCREEN_SCROLLBACK)
+                    : postBoundary;
+                return [...kept, newMessage];
               });
             } else {
               setMessages(() => [newMessage]);
@@ -3098,13 +3098,10 @@ export function REPL({
               // so interleaved non-ephemeral messages caused duplicate progress
               // entries to accumulate (observed 13k+ entries in sleep-heavy sessions).
               for (let i = oldMessages.length - 1; i >= 0; i--) {
-                const m = oldMessages[i]!
-                if (m.type !== 'progress') break
-                const mData = m.data as Record<string, unknown> | undefined
-                if (
-                  m.parentToolUseID === newMessage.parentToolUseID &&
-                  mData?.type === newData.type
-                ) {
+                const m = oldMessages[i]!;
+                if (m.type !== 'progress') break;
+                const mData = m.data as Record<string, unknown> | undefined;
+                if (m.parentToolUseID === newMessage.parentToolUseID && mData?.type === newData.type) {
                   const copy = oldMessages.slice();
                   copy[i] = newMessage;
                   return copy;
@@ -3477,7 +3474,7 @@ export function REPL({
       onBeforeQueryCallback?: (input: string, newMessages: MessageType[]) => Promise<boolean>,
       input?: string,
       effort?: EffortValue,
-    ): Promise<void> => {
+    ): Promise<boolean> => {
       // If this is a teammate, mark them as active when starting a turn
       if (isAgentSwarmsEnabled()) {
         const teamName = getTeamName();
@@ -3508,7 +3505,7 @@ export function REPL({
               logEvent('tengu_concurrent_onquery_enqueued', {});
             }
           });
-        return;
+        return false;
       }
 
       try {
@@ -3541,7 +3538,7 @@ export function REPL({
         if (onBeforeQueryCallback && input) {
           const shouldProceed = await onBeforeQueryCallback(input, latestMessages);
           if (!shouldProceed) {
-            return;
+            return true;
           }
         }
 
@@ -3690,6 +3687,7 @@ export function REPL({
           }
         }
       }
+      return true;
     },
     [onQueryImpl, setAppState, resetLoadingState, queryGuard, mrOnBeforeQuery, mrOnTurnComplete],
   );
@@ -4844,44 +4842,62 @@ export function REPL({
             } satisfies QueuedCommand)
           : input;
 
-      const newAbortController = createAbortController();
-      setAbortController(newAbortController);
+      void (async () => {
+        const claim = await claimConsumableQueuedAutonomyCommands([queuedCommand]);
+        const command = claim.attachmentCommands[0];
+        if (!command) return;
 
-      // Create a user message with the formatted content (includes XML wrapper)
-      const userMessage = createUserMessage({
-        content: queuedCommand.value as string,
-        isMeta: queuedCommand.isMeta ? true : undefined,
-        origin: queuedCommand.origin,
-      });
+        const newAbortController = createAbortController();
+        setAbortController(newAbortController);
 
-      const autonomyRunId = queuedCommand.autonomy?.runId;
-      if (autonomyRunId) {
-        void markAutonomyRunRunning(autonomyRunId);
-      }
+        // Create a user message with the formatted content (includes XML wrapper)
+        const userMessage = createUserMessage({
+          content: command.value,
+          isMeta: command.isMeta ? true : undefined,
+          origin: command.origin,
+        });
 
-      void onQuery([userMessage], newAbortController, true, [], mainLoopModel)
-        .then(() => {
-          if (autonomyRunId) {
-            void finalizeAutonomyRunCompleted({
-              runId: autonomyRunId,
+        let executed = false;
+        try {
+          executed = (await onQuery([userMessage], newAbortController, true, [], mainLoopModel)) !== false;
+        } catch (error: unknown) {
+          try {
+            await finalizeAutonomyCommandsForTurn({
+              commands: claim.claimedCommands,
+              outcome: { type: 'failed', error },
               currentDir: getCwd(),
               priority: 'later',
-            }).then(nextCommands => {
-              for (const command of nextCommands) {
-                enqueue(command);
-              }
-            });
-          }
-        })
-        .catch((error: unknown) => {
-          if (autonomyRunId) {
-            void finalizeAutonomyRunFailed({
-              runId: autonomyRunId,
-              error: String(error),
             });
+          } catch (finalizeError: unknown) {
+            logError(toError(finalizeError));
           }
           logError(toError(error));
-        });
+          return;
+        }
+
+        // Only finalize as completed when onQuery actually executed the turn
+        // (it returns false from the concurrent-guard path without running).
+        // Keep this finalize in its own try/catch so a failure here does not
+        // trigger a second finalize as `failed` for the same commands.
+        if (!executed) {
+          return;
+        }
+        try {
+          const nextCommands = await finalizeAutonomyCommandsForTurn({
+            commands: claim.claimedCommands,
+            outcome: { type: 'completed' },
+            currentDir: getCwd(),
+            priority: 'later',
+          });
+          for (const nextCommand of nextCommands) {
+            enqueue(nextCommand);
+          }
+        } catch (finalizeError: unknown) {
+          logError(toError(finalizeError));
+        }
+      })().catch((error: unknown) => {
+        logError(toError(error));
+      });
       return true;
     },
     [onQuery, mainLoopModel, store],
diff --git a/src/services/compact/postCompactCleanup.ts b/src/services/compact/postCompactCleanup.ts
index 50cbfd6172..b89e3a0be6 100644
--- a/src/services/compact/postCompactCleanup.ts
+++ b/src/services/compact/postCompactCleanup.ts
@@ -5,9 +5,9 @@ import { getUserContext } from '../../context.js'
 import { clearSpeculativeChecks } from '@claude-code-best/builtin-tools/tools/BashTool/bashPermissions.js'
 import { clearClassifierApprovals } from '../../utils/classifierApprovals.js'
 import { resetGetMemoryFilesCache } from '../../utils/claudemd.js'
+import { logError } from '../../utils/log.js'
 import { clearSessionMessagesCache } from '../../utils/sessionStorage.js'
 import { clearBetaTracingState } from '../../utils/telemetry/betaSessionTracing.js'
-import { getLspServerManager } from '../../services/lsp/manager.js'
 import { resetMicrocompactState } from './microCompact.js'
 
 /**
@@ -29,7 +29,7 @@ import { resetMicrocompactState } from './microCompact.js'
  * pass querySource — undefined is only safe for callers that are
  * genuinely main-thread-only (/compact, /clear).
  */
-export async function runPostCompactCleanup(querySource?: QuerySource): Promise<void> {
+export function runPostCompactCleanup(querySource?: QuerySource): void {
   // Subagents (agent:*) run in the same process and share module-level
   // state with the main thread. Only reset main-thread module-level state
   // (context-collapse, memory file cache) for main-thread compacts.
@@ -70,20 +70,22 @@ export async function runPostCompactCleanup(querySource?: QuerySource): Promise<
   // cacheUtils resets. See compactConversation() for full rationale.
   clearBetaTracingState()
   if (feature('COMMIT_ATTRIBUTION')) {
-    void import('../../utils/attributionHooks.js').then(m =>
-      m.sweepFileContentCache(),
-    )
+    // Intentionally fire-and-forget: the file-content cache sweep is a
+    // best-effort memory release whose completion no caller depends on.
+    // Keeping `runPostCompactCleanup` synchronous lets compaction call sites
+    // (REPL post-compact handler, /compact command, autoCompact) finish their
+    // own state transitions without an extra microtask round-trip — the sweep
+    // catches up on the next event-loop tick.
+    //
+    // The .catch is required even though the current attributionHooks.ts is a
+    // no-op stub: without it, a future restored sweepFileContentCache that
+    // throws would surface as an unhandled promise rejection from a function
+    // whose synchronous signature gives callers no way to observe it.
+    void import('../../utils/attributionHooks.js')
+      .then(m => m.sweepFileContentCache())
+      .catch(error => {
+        logError(error)
+      })
   }
   clearSessionMessagesCache()
-  // Close all LSP-tracked files so servers release state for files no longer
-  // in the active context after compaction. Best-effort — LSP may not be
-  // initialized, and closeAllFiles catches per-file errors internally.
-  try {
-    const lspManager = getLspServerManager()
-    if (lspManager) {
-      await lspManager.closeAllFiles()
-    }
-  } catch {
-    // LSP module may not be available in all environments
-  }
 }
diff --git a/src/services/skillLearning/featureCheck.ts b/src/services/skillLearning/featureCheck.ts
index f67f17919c..9a1488ee1d 100644
--- a/src/services/skillLearning/featureCheck.ts
+++ b/src/services/skillLearning/featureCheck.ts
@@ -1,12 +1,36 @@
 import { feature } from 'bun:bundle'
 
+/**
+ * Build-time presence check: is the `/skill-learning` slash command
+ * compiled into this build? Used by the command registry's `isEnabled` so
+ * the command appears in the menu whenever it is buildable. Operators
+ * activate the subsystem itself via `/skill-learning start`, which flips
+ * `SKILL_LEARNING_ENABLED=1` and turns the runtime observers on (see
+ * `isSkillLearningEnabled`).
+ */
+export function isSkillLearningCompiledIn(): boolean {
+  if (feature('SKILL_LEARNING')) return true
+  return false
+}
+
+/**
+ * Runtime activation check: is the skill-learning subsystem actively
+ * running (toolEvent, runtime, session observers attached, persisting
+ * observations to disk)? Off by default — the operator must run
+ * `/skill-learning start` (which sets `SKILL_LEARNING_ENABLED=1`).
+ *
+ * Legacy `FEATURE_SKILL_LEARNING=1` is also accepted for backward
+ * compatibility with operators who set it before the slash-command UX
+ * landed.
+ *
+ * Build-flag gating is intentionally NOT performed here: the command
+ * registry already gates command compilation on the build flag, and this
+ * function is only reached from code paths that the build flag has
+ * already let through. Decoupling keeps the test surface clean (tests
+ * exercise the env-var contract without needing to mock `bun:bundle`).
+ */
 export function isSkillLearningEnabled(): boolean {
-  if (process.env.SKILL_LEARNING_ENABLED === '0') return false
   if (process.env.SKILL_LEARNING_ENABLED === '1') return true
-  if (process.env.FEATURE_SKILL_LEARNING === '0') return false
   if (process.env.FEATURE_SKILL_LEARNING === '1') return true
-  if (feature('SKILL_LEARNING')) {
-    return true
-  }
   return false
 }
diff --git a/src/services/skillLearning/projectContext.ts b/src/services/skillLearning/projectContext.ts
index a886cee6f5..3f50379735 100644
--- a/src/services/skillLearning/projectContext.ts
+++ b/src/services/skillLearning/projectContext.ts
@@ -45,15 +45,44 @@ export function getProjectContextPath(projectId: string): string {
 // in the tool.call hot path (one wrapper invocation per tool) that cost would
 // accumulate into the hundreds-of-ms range per session. Cache keyed by the
 // exact cwd string so different worktrees still get independent entries.
+//
+// Bounded with LRU eviction: long-lived processes that traverse many
+// worktrees (e.g. multi-repo build orchestrators) would otherwise grow the
+// cache without limit. Each entry holds a SkillLearningProjectContext
+// (instinct + skill lists), so the cap ensures bounded memory regardless
+// of cwd diversity. `defines.ts` originally flagged this as
+// "无淘汰机制（非 GB 级主因）" (no eviction mechanism; not the main GB-scale cause) — this fix closes that gap.
+const PROJECT_CONTEXT_CACHE_MAX = 32
+const PROJECT_CONTEXT_CACHE_TRIM_TO = 24
 const contextCache = new Map<string, SkillLearningProjectContext>()
 const PERSIST_INTERVAL_MS = 5 * 60 * 1000
 let lastPersistAt = 0
 
+function setProjectContextCache(
+  cwd: string,
+  ctx: SkillLearningProjectContext,
+): void {
+  if (contextCache.has(cwd)) contextCache.delete(cwd)
+  contextCache.set(cwd, ctx)
+  if (contextCache.size > PROJECT_CONTEXT_CACHE_MAX) {
+    const toDrop = contextCache.size - PROJECT_CONTEXT_CACHE_TRIM_TO
+    const iter = contextCache.keys()
+    for (let i = 0; i < toDrop; i++) {
+      const next = iter.next()
+      if (next.done) break
+      contextCache.delete(next.value)
+    }
+  }
+}
+
 export function resolveProjectContext(
   cwd = process.cwd(),
 ): SkillLearningProjectContext {
   const cached = contextCache.get(cwd)
   if (cached) {
+    // Refresh insertion order so frequently-accessed cwds survive eviction.
+    contextCache.delete(cwd)
+    contextCache.set(cwd, cached)
     // Still touch the registry so long-lived processes keep `lastSeenAt`
     // reasonably fresh, but throttle the write so it doesn't fire on every
     // tool call.
@@ -65,7 +94,7 @@ export function resolveProjectContext(
     return cached
   }
   const resolved = resolveContext(cwd)
-  contextCache.set(cwd, resolved)
+  setProjectContextCache(cwd, resolved)
   persistProjectContext(resolved)
   lastPersistAt = Date.now()
   return resolved
diff --git a/src/services/skillLearning/promotion.ts b/src/services/skillLearning/promotion.ts
index 12fb2805e6..7efc3c9bf4 100644
--- a/src/services/skillLearning/promotion.ts
+++ b/src/services/skillLearning/promotion.ts
@@ -23,8 +23,30 @@ export type PromotionOptions = {
   minConfidence?: number
 }
 
+/**
+ * Set bounded with FIFO eviction. Promotions per session are few in
+ * practice (single digits), but a long-lived sandbox/daemon that never
+ * restarts could keep adding ids. The cap is defensive: past N ids the
+ * oldest are forgotten and may be promoted again, which is benign
+ * because promotion is idempotent at the lifecycle layer.
+ */
+const SESSION_PROMOTED_IDS_MAX = 256
+const SESSION_PROMOTED_IDS_TRIM_TO = 192
 const sessionPromotedIds = new Set<string>()
 
+function recordSessionPromoted(id: string): void {
+  sessionPromotedIds.add(id)
+  if (sessionPromotedIds.size > SESSION_PROMOTED_IDS_MAX) {
+    const toDrop = sessionPromotedIds.size - SESSION_PROMOTED_IDS_TRIM_TO
+    const iter = sessionPromotedIds.values()
+    for (let i = 0; i < toDrop; i++) {
+      const next = iter.next()
+      if (next.done) break
+      sessionPromotedIds.delete(next.value)
+    }
+  }
+}
+
 export function resetPromotionBookkeeping(): void {
   sessionPromotedIds.clear()
 }
@@ -103,7 +125,7 @@ export async function checkPromotion(
     }
     await saveInstinct(globalInstinct, globalOptions)
 
-    sessionPromotedIds.add(candidate.instinctId)
+    recordSessionPromoted(candidate.instinctId)
     promoted.push(candidate)
   }
 
diff --git a/src/services/skillSearch/featureCheck.ts b/src/services/skillSearch/featureCheck.ts
index 38dcda534e..dbdce72f3c 100644
--- a/src/services/skillSearch/featureCheck.ts
+++ b/src/services/skillSearch/featureCheck.ts
@@ -1,10 +1,30 @@
 import { feature } from 'bun:bundle'
 
-export function isSkillSearchEnabled(): boolean {
-  if (process.env.SKILL_SEARCH_ENABLED === '0') return false
-  if (process.env.SKILL_SEARCH_ENABLED === '1') return true
-  if (feature('EXPERIMENTAL_SKILL_SEARCH')) {
-    return true
-  }
+/**
+ * Build-time presence check: is the `/skill-search` slash command compiled
+ * into this build? Used by the command registry's `isEnabled` so the
+ * command appears in the menu whenever it is buildable. Operators activate
+ * the subsystem itself via `/skill-search start`, which flips
+ * `SKILL_SEARCH_ENABLED=1` and turns the runtime hot paths on (see
+ * `isSkillSearchEnabled`).
+ */
+export function isSkillSearchCompiledIn(): boolean {
+  if (feature('EXPERIMENTAL_SKILL_SEARCH')) return true
   return false
 }
+
+/**
+ * Runtime activation check: is the skill-search subsystem currently doing
+ * work (intentNormalize Haiku calls, prefetch hot path, telemetry)? Off by
+ * default — the operator must run `/skill-search start` (which sets
+ * `SKILL_SEARCH_ENABLED=1`). See docs/agent/sur-skill-overflow-bugs.md §5.
+ *
+ * Build-flag gating is intentionally NOT performed here: the command
+ * registry already gates command compilation on the build flag, and this
+ * function is only reached from code paths that the build flag has
+ * already let through. Decoupling keeps the test surface clean (tests
+ * exercise the env-var contract without needing to mock `bun:bundle`).
+ */
+export function isSkillSearchEnabled(): boolean {
+  return process.env.SKILL_SEARCH_ENABLED === '1'
+}
diff --git a/src/services/skillSearch/intentNormalize.ts b/src/services/skillSearch/intentNormalize.ts
index 9073958b85..7ec5c226e3 100644
--- a/src/services/skillSearch/intentNormalize.ts
+++ b/src/services/skillSearch/intentNormalize.ts
@@ -47,10 +47,35 @@ Output ONLY keywords. Nothing else.`
 const DEFAULT_TIMEOUT_MS = 6_000
 const MAX_QUERY_CHARS = 500
 const MAX_KEYWORDS_CHARS = 120
+/**
+ * Bound on the process-level query→keywords cache. Insertion-order LRU:
+ * Map iterates in insertion order, so past the cap we evict from the front
+ * down to CACHE_TRIM_TO. 200 entries × ~600 bytes (query + keywords) ≈ 120 KB
+ * is typical, not a hard bound: keys are the trimmed query, not the capped one.
+ * Without this cap it grew monotonically with Chinese-query diversity in a long session.
+ */
+const CACHE_MAX_ENTRIES = 200
+const CACHE_TRIM_TO = 150
 
 /** Process-level cache. Keyed by the original (trimmed) query. */
 const cache = new Map<string, string>()
 
+function setCachedQueryIntent(key: string, value: string): void {
+  // Refresh insertion order on hit-then-write so frequently-used keys
+  // stay alive (delete + set is the canonical Map-LRU idiom).
+  if (cache.has(key)) cache.delete(key)
+  cache.set(key, value)
+  if (cache.size > CACHE_MAX_ENTRIES) {
+    const toDrop = cache.size - CACHE_TRIM_TO
+    const iter = cache.keys()
+    for (let i = 0; i < toDrop; i++) {
+      const next = iter.next()
+      if (next.done) break
+      cache.delete(next.value)
+    }
+  }
+}
+
 export function isIntentNormalizeEnabled(): boolean {
   return process.env.SKILL_SEARCH_INTENT_ENABLED === '1'
 }
@@ -74,12 +99,17 @@ export async function normalizeQueryIntent(query: string): Promise<string> {
   if (!/[\u4e00-\u9fff]/.test(trimmed)) return trimmed
 
   const cached = cache.get(trimmed)
-  if (cached !== undefined) return cached
+  if (cached !== undefined) {
+    // Refresh LRU position so frequently-queried strings survive eviction.
+    cache.delete(trimmed)
+    cache.set(trimmed, cached)
+    return cached
+  }
 
   const capped = trimmed.slice(0, MAX_QUERY_CHARS)
   const keywords = await callHaiku(capped)
   const result = keywords ? `${trimmed} ${keywords}` : trimmed
-  cache.set(trimmed, result)
+  setCachedQueryIntent(trimmed, result)
   logForDebugging(
     `[skill-search] intent normalized: "${trimmed.slice(0, 40)}" -> "${keywords}"`,
   )
diff --git a/src/services/skillSearch/prefetch.ts b/src/services/skillSearch/prefetch.ts
index 6d77f6c332..502524da18 100644
--- a/src/services/skillSearch/prefetch.ts
+++ b/src/services/skillSearch/prefetch.ts
@@ -14,9 +14,35 @@ import { readFile } from 'node:fs/promises'
 import { join } from 'node:path'
 import { parseFrontmatter } from '../../utils/frontmatterParser.js'
 
+/**
+ * Per-session memoization to avoid re-emitting the same skill discovery /
+ * gap signal twice. Each Set is bounded to keep long-running sessions from
+ * monotonically accumulating skill names and signal keys forever (which
+ * was the original session-scoped-but-unbounded design).
+ *
+ * FIFO eviction by insertion order — once the cap is hit, the oldest
+ * entries roll off and may be re-recorded if rediscovered, which is the
+ * correct degraded behaviour: at worst we re-emit a duplicate signal,
+ * never silently drop a real one.
+ */
+const SESSION_TRACKING_MAX = 1000
+const SESSION_TRACKING_TRIM_TO = 750
 const discoveredThisSession = new Set<string>()
 const recordedGapSignals = new Set<string>()
 
+function addBoundedSessionEntry(set: Set<string>, value: string): void {
+  set.add(value)
+  if (set.size > SESSION_TRACKING_MAX) {
+    const toDrop = set.size - SESSION_TRACKING_TRIM_TO
+    const iter = set.values()
+    for (let i = 0; i < toDrop; i++) {
+      const next = iter.next()
+      if (next.done) break
+      set.delete(next.value)
+    }
+  }
+}
+
 const AUTO_LOAD_MIN_SCORE = Number(
   process.env.SKILL_SEARCH_AUTOLOAD_MIN_SCORE ?? '0.30',
 )
@@ -185,7 +211,7 @@ async function maybeRecordSkillGap(
 
   const gapSignalKey = `${trigger}:${queryText.trim().toLowerCase()}`
   if (recordedGapSignals.has(gapSignalKey)) return undefined
-  recordedGapSignals.add(gapSignalKey)
+  addBoundedSessionEntry(recordedGapSignals, gapSignalKey)
 
   try {
     const [{ isSkillLearningEnabled }, { recordSkillGap }] = await Promise.all([
@@ -241,7 +267,7 @@ export async function startSkillDiscoveryPrefetch(
     const newResults = results.filter(r => !discoveredThisSession.has(r.name))
     if (newResults.length === 0) return []
 
-    for (const r of newResults) discoveredThisSession.add(r.name)
+    for (const r of newResults) addBoundedSessionEntry(discoveredThisSession, r.name)
 
     const signal: DiscoverySignal = {
       trigger: 'assistant_turn',
@@ -305,7 +331,7 @@ export async function getTurnZeroSkillDiscovery(
 
     if (results.length === 0 && !gap) return null
 
-    for (const r of results) discoveredThisSession.add(r.name)
+    for (const r of results) addBoundedSessionEntry(discoveredThisSession, r.name)
 
     const signal: DiscoverySignal = {
       trigger: 'user_input',
diff --git a/src/tasks/InProcessTeammateTask/InProcessTeammateTask.tsx b/src/tasks/InProcessTeammateTask/InProcessTeammateTask.tsx
index 6b9d8c3cc1..52a202a368 100644
--- a/src/tasks/InProcessTeammateTask/InProcessTeammateTask.tsx
+++ b/src/tasks/InProcessTeammateTask/InProcessTeammateTask.tsx
@@ -73,6 +73,7 @@ export function injectUserMessageToTeammate(
   options:
     | {
         autonomyRunId?: string;
+        autonomyRootDir?: string;
         origin?: MessageOrigin;
       }
     | undefined,
@@ -93,6 +94,9 @@ export function injectUserMessageToTeammate(
     if (options?.autonomyRunId !== undefined) {
       pendingMessage.autonomyRunId = options.autonomyRunId;
     }
+    if (options?.autonomyRootDir !== undefined) {
+      pendingMessage.autonomyRootDir = options.autonomyRootDir;
+    }
     if (options?.origin !== undefined) {
       pendingMessage.origin = options.origin;
     }
diff --git a/src/tasks/InProcessTeammateTask/types.ts b/src/tasks/InProcessTeammateTask/types.ts
index 90d9fb2120..17676647ba 100644
--- a/src/tasks/InProcessTeammateTask/types.ts
+++ b/src/tasks/InProcessTeammateTask/types.ts
@@ -22,6 +22,7 @@ export type TeammateIdentity = {
 export type PendingTeammateUserMessage = {
   message: string
   autonomyRunId?: string
+  autonomyRootDir?: string
   origin?: MessageOrigin
 }
 
diff --git a/src/types/textInputTypes.ts b/src/types/textInputTypes.ts
index 6b0a848d28..26e2c29ed3 100644
--- a/src/types/textInputTypes.ts
+++ b/src/types/textInputTypes.ts
@@ -361,6 +361,7 @@ export type QueuedCommand = {
    */
   autonomy?: {
     runId: string
+    rootDir?: string
     trigger: 'scheduled-task' | 'proactive-tick' | 'managed-flow-step'
     sourceId?: string
     sourceLabel?: string
diff --git a/src/utils/__tests__/autonomyAuthority.test.ts b/src/utils/__tests__/autonomyAuthority.test.ts
index c9033134bf..a57b9a80dd 100644
--- a/src/utils/__tests__/autonomyAuthority.test.ts
+++ b/src/utils/__tests__/autonomyAuthority.test.ts
@@ -5,6 +5,7 @@ import {
   AUTONOMY_DIR,
   buildAutonomyTurnPrompt,
   loadAutonomyAuthority,
+  parseHeartbeatAuthorityTasks,
   resetAutonomyAuthorityForTests,
 } from '../autonomyAuthority'
 import {
@@ -238,4 +239,79 @@ describe('autonomyAuthority', () => {
     expect(prompt).not.toContain('- weekly-report (7d): Ship the weekly report')
     expect(prompt).not.toContain('- gather (')
   })
+
+  test('parseHeartbeatAuthorityTasks ignores tasks: literals inside markdown code fences', () => {
+    const content = [
+      '# HEARTBEAT.md',
+      '',
+      '```yaml',
+      'tasks:',
+      '  - name: not-a-real-task',
+      '    interval: 1m',
+      '    prompt: "would-be-shadowed"',
+      '```',
+      '',
+      'tasks:',
+      '  - name: real-task',
+      '    interval: 30m',
+      '    prompt: "Real prompt"',
+    ].join('\n')
+
+    const parsed = parseHeartbeatAuthorityTasks(content)
+
+    expect(parsed).toHaveLength(1)
+    expect(parsed[0]).toMatchObject({
+      name: 'real-task',
+      interval: '30m',
+      prompt: 'Real prompt',
+    })
+  })
+
+  test('parseHeartbeatAuthorityTasks ignores tasks: literals inside tilde markdown code fences', () => {
+    const content = [
+      '# HEARTBEAT.md',
+      '',
+      '~~~yaml',
+      'tasks:',
+      '  - name: not-a-real-task',
+      '    interval: 1m',
+      '    prompt: "would-be-shadowed"',
+      '~~~',
+      '',
+      'tasks:',
+      '  - name: real-task',
+      '    interval: 30m',
+      '    prompt: "Real prompt"',
+    ].join('\n')
+
+    const parsed = parseHeartbeatAuthorityTasks(content)
+
+    expect(parsed).toHaveLength(1)
+    expect(parsed[0]).toMatchObject({
+      name: 'real-task',
+      interval: '30m',
+      prompt: 'Real prompt',
+    })
+  })
+
+  test('parseHeartbeatAuthorityTasks parses real tasks even when documentation precedes them', () => {
+    const content = [
+      '# Heartbeat docs',
+      '',
+      'See `tasks:` below — the parser keys on the literal at column 0.',
+      '',
+      'tasks:',
+      '  - name: weekly',
+      '    interval: 7d',
+      '    prompt: "Ship report"',
+    ].join('\n')
+
+    const parsed = parseHeartbeatAuthorityTasks(content)
+
+    // The inline `tasks:` mention does NOT collide because that line is not
+    // exactly `tasks:` once trimmed — the existing line.trim() === 'tasks:'
+    // guard handles that case. This test pins the behaviour.
+    expect(parsed).toHaveLength(1)
+    expect(parsed[0]?.name).toBe('weekly')
+  })
 })
diff --git a/src/utils/__tests__/autonomyFlows.test.ts b/src/utils/__tests__/autonomyFlows.test.ts
index 8436844b48..8cf504fb82 100644
--- a/src/utils/__tests__/autonomyFlows.test.ts
+++ b/src/utils/__tests__/autonomyFlows.test.ts
@@ -126,6 +126,14 @@ describe('listAutonomyFlows', () => {
             runCount: 0,
             ownerKey: DEFAULT_AUTONOMY_OWNER_KEY,
             currentDir: tempDir,
+            boundary: [
+              ' src/utils/** ',
+              '/absolute/not-allowed',
+              'src\\windows',
+              '../outside',
+              'src/utils/**',
+              'docs/*.md',
+            ],
             stateJson: {
               currentStepIndex: 0,
               steps: [
@@ -147,6 +155,7 @@ describe('listAutonomyFlows', () => {
     expect(flows).toHaveLength(1)
     expect(flows[0]?.flowId).toBe('flow-1')
     expect(flows[0]?.syncMode).toBe('managed')
+    expect(flows[0]?.boundary).toEqual(['src/utils/**', 'docs/*.md'])
     expect(flows[0]?.stateJson?.steps).toHaveLength(1)
   })
 
@@ -191,6 +200,64 @@ describe('listAutonomyFlows', () => {
     const flows = await listAutonomyFlows(tempDir)
     expect(flows).toEqual([])
   })
+
+  test('persistence pruning keeps active flows ahead of recent terminal history', async () => {
+    const flows: AutonomyFlowRecord[] = [
+      {
+        flowId: 'old-active',
+        flowKey: 'managed:scheduled-task:old-active',
+        syncMode: 'managed',
+        ownerKey: DEFAULT_AUTONOMY_OWNER_KEY,
+        revision: 1,
+        trigger: 'scheduled-task',
+        status: 'queued',
+        goal: 'old active',
+        rootDir: tempDir,
+        currentDir: tempDir,
+        runCount: 0,
+        createdAt: 1,
+        updatedAt: 1,
+      },
+      ...Array.from({ length: 100 }, (_, index) => ({
+        flowId: `history-${index}`,
+        flowKey: `managed:scheduled-task:history-${index}`,
+        syncMode: 'managed' as const,
+        ownerKey: DEFAULT_AUTONOMY_OWNER_KEY,
+        revision: 1,
+        trigger: 'scheduled-task' as const,
+        status: 'succeeded' as const,
+        goal: `history ${index}`,
+        rootDir: tempDir,
+        currentDir: tempDir,
+        runCount: 1,
+        createdAt: 1_000 + index,
+        updatedAt: 1_000 + index,
+        endedAt: 2_000 + index,
+      })),
+    ]
+    const flowsPath = resolveAutonomyFlowsPath(tempDir)
+    await mkdir(join(tempDir, AUTONOMY_DIR), { recursive: true })
+    await writeFile(
+      flowsPath,
+      `${JSON.stringify({ flows }, null, 2)}\n`,
+      'utf-8',
+    )
+
+    await startManagedAutonomyFlow({
+      trigger: 'scheduled-task',
+      goal: 'fresh active',
+      steps: TWO_STEPS,
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'fresh-active',
+      nowMs: 9_999,
+    })
+
+    const persisted = await listAutonomyFlows(tempDir)
+    expect(persisted).toHaveLength(100)
+    expect(persisted.some(flow => flow.flowId === 'old-active')).toBe(true)
+    expect(persisted.some(flow => flow.flowId === 'history-0')).toBe(false)
+  })
 })
 
 describe('startManagedAutonomyFlow', () => {
@@ -225,6 +292,49 @@ describe('startManagedAutonomyFlow', () => {
     expect(result!.nextStep!.step.name).toBe('gather')
   })
 
+  test('normalizes and preserves boundary across completed flow restarts', async () => {
+    const first = await startManagedAutonomyFlow({
+      trigger: 'scheduled-task',
+      goal: 'Scoped flow',
+      steps: [{ name: 'only', prompt: 'Do it' }],
+      rootDir: tempDir,
+      sourceId: 'scoped-src',
+      boundary: [' src/utils/** ', 'src\\bad', '/absolute', 'docs/*.md'],
+      nowMs: 1000,
+    })
+    const flowId = first!.flow.flowId
+
+    expect(first!.flow.boundary).toEqual(['src/utils/**', 'docs/*.md'])
+
+    await queueManagedAutonomyFlowStepRun({
+      flowId,
+      stepId: first!.nextStep!.step.stepId,
+      stepIndex: 0,
+      runId: 'run-1',
+      rootDir: tempDir,
+      nowMs: 2000,
+    })
+    await markManagedAutonomyFlowStepCompleted({
+      flowId,
+      runId: 'run-1',
+      rootDir: tempDir,
+      nowMs: 3000,
+    })
+
+    const restarted = await startManagedAutonomyFlow({
+      trigger: 'scheduled-task',
+      goal: 'Scoped flow',
+      steps: [{ name: 'only', prompt: 'Do it again' }],
+      rootDir: tempDir,
+      sourceId: 'scoped-src',
+      nowMs: 4000,
+    })
+
+    expect(restarted!.started).toBe(true)
+    expect(restarted!.flow.flowId).toBe(flowId)
+    expect(restarted!.flow.boundary).toEqual(['src/utils/**', 'docs/*.md'])
+  })
+
   test('sets status=waiting when first step has waitFor', async () => {
     const result = await startManagedAutonomyFlow({
       trigger: 'scheduled-task',
diff --git a/src/utils/__tests__/autonomyPersistence.test.ts b/src/utils/__tests__/autonomyPersistence.test.ts
index a265263eeb..f16877206e 100644
--- a/src/utils/__tests__/autonomyPersistence.test.ts
+++ b/src/utils/__tests__/autonomyPersistence.test.ts
@@ -54,6 +54,25 @@ describe('withAutonomyPersistenceLock', () => {
     ).rejects.toThrow('inner failure')
   })
 
+  test('releases same-root lock bookkeeping after success and failure', async () => {
+    const {
+      getAutonomyPersistenceLockCountForTests,
+      withAutonomyPersistenceLock,
+    } = await import('../autonomyPersistence')
+
+    expect(getAutonomyPersistenceLockCountForTests()).toBe(0)
+
+    await withAutonomyPersistenceLock(tempDir, async () => 'ok')
+    expect(getAutonomyPersistenceLockCountForTests()).toBe(0)
+
+    await expect(
+      withAutonomyPersistenceLock(tempDir, async () => {
+        throw new Error('inner failure')
+      }),
+    ).rejects.toThrow('inner failure')
+    expect(getAutonomyPersistenceLockCountForTests()).toBe(0)
+  })
+
   test('serializes concurrent calls on the same rootDir', async () => {
     const { withAutonomyPersistenceLock } = await import(
       '../autonomyPersistence'
diff --git a/src/utils/__tests__/autonomyQueueLifecycle.test.ts b/src/utils/__tests__/autonomyQueueLifecycle.test.ts
new file mode 100644
index 0000000000..2449f84051
--- /dev/null
+++ b/src/utils/__tests__/autonomyQueueLifecycle.test.ts
@@ -0,0 +1,279 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { createTempDir, cleanupTempDir } from '../../../tests/mocks/file-system'
+import { getAttachmentMessages } from '../attachments'
+import {
+  createAutonomyQueuedPrompt,
+  createProactiveAutonomyCommands,
+  getAutonomyRunById,
+  markAutonomyRunCancelled,
+  startManagedAutonomyFlowFromHeartbeatTask,
+} from '../autonomyRuns'
+import { getAutonomyFlowById, listAutonomyFlows } from '../autonomyFlows'
+import {
+  cancelQueuedAutonomyCommands,
+  claimConsumableQueuedAutonomyCommands,
+  finalizeAutonomyCommandsForTurn,
+  partitionConsumableQueuedAutonomyCommands,
+} from '../autonomyQueueLifecycle'
+import {
+  enqueue,
+  getCommandsByMaxPriority,
+  remove as removeFromQueue,
+  resetCommandQueue,
+} from '../messageQueueManager'
+
+let tempDir = ''
+let extraTempDirs: string[] = []
+// Per-test setup: fresh temp root, empty extra-dir list, reset command queue.
+beforeEach(async () => {
+  tempDir = await createTempDir('autonomy-queue-lifecycle-')
+  extraTempDirs = []
+  resetCommandQueue()
+})
+// Per-test teardown: reset the queue, then remove the temp root and any extra dirs a test registered.
+afterEach(async () => {
+  resetCommandQueue()
+  if (tempDir) {
+    await cleanupTempDir(tempDir)
+  }
+  for (const extraTempDir of extraTempDirs) {
+    await cleanupTempDir(extraTempDir)
+  }
+})
+
+describe('autonomyQueueLifecycle', () => {
+  async function consumeQueuedAutonomyAttachmentTurn() { // Test helper: runs one simulated attachment-consuming turn against tempDir.
+    const previousDisableAttachments =
+      process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS
+    process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS = '1' // restored (or unset) in the finally block below
+    try {
+      const snapshot = getCommandsByMaxPriority('later')
+      const claim = await claimConsumableQueuedAutonomyCommands(
+        snapshot,
+        tempDir,
+      )
+      removeFromQueue(claim.staleCommands)
+      removeFromQueue(claim.claimedCommands)
+      // Collect every attachment message produced for claim.attachmentCommands.
+      const attachments = []
+      for await (const attachment of getAttachmentMessages(
+        null,
+        {} as never,
+        null,
+        claim.attachmentCommands,
+        [],
+      )) {
+        attachments.push(attachment)
+      }
+      // Also dequeue prompt/task-notification attachment commands that were not part of the claim.
+      const consumedCommands = claim.attachmentCommands.filter(
+        command =>
+          (command.mode === 'prompt' || command.mode === 'task-notification') &&
+          !claim.claimedCommands.includes(command),
+      )
+      removeFromQueue(consumedCommands)
+      const nextCommands = await finalizeAutonomyCommandsForTurn({
+        commands: claim.claimedCommands,
+        outcome: { type: 'completed' },
+        currentDir: tempDir,
+        priority: 'later',
+      })
+      for (const command of nextCommands) {
+        enqueue(command) // follow-up commands returned by finalize go back on the queue
+      }
+
+      return { attachments, runningRunIds: claim.claimedRunIds, nextCommands }
+    } finally {
+      if (previousDisableAttachments === undefined) {
+        delete process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS
+      } else {
+        process.env.CLAUDE_CODE_DISABLE_ATTACHMENTS = previousDisableAttachments
+      }
+    }
+  }
+
+  test('filters stale autonomy commands before mid-turn attachment consumption', async () => {
+    const command = await createAutonomyQueuedPrompt({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    // Before cancellation the command lands in attachmentCommands, not staleCommands.
+    const initial = await partitionConsumableQueuedAutonomyCommands(
+      [command!],
+      tempDir,
+    )
+    expect(initial.attachmentCommands).toHaveLength(1)
+    expect(initial.staleCommands).toHaveLength(0)
+    // Cancel the backing run, then partition the very same command again.
+    await markAutonomyRunCancelled(command!.autonomy!.runId, tempDir)
+
+    const afterCancel = await partitionConsumableQueuedAutonomyCommands(
+      [command!],
+      tempDir,
+    )
+    expect(afterCancel.attachmentCommands).toHaveLength(0)
+    expect(afterCancel.staleCommands).toHaveLength(1)
+  })
+
+  test('cancels proactive commands that are created but dropped before enqueue', async () => {
+    const commands = await createProactiveAutonomyCommands({
+      basePrompt: '<tick>12:00:00</tick>',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(commands).toHaveLength(1)
+    // The run behind the created command is persisted as 'queued'.
+    const queuedRun = await getAutonomyRunById(
+      commands[0]!.autonomy!.runId,
+      tempDir,
+    )
+    expect(queuedRun!.status).toBe('queued')
+    // The commands are never enqueued in this test; cancelling them must flip the run to 'cancelled'.
+    await cancelQueuedAutonomyCommands({ commands, rootDir: tempDir })
+
+    const cancelledRun = await getAutonomyRunById(
+      commands[0]!.autonomy!.runId,
+      tempDir,
+    )
+    expect(cancelledRun!.status).toBe('cancelled')
+  })
+
+  test('uses command rootDir when claiming after project context changes', async () => {
+    const otherProjectDir = await createTempDir('autonomy-other-project-')
+    extraTempDirs.push(otherProjectDir) // removed by afterEach
+    const command = await createAutonomyQueuedPrompt({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    expect(command!.autonomy?.rootDir).toBe(tempDir)
+    // Claim while passing a different project dir; the command carries its own rootDir.
+    const claim = await claimConsumableQueuedAutonomyCommands(
+      [command!],
+      otherProjectDir,
+    )
+
+    const originalRun = await getAutonomyRunById(
+      command!.autonomy!.runId,
+      tempDir,
+    )
+    const wrongProjectRun = await getAutonomyRunById(
+      command!.autonomy!.runId,
+      otherProjectDir,
+    )
+    // The run is marked running under tempDir; no run with that id exists under the other project.
+    expect(claim.claimedRunIds).toEqual([command!.autonomy!.runId])
+    expect(claim.attachmentCommands).toHaveLength(1)
+    expect(originalRun!.status).toBe('running')
+    expect(wrongProjectRun).toBeNull()
+  })
+
+  test('advances a managed flow consumed as a queued attachment', async () => {
+    const command = await startManagedAutonomyFlowFromHeartbeatTask({
+      task: {
+        name: 'weekly-report',
+        interval: '7d',
+        prompt: 'Ship the weekly report',
+        steps: [
+          { name: 'gather', prompt: 'Gather weekly inputs' },
+          { name: 'draft', prompt: 'Draft weekly report' },
+        ],
+      },
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    // Claim the first step's command, then finalize the turn as completed.
+    const claim = await claimConsumableQueuedAutonomyCommands(
+      [command!],
+      tempDir,
+    )
+    const runningRunIds = claim.claimedRunIds
+    expect(runningRunIds).toEqual([command!.autonomy!.runId])
+
+    const nextCommands = await finalizeAutonomyCommandsForTurn({
+      commands: claim.claimedCommands,
+      outcome: { type: 'completed' },
+      currentDir: tempDir,
+      priority: 'later',
+    })
+    const [flow] = await listAutonomyFlows(tempDir)
+    const detail = await getAutonomyFlowById(flow!.flowId, tempDir)
+    const run = await getAutonomyRunById(command!.autonomy!.runId, tempDir)
+    // Step 1's run is completed and one follow-up command for the same flow exists; step 2 is queued.
+    expect(run!.status).toBe('completed')
+    expect(nextCommands).toHaveLength(1)
+    expect(nextCommands[0]!.autonomy?.flowId).toBe(flow!.flowId)
+    expect(detail!.stateJson!.steps.map(step => step.status)).toEqual([
+      'completed',
+      'queued',
+    ])
+  })
+
+  test('keeps managed autonomy flow coherent across queued attachment turns', async () => {
+    const firstCommand = await startManagedAutonomyFlowFromHeartbeatTask({
+      task: {
+        name: 'weekly-report',
+        interval: '7d',
+        prompt: 'Ship the weekly report',
+        steps: [
+          { name: 'gather', prompt: 'Gather weekly inputs' },
+          { name: 'draft', prompt: 'Draft weekly report' },
+        ],
+      },
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(firstCommand).not.toBeNull()
+    enqueue(firstCommand!)
+    // Turn 1: consume the first step's command through the attachment helper.
+    const firstTurn = await consumeQueuedAutonomyAttachmentTurn()
+    const queuedAfterFirstTurn = getCommandsByMaxPriority('later')
+    const [flowAfterFirstTurn] = await listAutonomyFlows(tempDir)
+    const firstRun = await getAutonomyRunById(
+      firstCommand!.autonomy!.runId,
+      tempDir,
+    )
+
+    expect(firstTurn.attachments).toHaveLength(1)
+    expect(firstTurn.attachments[0]!.attachment?.type).toBe('queued_command')
+    expect(firstTurn.runningRunIds).toEqual([firstCommand!.autonomy!.runId])
+    expect(firstTurn.nextCommands).toHaveLength(1)
+    expect(queuedAfterFirstTurn).toHaveLength(1)
+    expect(queuedAfterFirstTurn[0]!.autonomy?.flowId).toBe(
+      flowAfterFirstTurn!.flowId,
+    )
+    expect(firstRun!.status).toBe('completed')
+    expect(
+      flowAfterFirstTurn!.stateJson!.steps.map(step => step.status),
+    ).toEqual(['completed', 'queued'])
+    // Turn 2: the follow-up command enqueued by turn 1 is consumed the same way.
+    const secondCommand = queuedAfterFirstTurn[0]!
+    const secondTurn = await consumeQueuedAutonomyAttachmentTurn()
+    const queuedAfterSecondTurn = getCommandsByMaxPriority('later')
+    const finalFlow = await getAutonomyFlowById(
+      flowAfterFirstTurn!.flowId,
+      tempDir,
+    )
+    const secondRun = await getAutonomyRunById(
+      secondCommand.autonomy!.runId,
+      tempDir,
+    )
+    // After the last step nothing is re-queued and the flow reports 'succeeded'.
+    expect(secondTurn.attachments).toHaveLength(1)
+    expect(secondTurn.runningRunIds).toEqual([secondCommand.autonomy!.runId])
+    expect(secondTurn.nextCommands).toHaveLength(0)
+    expect(queuedAfterSecondTurn).toHaveLength(0)
+    expect(secondRun!.status).toBe('completed')
+    expect(finalFlow!.status).toBe('succeeded')
+    expect(finalFlow!.stateJson!.steps.map(step => step.status)).toEqual([
+      'completed',
+      'completed',
+    ])
+  })
+})
diff --git a/src/utils/__tests__/autonomyRuns.test.ts b/src/utils/__tests__/autonomyRuns.test.ts
index 056083e486..268b856fd0 100644
--- a/src/utils/__tests__/autonomyRuns.test.ts
+++ b/src/utils/__tests__/autonomyRuns.test.ts
@@ -1,6 +1,5 @@
 import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
-import { mkdir, writeFile } from 'fs/promises'
-import { join } from 'path'
+import { join, resolve as resolvePath } from 'node:path'
 import {
   resetStateForTests,
   setCwdState,
@@ -8,17 +7,23 @@ import {
   setProjectRoot,
 } from '../../bootstrap/state'
 import {
+  createAutonomyRun,
   formatAutonomyRunsList,
   formatAutonomyRunsStatus,
   listAutonomyRuns,
   createAutonomyQueuedPrompt,
+  createAutonomyQueuedPromptIfNoActiveSource,
   createProactiveAutonomyCommands,
   finalizeAutonomyRunCompleted,
+  getAutonomyRunById,
+  hasActiveAutonomyRunForSource,
   markAutonomyRunCompleted,
+  markAutonomyRunCancelled,
   markAutonomyRunFailed,
   markAutonomyRunRunning,
   recoverManagedAutonomyFlowPrompt,
   resolveAutonomyRunsPath,
+  STALE_ACTIVE_RUN_ERROR_PREFIX,
   startManagedAutonomyFlowFromHeartbeatTask,
 } from '../autonomyRuns'
 import {
@@ -35,11 +40,14 @@ import {
   cleanupTempDir,
   createTempDir,
   createTempSubdir,
+  readTempFile,
+  tempPathExists,
   writeTempFile,
 } from '../../../tests/mocks/file-system'
 
 const AGENTS_REL = join(AUTONOMY_DIR, 'AGENTS.md')
 const HEARTBEAT_REL = join(AUTONOMY_DIR, 'HEARTBEAT.md')
+const RUNS_REL = join(AUTONOMY_DIR, 'runs.json')
 
 let tempDir = ''
 
@@ -95,7 +103,9 @@ describe('autonomyRuns', () => {
       ownerKey: 'main-thread',
       sourceId: 'cron-1',
       sourceLabel: 'nightly-report',
+      ownerProcessId: process.pid,
     })
+    expect(runs[0]?.ownerSessionId).toBeString()
     expect(flows).toHaveLength(0)
     expect(resolveAutonomyRunsPath(tempDir)).toContain('.claude')
   })
@@ -118,7 +128,7 @@ describe('autonomyRuns', () => {
     expect(command!.value).toContain('nested authority')
   })
 
-  test('markAutonomyRunRunning/completed/failed update persisted lifecycle state for plain runs', async () => {
+  test('markAutonomyRunRunning/completed update persisted lifecycle state for plain runs', async () => {
     const command = await createAutonomyQueuedPrompt({
       basePrompt: '<tick>12:00:00</tick>',
       trigger: 'proactive-tick',
@@ -134,7 +144,9 @@ describe('autonomyRuns', () => {
       runId,
       status: 'running',
       startedAt: 100,
+      ownerProcessId: process.pid,
     })
+    expect(runs[0]?.ownerSessionId).toBeString()
 
     await markAutonomyRunCompleted(runId, tempDir, 200)
     runs = await listAutonomyRuns(tempDir)
@@ -143,9 +155,22 @@ describe('autonomyRuns', () => {
       status: 'completed',
       endedAt: 200,
     })
+  })
 
+  test('markAutonomyRunFailed updates a non-terminal run', async () => {
+    const command = await createAutonomyQueuedPrompt({
+      basePrompt: '<tick>12:00:00</tick>',
+      trigger: 'proactive-tick',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    const runId = command!.autonomy!.runId
+
+    await markAutonomyRunRunning(runId, tempDir, 100)
     await markAutonomyRunFailed(runId, 'boom', tempDir, 300)
-    runs = await listAutonomyRuns(tempDir)
+    const runs = await listAutonomyRuns(tempDir)
+
     expect(runs[0]).toMatchObject({
       runId,
       status: 'failed',
@@ -154,6 +179,346 @@ describe('autonomyRuns', () => {
     })
   })
 
+  test('terminal runs are not revived by stale lifecycle updates', async () => {
+    const command = await createAutonomyQueuedPrompt({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    const runId = command!.autonomy!.runId
+    // Cancel first (t=100), then replay running/completed/failed updates with later timestamps.
+    await markAutonomyRunCancelled(runId, tempDir, 100)
+    const revived = await markAutonomyRunRunning(runId, tempDir, 200)
+    const completed = await markAutonomyRunCompleted(runId, tempDir, 300)
+    const failed = await markAutonomyRunFailed(
+      runId,
+      'late failure',
+      tempDir,
+      400,
+    )
+    const persisted = await getAutonomyRunById(runId, tempDir)
+    // Every late update returns null; the persisted record stays cancelled at t=100 with no error.
+    expect(revived).toBeNull()
+    expect(completed).toBeNull()
+    expect(failed).toBeNull()
+    expect(persisted).toMatchObject({
+      status: 'cancelled',
+      endedAt: 100,
+    })
+    expect(persisted!.error).toBeUndefined()
+  })
+
+  test('hasActiveAutonomyRunForSource only treats queued and running scheduled runs as active', async () => {
+    const command = await createAutonomyQueuedPrompt({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+      sourceLabel: 'nightly',
+    })
+    expect(command).not.toBeNull()
+    const runId = command!.autonomy!.runId
+    // The freshly created run for cron-1 counts as active.
+    await expect(
+      hasActiveAutonomyRunForSource({
+        trigger: 'scheduled-task',
+        sourceId: 'cron-1',
+        rootDir: tempDir,
+      }),
+    ).resolves.toBe(true)
+    // Still active once the run is marked running.
+    await markAutonomyRunRunning(runId, tempDir, 100)
+    await expect(
+      hasActiveAutonomyRunForSource({
+        trigger: 'scheduled-task',
+        sourceId: 'cron-1',
+        rootDir: tempDir,
+      }),
+    ).resolves.toBe(true)
+    // A different sourceId (cron-2) has no active run.
+    await expect(
+      hasActiveAutonomyRunForSource({
+        trigger: 'scheduled-task',
+        sourceId: 'cron-2',
+        rootDir: tempDir,
+      }),
+    ).resolves.toBe(false)
+    // Completing the run makes cron-1 inactive again.
+    await markAutonomyRunCompleted(runId, tempDir, 200)
+    await expect(
+      hasActiveAutonomyRunForSource({
+        trigger: 'scheduled-task',
+        sourceId: 'cron-1',
+        rootDir: tempDir,
+      }),
+    ).resolves.toBe(false)
+    // A second cron-1 run that is marked failed is not active either.
+    const failedCommand = await createAutonomyQueuedPrompt({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+    })
+    expect(failedCommand).not.toBeNull()
+    await markAutonomyRunFailed(
+      failedCommand!.autonomy!.runId,
+      'boom',
+      tempDir,
+      300,
+    )
+    await expect(
+      hasActiveAutonomyRunForSource({
+        trigger: 'scheduled-task',
+        sourceId: 'cron-1',
+        rootDir: tempDir,
+      }),
+    ).resolves.toBe(false)
+  })
+
+  test('createAutonomyQueuedPromptIfNoActiveSource atomically skips duplicate active scheduled sources', async () => {
+    const [first, second] = await Promise.all([ // both calls are started before either is awaited
+      createAutonomyQueuedPromptIfNoActiveSource({
+        basePrompt: 'scheduled prompt',
+        trigger: 'scheduled-task',
+        rootDir: tempDir,
+        currentDir: tempDir,
+        sourceId: 'cron-1',
+      }),
+      createAutonomyQueuedPromptIfNoActiveSource({
+        basePrompt: 'scheduled prompt',
+        trigger: 'scheduled-task',
+        rootDir: tempDir,
+        currentDir: tempDir,
+        sourceId: 'cron-1',
+      }),
+    ])
+    // Exactly one of the two identical calls may yield a command; the other resolves to null.
+    const created = [first, second].filter(command => command !== null)
+    const runs = await listAutonomyRuns(tempDir)
+
+    expect(created).toHaveLength(1)
+    expect(runs).toHaveLength(1)
+    expect(runs[0]).toMatchObject({
+      trigger: 'scheduled-task',
+      status: 'queued',
+      sourceId: 'cron-1',
+    })
+  })
+
+  test('createAutonomyQueuedPromptIfNoActiveSource scopes dedup by ownerKey', async () => {
+    const first = await createAutonomyQueuedPromptIfNoActiveSource({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+      ownerKey: 'owner-a',
+    })
+    const second = await createAutonomyQueuedPromptIfNoActiveSource({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+      ownerKey: 'owner-b',
+    })
+    // Same sourceId but different ownerKey values: both calls must create a run.
+    const runs = await listAutonomyRuns(tempDir)
+
+    expect(first).not.toBeNull()
+    expect(second).not.toBeNull()
+    expect(runs).toHaveLength(2)
+    expect(new Set(runs.map(run => run.ownerKey))).toEqual(
+      new Set(['owner-a', 'owner-b']),
+    )
+  })
+
+  test('createAutonomyQueuedPromptIfNoActiveSource does not advance heartbeat last-run state on dedup skip (two-phase commit invariant)', async () => {
+    await writeTempFile( // HEARTBEAT.md with a single 30m "inbox" task
+      tempDir,
+      HEARTBEAT_REL,
+      [
+        'tasks:',
+        '  - name: inbox',
+        '    interval: 30m',
+        '    prompt: "Check inbox"',
+      ].join('\n'),
+    )
+
+    // Seed an active queued run for cron-1 so the next dedup attempt skips.
+    await writeTempFile(
+      tempDir,
+      RUNS_REL,
+      `${JSON.stringify(
+        {
+          runs: [
+            {
+              runId: 'preexisting-active',
+              runtime: 'automatic',
+              trigger: 'scheduled-task',
+              status: 'queued',
+              rootDir: tempDir,
+              currentDir: tempDir,
+              sourceId: 'cron-1',
+              promptPreview: 'still queued',
+              createdAt: 100,
+              ownerProcessId: process.pid,
+              ownerSessionId: 'self',
+            },
+          ],
+        },
+        null,
+        2,
+      )}\n`,
+    )
+    // With that queued run on disk (owned by this process), a new cron-1 fire must be skipped (null).
+    const skipped = await createAutonomyQueuedPromptIfNoActiveSource({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+    })
+    expect(skipped).toBeNull()
+
+    // If the dedup skip wrongly advanced heartbeat state, the next
+    // proactive-tick prompt would NOT include the inbox task. Verify it
+    // still does.
+    const followUp = await createAutonomyQueuedPrompt({
+      basePrompt: '<tick>12:00:00</tick>',
+      trigger: 'proactive-tick',
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(followUp).not.toBeNull()
+    expect(followUp!.value).toContain('Due HEARTBEAT.md tasks:')
+    expect(followUp!.value).toContain('- inbox (30m): Check inbox')
+  })
+
+  test('createAutonomyQueuedPromptIfNoActiveSource recovers stale active runs from dead owner processes', async () => {
+    await writeTempFile( // seed a 'running' record whose owner PID is presumably not a live process
+      tempDir,
+      RUNS_REL,
+      `${JSON.stringify(
+        {
+          runs: [
+            {
+              runId: 'stale-run',
+              runtime: 'automatic',
+              trigger: 'scheduled-task',
+              status: 'running',
+              rootDir: tempDir,
+              currentDir: tempDir,
+              sourceId: 'cron-1',
+              sourceLabel: 'nightly',
+              promptPreview: 'stale scheduled prompt',
+              createdAt: 100,
+              startedAt: 100,
+              ownerProcessId: 2_147_483_647,
+              ownerSessionId: 'dead-session',
+            },
+          ],
+        },
+        null,
+        2,
+      )}\n`,
+    )
+    // The seeded record must not be reported as an active run for cron-1.
+    await expect(
+      hasActiveAutonomyRunForSource({
+        trigger: 'scheduled-task',
+        sourceId: 'cron-1',
+        rootDir: tempDir,
+      }),
+    ).resolves.toBe(false)
+    // A new fire for cron-1 is therefore created instead of being skipped.
+    const command = await createAutonomyQueuedPromptIfNoActiveSource({
+      basePrompt: 'scheduled prompt',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+    })
+    const runs = await listAutonomyRuns(tempDir)
+    // runs[0] is the new queued run owned by this process; runs[1] is the stale run, now failed.
+    expect(command).not.toBeNull()
+    expect(runs).toHaveLength(2)
+    expect(runs[0]).toMatchObject({
+      trigger: 'scheduled-task',
+      status: 'queued',
+      sourceId: 'cron-1',
+      ownerProcessId: process.pid,
+    })
+    expect(runs[1]).toMatchObject({
+      runId: 'stale-run',
+      status: 'failed',
+      endedAt: runs[0]?.createdAt,
+      error: expect.stringContaining('owner process 2147483647'),
+    })
+  })
+
+  test('stale managed-flow run recovery also marks the flow step failed', async () => {
+    const command = await startManagedAutonomyFlowFromHeartbeatTask({
+      task: {
+        name: 'weekly-report',
+        interval: '7d',
+        prompt: 'Ship the weekly report',
+        steps: [
+          {
+            name: 'gather',
+            prompt: 'Gather weekly inputs',
+          },
+        ],
+      },
+      rootDir: tempDir,
+      currentDir: tempDir,
+    })
+    expect(command).not.toBeNull()
+    const runId = command!.autonomy!.runId
+    await markAutonomyRunRunning(runId, tempDir, 100)
+    // Rewrite the persisted run so its owner PID is 2_147_483_647 (presumably not a live process).
+    const runsPath = resolveAutonomyRunsPath(tempDir)
+    const file = JSON.parse(await readTempFile(runsPath)) as {
+      runs: Array<Record<string, unknown>>
+    }
+    file.runs = file.runs.map(run =>
+      run.runId === runId
+        ? { ...run, ownerProcessId: 2_147_483_647 }
+        : run,
+    )
+    await writeTempFile(tempDir, RUNS_REL, `${JSON.stringify(file, null, 2)}\n`)
+    // Requesting a prompt for the same source and owner is expected to reap that stale run.
+    const replacement = await createAutonomyQueuedPromptIfNoActiveSource({
+      basePrompt: 'replacement prompt',
+      trigger: 'managed-flow-step',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: command!.autonomy!.sourceId!,
+      ownerKey: 'main-thread',
+    })
+    const [flow] = await listAutonomyFlows(tempDir)
+    const runs = await listAutonomyRuns(tempDir)
+    // The stale run and the flow's first step carry the stale-run error prefix; the flow is failed and blocked on that run.
+    expect(replacement).not.toBeNull()
+    expect(runs.find(run => run.runId === runId)).toMatchObject({
+      status: 'failed',
+      error: expect.stringContaining(STALE_ACTIVE_RUN_ERROR_PREFIX),
+    })
+    expect(flow).toMatchObject({
+      status: 'failed',
+      blockedRunId: runId,
+    })
+    expect(flow?.stateJson?.steps[0]).toMatchObject({
+      status: 'failed',
+      runId,
+      error: expect.stringContaining(STALE_ACTIVE_RUN_ERROR_PREFIX),
+    })
+  })
+
   test('formatters produce readable status and run listings', async () => {
     const first = await createAutonomyQueuedPrompt({
       basePrompt: 'scheduled prompt',
@@ -223,11 +588,56 @@ describe('autonomyRuns', () => {
     )
   })
 
+  test('persistence pruning keeps active runs ahead of recent completed history', async () => {
+    const runs = [ // 1 old queued run (createdAt 1) + 200 newer completed runs = 201 seeded records
+      {
+        runId: 'old-active',
+        runtime: 'automatic',
+        trigger: 'scheduled-task',
+        status: 'queued',
+        rootDir: tempDir,
+        currentDir: tempDir,
+        ownerKey: 'main-thread',
+        promptPreview: 'old active',
+        createdAt: 1,
+      },
+      ...Array.from({ length: 200 }, (_, index) => ({
+        runId: `history-${index}`,
+        runtime: 'automatic',
+        trigger: 'scheduled-task',
+        status: 'completed',
+        rootDir: tempDir,
+        currentDir: tempDir,
+        ownerKey: 'main-thread',
+        promptPreview: `history ${index}`,
+        createdAt: 1_000 + index,
+        endedAt: 2_000 + index,
+      })),
+    ]
+    await writeTempFile(
+      tempDir,
+      RUNS_REL,
+      `${JSON.stringify({ runs }, null, 2)}\n`,
+    )
+    // Creating one more run brings the candidate set to 202 records before it is persisted.
+    await createAutonomyRun({
+      trigger: 'scheduled-task',
+      prompt: 'fresh active',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      nowMs: 9_999,
+    })
+    // 200 records remain: the old queued run is kept while the oldest completed one (history-0) is dropped.
+    const persisted = await listAutonomyRuns(tempDir)
+    expect(persisted).toHaveLength(200)
+    expect(persisted.some(run => run.runId === 'old-active')).toBe(true)
+    expect(persisted.some(run => run.runId === 'history-0')).toBe(false)
+  })
+
   test('listAutonomyRuns keeps older persisted records by normalizing missing runtime and owner metadata', async () => {
-    const runsPath = resolveAutonomyRunsPath(tempDir)
-    await mkdir(join(tempDir, '.claude', 'autonomy'), { recursive: true })
-    await writeFile(
-      runsPath,
+    await writeTempFile(
+      tempDir,
+      RUNS_REL,
       `${JSON.stringify(
         {
           runs: [
@@ -244,7 +654,6 @@ describe('autonomyRuns', () => {
         null,
         2,
       )}\n`,
-      'utf-8',
     )
 
     const [legacy] = await listAutonomyRuns(tempDir)
@@ -418,4 +827,27 @@ describe('autonomyRuns', () => {
     expect(recovered!.autonomy?.runId).toBe(command!.autonomy?.runId)
     expect(recovered!.autonomy?.flowId).toBe(flow!.flowId)
   })
+
+  test('STALE_ACTIVE_RUN_ERROR_PREFIX stays in sync with HEARTBEAT.md stale-recovery-health task', async () => {
+    // The HEARTBEAT.md stale-recovery-health task prompt embeds this prefix
+    // as a literal string. Changing the constant without updating the
+    // heartbeat prompt would silently break the monitor — this test fails
+    // first to force the simultaneous update.
+    const heartbeatPath = resolvePath( // three levels up from this test file's directory, then .claude/autonomy/HEARTBEAT.md
+      import.meta.dir,
+      '..',
+      '..',
+      '..',
+      '.claude',
+      'autonomy',
+      'HEARTBEAT.md',
+    )
+    if (!(await tempPathExists(heartbeatPath))) {
+      // .claude/ may be absent in some checkout layouts (e.g. a packed tree);
+      // pass vacuously rather than fail. NOTE(review): nothing is asserted then.
+      return
+    }
+    const content = await readTempFile(heartbeatPath)
+    expect(content).toContain(STALE_ACTIVE_RUN_ERROR_PREFIX)
+  })
 })
diff --git a/src/utils/autonomyAuthority.ts b/src/utils/autonomyAuthority.ts
index c604d30499..cd5326f602 100644
--- a/src/utils/autonomyAuthority.ts
+++ b/src/utils/autonomyAuthority.ts
@@ -133,11 +133,50 @@ function mergeAgentsAuthority(files: AutonomyAuthorityFile[]): string | null {
     .join('\n\n')
 }
 
+/**
+ * Blanks out fenced code blocks (fence lines and their content) while keeping
+ * every other line at its original index, so `tasks:` literals inside Markdown
+ * samples in HEARTBEAT.md are not parsed as the real config block. A backtick
+ * opener whose info string contains a backtick is inline code, not a fence
+ * (CommonMark), and is left untouched. An unclosed fence masks to the end.
+ */
+function maskCodeFencedLines(lines: string[]): string[] {
+  const masked = lines.slice()
+  let activeFenceChar: '`' | '~' | null = null
+  let activeFenceLen = 0
+  for (let i = 0; i < masked.length; i++) {
+    const trimmed = masked[i]!.trim()
+    const fenceMatch = trimmed.match(/^([`~])\1{2,}/)
+    if (activeFenceChar === null) {
+      if (!fenceMatch) {
+        continue
+      }
+      const info = trimmed.slice(fenceMatch[0]!.length)
+      if (fenceMatch[1] === '`' && info.includes('`')) {
+        continue // e.g. "```tasks:``` ..." is an inline code span
+      }
+      activeFenceChar = fenceMatch[1]! as '`' | '~'
+      activeFenceLen = fenceMatch[0]!.length
+    } else if (
+      fenceMatch &&
+      fenceMatch[1] === activeFenceChar &&
+      fenceMatch[0]!.length >= activeFenceLen &&
+      trimmed.slice(fenceMatch[0]!.length).trim() === ''
+    ) {
+      // Closing fence: same char, at least as long, nothing after it.
+      activeFenceChar = null
+      activeFenceLen = 0
+    }
+    masked[i] = ''
+  }
+  return masked
+}
+
 export function parseHeartbeatAuthorityTasks(
   content: string,
 ): HeartbeatAuthorityTask[] {
   const tasks: HeartbeatAuthorityTask[] = []
-  const lines = content.split('\n')
+  const lines = maskCodeFencedLines(content.split('\n'))
   const getIndent = (line: string): number =>
     line.length - line.trimStart().length
   const parseScalar = (line: string, key: string): string =>
diff --git a/src/utils/autonomyFlows.ts b/src/utils/autonomyFlows.ts
index c67d5c0584..989dd851f1 100644
--- a/src/utils/autonomyFlows.ts
+++ b/src/utils/autonomyFlows.ts
@@ -3,7 +3,10 @@ import { mkdir, writeFile } from 'fs/promises'
 import { dirname, join, resolve } from 'path'
 import { getProjectRoot } from '../bootstrap/state.js'
 import { AUTONOMY_DIR, type AutonomyTriggerKind } from './autonomyAuthority.js'
-import { withAutonomyPersistenceLock } from './autonomyPersistence.js'
+import {
+  retainActiveFirst,
+  withAutonomyPersistenceLock,
+} from './autonomyPersistence.js'
 import { getFsImplementation } from './fsOperations.js'
 
 const AUTONOMY_FLOWS_MAX = 100
@@ -83,6 +86,20 @@ export type AutonomyFlowRecord = {
   waitJson?: AutonomyFlowWaitState
   cancelRequestedAt?: number
   lastError?: string
+  /**
+   * Repo-relative POSIX glob patterns describing which paths this flow's
+   * `report`-step approval covers. The pre-tool-use hook
+   * `require-plan-for-risky-edit.mjs` consults this list to permit edits
+   * only when the target file matches at least one entry. Absent or empty
+   * means "no boundary declared" — during the pilot window the hook
+   * treats this as broad approval (v1 behaviour). Once all production
+   * flows declare boundaries, the hook will deny absent-boundary flows.
+   *
+   * Supported syntax: `*` matches within one path segment, `**` matches any
+   * number of segments including zero. Examples: `src/utils/autonomy*`,
+   * `src/services/api/**`, `src/Tool.ts`.
+   */
+  boundary?: string[]
 }
 
 type AutonomyFlowsFile = {
@@ -138,6 +155,7 @@ function cloneWaitState(
 function cloneFlowRecord(flow: AutonomyFlowRecord): AutonomyFlowRecord {
   return {
     ...flow,
+    ...(flow.boundary ? { boundary: [...flow.boundary] } : {}),
     ...(flow.stateJson ? { stateJson: cloneManagedState(flow.stateJson) } : {}),
     ...(flow.waitJson ? { waitJson: cloneWaitState(flow.waitJson) } : {}),
   }
@@ -152,6 +170,17 @@ function isManagedFlowStatusActive(status: AutonomyFlowStatus): boolean {
   )
 }
 
+// Clones each flow and hands the copies to retainActiveFirst with AUTONOMY_FLOWS_MAX as its last argument.
+function selectPersistedAutonomyFlows(
+  flows: AutonomyFlowRecord[],
+): AutonomyFlowRecord[] {
+  const clones = flows.map(cloneFlowRecord)
+  const isActive = (record: AutonomyFlowRecord): boolean =>
+    isManagedFlowStatusActive(record.status)
+  const updatedAtOf = (record: AutonomyFlowRecord): number => record.updatedAt
+  return retainActiveFirst(clones, isActive, updatedAtOf, AUTONOMY_FLOWS_MAX)
+}
+
 function defaultFlowSource(params: {
   trigger: AutonomyTriggerKind
   sourceId?: string
@@ -237,6 +266,35 @@ function normalizeWaitState(value: unknown): AutonomyFlowWaitState | undefined {
   }
 }
 
+// True for a non-empty pattern with no leading '/', no '\', no NUL byte, and no '..' segment.
+function isPosixBoundaryGlob(value: string): boolean {
+  if (!value) return false
+  const forbidden = ['\\', '\0'].some(ch => value.includes(ch))
+  if (forbidden || value.startsWith('/')) {
+    return false
+  }
+  return value.split('/').every(segment => segment !== '..')
+}
+
+// Trimmed, de-duplicated string entries of `value` that pass isPosixBoundaryGlob
+// (first-seen order); undefined when `value` is not an array or nothing remains.
+function normalizeBoundary(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) {
+    return undefined
+  }
+  const unique = new Set<string>()
+  for (const entry of value) {
+    if (typeof entry !== 'string') {
+      continue
+    }
+    const pattern = entry.trim()
+    if (isPosixBoundaryGlob(pattern)) {
+      unique.add(pattern) // Set keeps first-insertion order and drops repeats
+    }
+  }
+  return unique.size > 0 ? [...unique] : undefined
+}
+
 function normalizeFlowRecord(flow: AutonomyFlowRecord): AutonomyFlowRecord {
   const source = defaultFlowSource(flow)
   return {
@@ -247,6 +305,7 @@ function normalizeFlowRecord(flow: AutonomyFlowRecord): AutonomyFlowRecord {
     goal: flow.goal || flow.sourceLabel || flow.sourceId || flow.flowKey,
     currentDir: flow.currentDir || flow.rootDir,
     runCount: Math.max(flow.runCount ?? 0, 0),
+    boundary: normalizeBoundary(flow.boundary),
     stateJson: normalizeManagedState(flow.stateJson),
     waitJson: normalizeWaitState(flow.waitJson),
     ...(flow.sourceId
@@ -369,11 +428,7 @@ async function writeAutonomyFlows(
     path,
     `${JSON.stringify(
       {
-        flows: flows
-          .slice()
-          .map(cloneFlowRecord)
-          .sort((left, right) => right.updatedAt - left.updatedAt)
-          .slice(0, AUTONOMY_FLOWS_MAX),
+        flows: selectPersistedAutonomyFlows(flows),
       } satisfies AutonomyFlowsFile,
       null,
       2,
@@ -420,6 +475,7 @@ export async function startManagedAutonomyFlow(params: {
   ownerKey?: string
   sourceId?: string
   sourceLabel?: string
+  boundary?: string[]
   nowMs?: number
 }): Promise<ManagedAutonomyFlowStartResult | null> {
   if (params.steps.length === 0) {
@@ -450,6 +506,8 @@ export async function startManagedAutonomyFlow(params: {
 
     const stateJson = buildManagedState(params.steps)
     const firstStep = stateJson.steps[0]!
+    const boundary =
+      normalizeBoundary(params.boundary) ?? normalizeBoundary(current?.boundary)
     const waiting =
       firstStep.waitFor != null
         ? {
@@ -474,6 +532,7 @@ export async function startManagedAutonomyFlow(params: {
       currentDir,
       ...(source.sourceId ? { sourceId: source.sourceId } : {}),
       ...(source.sourceLabel ? { sourceLabel: source.sourceLabel } : {}),
+      ...(boundary ? { boundary } : {}),
       latestRunId: undefined,
       runCount: current?.runCount ?? 0,
       createdAt: current?.createdAt ?? nowMs,
diff --git a/src/utils/autonomyPersistence.ts b/src/utils/autonomyPersistence.ts
index 0a0ebef05e..4085a1a9a3 100644
--- a/src/utils/autonomyPersistence.ts
+++ b/src/utils/autonomyPersistence.ts
@@ -4,6 +4,42 @@ import { lock } from './lockfile.js'
 
 const persistenceLocks = new Map<string, Promise<void>>()
 
+/**
+ * Two-phase retention for persisted record lists.
+ *
+ * Active records (as decided by `isActive`) are always kept, even when there
+ * are more than `max` of them — capping them risks evicting in-flight work, so
+ * detecting a leak of active records is left to the caller. Inactive (terminal)
+ * records are ranked by `getTimestamp` descending and only the newest ones that
+ * fit in the budget remaining below `max` are kept.
+ *
+ * The input array is not mutated and `isActive` is evaluated exactly once per
+ * record.
+ *
+ * @param records - Records to select from.
+ * @param isActive - Returns true for records that must never be dropped.
+ * @param getTimestamp - Ordering key; larger values sort first.
+ * @param max - Size budget; only inactive records are trimmed to honour it.
+ * @returns The retained records in `getTimestamp`-descending order regardless
+ *   of activity, so listings/UI can consume the persisted file without
+ *   re-sorting. When timestamps tie, active records come before inactive ones.
+ */
+export function retainActiveFirst<T>(
+  records: readonly T[],
+  isActive: (record: T) => boolean,
+  getTimestamp: (record: T) => number,
+  max: number,
+): T[] {
+  const newestFirst = (left: T, right: T): number =>
+    getTimestamp(right) - getTimestamp(left)
+
+  // Single pass: split into two fresh arrays so the caller's array is untouched.
+  const active: T[] = []
+  const inactive: T[] = []
+  for (const record of records) {
+    if (isActive(record)) {
+      active.push(record)
+    } else {
+      inactive.push(record)
+    }
+  }
+
+  const historyBudget = Math.max(0, max - active.length)
+  const history = inactive.sort(newestFirst).slice(0, historyBudget)
+
+  // The final sort is stable, so `active` needs no separate pre-sort: ties keep
+  // the concatenation order (active first, each group in input order).
+  return [...active, ...history].sort(newestFirst)
+}
+
+/**
+ * Test-only probe: the number of entries currently held in `persistenceLocks`.
+ *
+ * @throws Error when `process.env.NODE_ENV` is anything other than `'test'`.
+ */
+export function getAutonomyPersistenceLockCountForTests(): number {
+  const inTestEnv = process.env.NODE_ENV === 'test'
+  if (inTestEnv) {
+    return persistenceLocks.size
+  }
+  throw new Error(
+    'getAutonomyPersistenceLockCountForTests can only be called in tests',
+  )
+}
+
 export async function withAutonomyPersistenceLock<T>(
   rootDir: string,
   fn: () => Promise<T>,
@@ -16,10 +52,8 @@ export async function withAutonomyPersistenceLock<T>(
   const current = new Promise<void>(resolve => {
     release = resolve
   })
-  persistenceLocks.set(
-    key,
-    previous.then(() => current),
-  )
+  const chained = previous.then(() => current)
+  persistenceLocks.set(key, chained)
 
   await previous
   try {
@@ -41,7 +75,7 @@ export async function withAutonomyPersistenceLock<T>(
     }
   } finally {
     release()
-    if (persistenceLocks.get(key) === current) {
+    if (persistenceLocks.get(key) === chained) {
       persistenceLocks.delete(key)
     }
   }
diff --git a/src/utils/autonomyQueueLifecycle.ts b/src/utils/autonomyQueueLifecycle.ts
new file mode 100644
index 0000000000..efc796b6a0
--- /dev/null
+++ b/src/utils/autonomyQueueLifecycle.ts
@@ -0,0 +1,261 @@
+import type { QueuedCommand } from '../types/textInputTypes.js'
+import {
+  finalizeAutonomyRunCompleted,
+  finalizeAutonomyRunFailed,
+  listAutonomyRuns,
+  markAutonomyRunCancelled,
+  markAutonomyRunRunning,
+} from './autonomyRuns.js'
+
+/**
+ * Queued commands split by whether their persisted autonomy run can still be
+ * consumed.
+ */
+export type AutonomyQueuePartition = {
+  // From `partitionConsumableQueuedAutonomyCommands`: commands with no run id,
+  // plus commands whose run is still `queued` with no start or end time.
+  attachmentCommands: QueuedCommand[]
+  // Commands carrying a run id whose run is missing or no longer in that state.
+  staleCommands: QueuedCommand[]
+}
+
+/**
+ * An `AutonomyQueuePartition` plus the runs that were claimed, i.e. moved to
+ * `running`. As returned by `claimConsumableQueuedAutonomyCommands`,
+ * `attachmentCommands` holds the run-less commands and the claimed ones.
+ */
+export type AutonomyQueueClaim = AutonomyQueuePartition & {
+  // Ids of the runs that were successfully marked `running`.
+  claimedRunIds: string[]
+  // Commands that belong to one of the claimed runs.
+  claimedCommands: QueuedCommand[]
+}
+
+/**
+ * How a turn ended. A `failed` outcome may carry the raw `error` and/or a
+ * ready-made `message`.
+ */
+export type AutonomyTurnOutcome =
+  | { type: 'completed' }
+  | { type: 'cancelled' }
+  | { type: 'failed'; error?: unknown; message?: string }
+
+/**
+ * Identifies a run by its id together with the optional root directory that is
+ * passed along to the run-store functions.
+ */
+type AutonomyRunRef = {
+  runId: string
+  rootDir?: string
+}
+
+/**
+ * Root directory for a command: the `rootDir` recorded on its autonomy
+ * metadata when present, otherwise `fallbackRootDir`.
+ */
+function getCommandRootDir(
+  command: QueuedCommand,
+  fallbackRootDir?: string,
+): string | undefined {
+  const ownRootDir = command.autonomy?.rootDir
+  return ownRootDir ?? fallbackRootDir
+}
+
+/**
+ * Map key for a run reference: the root directory (empty string when absent)
+ * and the run id, joined by a NUL character.
+ */
+function refKey(ref: AutonomyRunRef): string {
+  const rootPart = ref.rootDir ?? ''
+  return [rootPart, ref.runId].join('\0')
+}
+
+/**
+ * Collect one run reference per distinct `refKey` from the given commands.
+ * Commands without an autonomy run id contribute nothing.
+ */
+function getAutonomyRunRefs(
+  commands: QueuedCommand[],
+  fallbackRootDir?: string,
+): AutonomyRunRef[] {
+  const byKey = new Map<string, AutonomyRunRef>()
+  commands.forEach(command => {
+    const runId = command.autonomy?.runId
+    if (runId) {
+      const ref: AutonomyRunRef = {
+        runId,
+        rootDir: getCommandRootDir(command, fallbackRootDir),
+      }
+      byKey.set(refKey(ref), ref)
+    }
+  })
+  return Array.from(byKey.values())
+}
+
+/** True for commands whose mode is `prompt` or `task-notification`. */
+function isInlineQueuedCommand(command: QueuedCommand): boolean {
+  switch (command.mode) {
+    case 'prompt':
+    case 'task-notification':
+      return true
+    default:
+      return false
+  }
+}
+
+/**
+ * Bucket run references by root directory. References without a root
+ * directory are grouped under the empty-string key.
+ */
+function groupRefsByRootDir(
+  refs: AutonomyRunRef[],
+): Map<string, AutonomyRunRef[]> {
+  const byRoot = new Map<string, AutonomyRunRef[]>()
+  for (const ref of refs) {
+    const rootKey = ref.rootDir ?? ''
+    const bucket = byRoot.get(rootKey) ?? []
+    bucket.push(ref)
+    // Re-setting an existing key keeps its original position in the Map.
+    byRoot.set(rootKey, bucket)
+  }
+  return byRoot
+}
+
+/**
+ * Split queued commands into those that may still be consumed and those whose
+ * persisted autonomy run is no longer queued. Filtering out the latter keeps
+ * stale in-memory commands from reviving a flow after it was cancelled or after
+ * another path already consumed the run.
+ *
+ * A command with no run id is always consumable. A command with a run id is
+ * consumable only when its persisted run exists, has status `queued`, and has
+ * neither `startedAt` nor `endedAt` set.
+ *
+ * @param commands - Queued commands to classify; order is preserved per list.
+ * @param rootDir - Root directory used for commands that do not carry one.
+ */
+export async function partitionConsumableQueuedAutonomyCommands(
+  commands: QueuedCommand[],
+  rootDir?: string,
+): Promise<AutonomyQueuePartition> {
+  type PersistedRun = Awaited<ReturnType<typeof listAutonomyRuns>>[number]
+  const persistedByKey = new Map<string, PersistedRun>()
+
+  // Load the run list once per root directory, one directory after another.
+  const refsByRoot = groupRefsByRootDir(getAutonomyRunRefs(commands, rootDir))
+  for (const [rootKey, group] of refsByRoot) {
+    const groupRootDir = rootKey || undefined
+    const wantedRunIds = new Set(group.map(ref => ref.runId))
+    for (const run of await listAutonomyRuns(groupRootDir)) {
+      if (!wantedRunIds.has(run.runId)) {
+        continue
+      }
+      persistedByKey.set(
+        refKey({ runId: run.runId, rootDir: groupRootDir }),
+        run,
+      )
+    }
+  }
+
+  const isStillQueued = (command: QueuedCommand, runId: string): boolean => {
+    const key = refKey({ runId, rootDir: getCommandRootDir(command, rootDir) })
+    const run = persistedByKey.get(key)
+    return run?.status === 'queued' && !run.startedAt && !run.endedAt
+  }
+
+  const partition: AutonomyQueuePartition = {
+    attachmentCommands: [],
+    staleCommands: [],
+  }
+  for (const command of commands) {
+    const runId = command.autonomy?.runId
+    if (!runId || isStillQueued(command, runId)) {
+      partition.attachmentCommands.push(command)
+    } else {
+      partition.staleCommands.push(command)
+    }
+  }
+  return partition
+}
+
+/**
+ * Partition the queued commands, then try to claim the run behind every
+ * consumable inline command by marking it `running`.
+ *
+ * A run counts as claimed when `markAutonomyRunRunning` returns a record whose
+ * status is `running`; otherwise the commands of that run are reported as
+ * stale. Commands without a run id pass straight through as attachments.
+ *
+ * Note: a consumable command that carries a run id but is not inline (and whose
+ * run was not attempted via some inline command) appears in none of the
+ * returned lists.
+ *
+ * @param commands - Queued commands to examine.
+ * @param rootDir - Root directory used for commands that do not carry one.
+ */
+export async function claimConsumableQueuedAutonomyCommands(
+  commands: QueuedCommand[],
+  rootDir?: string,
+): Promise<AutonomyQueueClaim> {
+  const { attachmentCommands: consumable, staleCommands: alreadyStale } =
+    await partitionConsumableQueuedAutonomyCommands(commands, rootDir)
+
+  const claimedRunIds: string[] = []
+  const claimedKeys = new Set<string>()
+  const lostKeys = new Set<string>()
+  const inlineRefs = getAutonomyRunRefs(
+    consumable.filter(isInlineQueuedCommand),
+    rootDir,
+  )
+  // Claims are attempted one at a time, in command order.
+  for (const ref of inlineRefs) {
+    const claimed = await markAutonomyRunRunning(ref.runId, ref.rootDir)
+    if (claimed?.status === 'running') {
+      claimedRunIds.push(ref.runId)
+      claimedKeys.add(refKey(ref))
+    } else {
+      lostKeys.add(refKey(ref))
+    }
+  }
+
+  const attachmentCommands: QueuedCommand[] = []
+  const claimedCommands: QueuedCommand[] = []
+  const staleCommands = alreadyStale.slice()
+  for (const command of consumable) {
+    const runId = command.autonomy?.runId
+    if (!runId) {
+      attachmentCommands.push(command)
+      continue
+    }
+    const key = refKey({
+      runId,
+      rootDir: getCommandRootDir(command, rootDir),
+    })
+    if (claimedKeys.has(key)) {
+      attachmentCommands.push(command)
+      claimedCommands.push(command)
+      continue
+    }
+    if (lostKeys.has(key)) {
+      staleCommands.push(command)
+    }
+  }
+
+  return { attachmentCommands, staleCommands, claimedRunIds, claimedCommands }
+}
+
+/**
+ * Mark the autonomy run behind each given command as cancelled. Each distinct
+ * run is cancelled once, one after another.
+ */
+export async function cancelQueuedAutonomyCommands(params: {
+  commands: QueuedCommand[]
+  rootDir?: string
+}): Promise<void> {
+  const { commands, rootDir } = params
+  const refs = getAutonomyRunRefs(commands, rootDir)
+  for (const { runId, rootDir: runRootDir } of refs) {
+    await markAutonomyRunCancelled(runId, runRootDir)
+  }
+}
+
+/**
+ * Best-effort text for an arbitrary thrown value: an `Error` yields its
+ * `message`, a string is returned as is, anything else goes through `String()`.
+ */
+function stringifyAutonomyError(error: unknown): string {
+  if (error instanceof Error) {
+    return error.message
+  }
+  return typeof error === 'string' ? error : String(error)
+}
+
+/**
+ * Reduce a failure to a short fixed string for persistence. The original
+ * message text is never returned.
+ *
+ * @param error - The failure value; its text is matched case-insensitively.
+ * @param fallback - Returned when the text mentions none of the markers.
+ * @returns `'provider api_error'` when the text contains a provider/API marker,
+ *   otherwise `fallback`.
+ */
+export function sanitizeAutonomyFailureForPersistence(
+  error: unknown,
+  fallback = 'query failed',
+): string {
+  const providerMarkers = [
+    'api_error',
+    'provider',
+    'openai',
+    'gemini',
+    'grok',
+    'anthropic',
+    'bedrock',
+    'vertex',
+  ]
+  const haystack = stringifyAutonomyError(error).toLowerCase()
+  const mentionsProvider = providerMarkers.some(marker =>
+    haystack.includes(marker),
+  )
+  return mentionsProvider ? 'provider api_error' : fallback
+}
+
+/**
+ * Record the outcome of a turn on the autonomy run behind each command.
+ *
+ * Commands without a run id are skipped. Runs are finalized one at a time, in
+ * command order:
+ * - `completed`: `finalizeAutonomyRunCompleted` is called and the commands it
+ *   returns are collected;
+ * - `cancelled`: the run is marked cancelled;
+ * - otherwise: the run is finalized as failed, using `outcome.message` when
+ *   present and the sanitized `outcome.error` when not.
+ *
+ * @returns The commands collected from completed runs; empty for other outcomes.
+ */
+export async function finalizeAutonomyCommandsForTurn(params: {
+  commands: QueuedCommand[]
+  outcome: AutonomyTurnOutcome
+  currentDir?: string
+  priority?: 'now' | 'next' | 'later'
+  workload?: string
+}): Promise<QueuedCommand[]> {
+  const { commands, outcome } = params
+  const followUps: QueuedCommand[] = []
+  for (const command of commands) {
+    const autonomy = command.autonomy
+    if (!autonomy?.runId) {
+      continue
+    }
+    switch (outcome.type) {
+      case 'completed': {
+        const queued = await finalizeAutonomyRunCompleted({
+          runId: autonomy.runId,
+          rootDir: autonomy.rootDir,
+          currentDir: params.currentDir,
+          priority: params.priority,
+          // The command's own workload wins over the turn-level one.
+          workload: command.workload ?? params.workload,
+        })
+        followUps.push(...queued)
+        break
+      }
+      case 'cancelled':
+        await markAutonomyRunCancelled(autonomy.runId, autonomy.rootDir)
+        break
+      default:
+        await finalizeAutonomyRunFailed({
+          runId: autonomy.runId,
+          rootDir: autonomy.rootDir,
+          error:
+            outcome.message ??
+            sanitizeAutonomyFailureForPersistence(outcome.error),
+        })
+    }
+  }
+  return followUps
+}
diff --git a/src/utils/autonomyRuns.ts b/src/utils/autonomyRuns.ts
index 02ce08c201..d850be9289 100644
--- a/src/utils/autonomyRuns.ts
+++ b/src/utils/autonomyRuns.ts
@@ -1,7 +1,7 @@
 import { randomUUID } from 'crypto'
 import { mkdir, writeFile } from 'fs/promises'
 import { dirname, join, resolve } from 'path'
-import { getProjectRoot } from '../bootstrap/state.js'
+import { getProjectRoot, getSessionId } from '../bootstrap/state.js'
 import type { MessageOrigin } from '../types/message.js'
 import type { QueuedCommand } from '../types/textInputTypes.js'
 import {
@@ -27,11 +27,34 @@ import {
   type AutonomyFlowSyncMode,
   type ManagedAutonomyFlowStepDefinition,
 } from './autonomyFlows.js'
-import { withAutonomyPersistenceLock } from './autonomyPersistence.js'
+import {
+  retainActiveFirst,
+  withAutonomyPersistenceLock,
+} from './autonomyPersistence.js'
 import { getFsImplementation } from './fsOperations.js'
+import { isProcessRunning } from './genericProcessUtils.js'
+import { logError } from './log.js'
 
 const AUTONOMY_RUNS_MAX = 200
+// Diagnostic threshold for active (queued/running) runs. Active records are
+// deliberately exempt from AUTONOMY_RUNS_MAX so a leak in finalization cannot
+// silently evict in-flight work; that exemption only makes sense if a leak is
+// loud when it appears. Crossing this threshold warns once per process so
+// operators see the divergence in logs before runs.json grows pathologically.
+const AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD = 100
+let warnedActiveRunsThresholdCrossed = false
 const AUTONOMY_RUNS_RELATIVE_PATH = join(AUTONOMY_DIR, 'runs.json')
+// Sentinel string surfaced to operators via runs.json error fields and
+// referenced literally by the HEARTBEAT.md `stale-recovery-health` task.
+// A unit test asserts the HEARTBEAT.md file contains this exact prefix —
+// changing the value will fail the test, forcing the heartbeat prompt
+// to be updated in the same change.
+export const STALE_ACTIVE_RUN_ERROR_PREFIX =
+  'Recovered stale active autonomy run'
+
+// Guards the legacy-block warning so it fires once per (process, runId) instead
+// of every dedup tick while a no-owner record sits there.
+const warnedLegacyBlockRunIds = new Set<string>()
 
 export type AutonomyRunStatus =
   | 'queued'
@@ -59,6 +82,8 @@ export type AutonomyRunRecord = {
   flowStepName?: string
   promptPreview: string
   createdAt: number
+  ownerProcessId?: number
+  ownerSessionId?: string
   startedAt?: number
   endedAt?: number
   error?: string
@@ -77,6 +102,19 @@ type AutonomyRunFlowRef = {
   stepName: string
 }
 
+/**
+ * Input accepted by `createAutonomyRun` and `createAutonomyRunIfNoActiveSource`.
+ */
+type CreateAutonomyRunParams = {
+  trigger: AutonomyTriggerKind
+  // Full prompt text; the run record stores a truncated preview of it.
+  prompt: string
+  // Defaults to `getProjectRoot()`.
+  rootDir?: string
+  // Defaults to the resolved `rootDir`.
+  currentDir?: string
+  sourceId?: string
+  sourceLabel?: string
+  // Defaults to `'flow_step'` when `flow` is given, else `'automatic'`.
+  runtime?: AutonomyRunRuntime
+  ownerKey?: string
+  flow?: AutonomyRunFlowRef
+  // Creation timestamp; defaults to `Date.now()`.
+  nowMs?: number
+}
+
 function truncatePromptPreview(prompt: string): string {
   const singleLine = prompt.replace(/\s+/g, ' ').trim()
   return singleLine.length <= 240
@@ -95,6 +133,34 @@ function cloneRunRecord(run: AutonomyRunRecord): AutonomyRunRecord {
   return { ...run }
 }
 
+/**
+ * True when the run's status is `queued` or `running`.
+ *
+ * Delegates to `isActiveAutonomyRunStatus` so this file keeps a single
+ * definition of which statuses count as active.
+ */
+function isAutonomyRunActive(run: AutonomyRunRecord): boolean {
+  return isActiveAutonomyRunStatus(run.status)
+}
+
+/**
+ * Build the list of run records that gets written to disk.
+ *
+ * Runs are cloned, then passed to `retainActiveFirst` with `createdAt` as the
+ * ordering key and `AUTONOMY_RUNS_MAX` as the size budget. As a side effect,
+ * the first time the number of active runs reaches
+ * `AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD` an error is logged; a module-level flag
+ * keeps it from being logged again.
+ */
+function selectPersistedAutonomyRuns(
+  runs: AutonomyRunRecord[],
+): AutonomyRunRecord[] {
+  const snapshot = runs.map(run => cloneRunRecord(run))
+  let activeCount = 0
+  for (const run of snapshot) {
+    if (isAutonomyRunActive(run)) {
+      activeCount += 1
+    }
+  }
+  const thresholdReached = activeCount >= AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD
+  if (thresholdReached && !warnedActiveRunsThresholdCrossed) {
+    warnedActiveRunsThresholdCrossed = true
+    const warning = new Error(
+      `autonomy: ${activeCount} active runs exceed warn threshold ${AUTONOMY_ACTIVE_RUNS_WARN_THRESHOLD}; check for finalize leaks`,
+    )
+    logError(warning)
+  }
+  const createdAtOf = (run: AutonomyRunRecord): number => run.createdAt
+  return retainActiveFirst(
+    snapshot,
+    isAutonomyRunActive,
+    createdAtOf,
+    AUTONOMY_RUNS_MAX,
+  )
+}
+
 function normalizePersistedRunRecord(
   run: PersistedAutonomyRunRecord,
 ): AutonomyRunRecord {
@@ -157,11 +223,7 @@ async function writeAutonomyRuns(
     path,
     `${JSON.stringify(
       {
-        runs: runs
-          .slice()
-          .map(cloneRunRecord)
-          .sort((left, right) => right.createdAt - left.createdAt)
-          .slice(0, AUTONOMY_RUNS_MAX),
+        runs: selectPersistedAutonomyRuns(runs),
       } satisfies AutonomyRunsFile,
       null,
       2,
@@ -172,7 +234,7 @@ async function writeAutonomyRuns(
 
 async function updateAutonomyRun(
   runId: string,
-  updater: (current: AutonomyRunRecord) => AutonomyRunRecord,
+  updater: (current: AutonomyRunRecord) => AutonomyRunRecord | null,
   rootDir: string = getProjectRoot(),
 ): Promise<AutonomyRunRecord | null> {
   return withAutonomyPersistenceLock(rootDir, async () => {
@@ -181,7 +243,11 @@ async function updateAutonomyRun(
     if (index === -1) {
       return null
     }
-    const updated = cloneRunRecord(updater(cloneRunRecord(runs[index]!)))
+    const next = updater(cloneRunRecord(runs[index]!))
+    if (!next) {
+      return null
+    }
+    const updated = cloneRunRecord(next)
     runs[index] = updated
     await writeAutonomyRuns(runs, rootDir)
     return updated
@@ -196,21 +262,112 @@ export async function getAutonomyRunById(
   return runs.find(run => run.runId === runId) ?? null
 }
 
-export async function createAutonomyRun(params: {
+/** True for the two non-terminal statuses, `queued` and `running`. */
+function isActiveAutonomyRunStatus(status: AutonomyRunStatus): boolean {
+  switch (status) {
+    case 'queued':
+    case 'running':
+      return true
+    default:
+      return false
+  }
+}
+
+/**
+ * Type guard for a plausible owner process id: an integer in the range
+ * 1..4_194_304 inclusive.
+ *
+ * Non-numeric, non-integer, zero (Linux: send-to-process-group) and negative
+ * values are rejected. A forged record with pid=0 or pid<0 used to be treated
+ * as live and could permanently block dedup; treating such values as invalid
+ * (and therefore stale) closes that availability hole.
+ *
+ * NOTE(review): the upper bound 4_194_304 is 2^22 — presumably Linux's maximum
+ * `pid_max`; confirm it is also a safe ceiling on the other supported
+ * platforms.
+ */
+function isValidOwnerProcessId(pid: number | undefined): pid is number {
+  if (typeof pid !== 'number' || !Number.isInteger(pid)) {
+    return false
+  }
+  return pid >= 1 && pid <= 4_194_304
+}
+
+/**
+ * Decide whether an active run has been orphaned by its owner process.
+ *
+ * Returns false for runs that are not active and for runs that record no
+ * `ownerProcessId` at all. Otherwise the run is stale when the recorded pid is
+ * not a valid process id or when `isProcessRunning` reports it is not running.
+ */
+function isStaleActiveAutonomyRun(run: AutonomyRunRecord): boolean {
+  const ownerPid = run.ownerProcessId
+  if (!isActiveAutonomyRunStatus(run.status) || ownerPid === undefined) {
+    return false
+  }
+  // An invalid pid short-circuits, so it is never passed to isProcessRunning.
+  return !isValidOwnerProcessId(ownerPid) || !isProcessRunning(ownerPid)
+}
+
+/**
+ * Error text stored on a recovered stale run. It starts with
+ * `STALE_ACTIVE_RUN_ERROR_PREFIX` and names the owner process id.
+ */
+function staleActiveRunError(run: AutonomyRunRecord): string {
+  const ownerPid = run.ownerProcessId
+  return `${STALE_ACTIVE_RUN_ERROR_PREFIX}: owner process ${ownerPid} is no longer running.`
+}
+
+/**
+ * Return a copy of `run` with status `failed`, `endedAt` set to `nowMs` and
+ * `error` set to the given text. The input record is not modified.
+ */
+function failAutonomyRunRecord(
+  run: AutonomyRunRecord,
+  error: string,
+  nowMs: number,
+): AutonomyRunRecord {
+  const failed: AutonomyRunRecord = {
+    ...run,
+    status: 'failed',
+    endedAt: nowMs,
+    error,
+  }
+  return failed
+}
+
+/**
+ * Return a failed copy of a stale active run, using the stale-run error text
+ * as its `error` and `nowMs` as its end time.
+ */
+function recoverStaleActiveAutonomyRun(
+  run: AutonomyRunRecord,
+  nowMs: number,
+): AutonomyRunRecord {
+  const reason = staleActiveRunError(run)
+  return failAutonomyRunRecord(run, reason, nowMs)
+}
+
+/**
+ * Propagate a failed run to its parent flow. Does nothing unless the run has a
+ * parent flow id and its sync mode is `managed`. The flow step is marked failed
+ * with the run's error (or a generic message) and the run's `endedAt`.
+ */
+async function syncFailedManagedFlowForRun(
+  run: AutonomyRunRecord,
+  rootDir: string,
+): Promise<void> {
+  const flowId = run.parentFlowId
+  if (!flowId || run.parentFlowSyncMode !== 'managed') {
+    return
+  }
+  await markManagedAutonomyFlowStepFailed({
+    flowId,
+    runId: run.runId,
+    error: run.error ?? 'Autonomy run failed.',
+    rootDir,
+    nowMs: run.endedAt,
+  })
+}
+
+/**
+ * True when `run` is active and comes from the given source: same trigger,
+ * same source id and — only when `params.ownerKey` is provided — same owner
+ * key.
+ */
+function matchesActiveAutonomyRunSource(
+  run: AutonomyRunRecord,
+  params: {
+    trigger: AutonomyTriggerKind
+    sourceId: string
+    ownerKey?: string
+  },
+): boolean {
+  if (run.trigger !== params.trigger || run.sourceId !== params.sourceId) {
+    return false
+  }
+  if (params.ownerKey !== undefined && run.ownerKey !== params.ownerKey) {
+    return false
+  }
+  return isActiveAutonomyRunStatus(run.status)
+}
+
+/**
+ * Read-only check: is there an active run for this source that is not stale?
+ *
+ * Lists the runs for `params.rootDir` and looks for one that matches the
+ * trigger, source id and (when given) owner key, is `queued` or `running`, and
+ * is not considered stale. Nothing is written and no stale run is recovered
+ * here.
+ */
+export async function hasActiveAutonomyRunForSource(params: {
   trigger: AutonomyTriggerKind
-  prompt: string
+  sourceId: string
   rootDir?: string
-  currentDir?: string
-  sourceId?: string
-  sourceLabel?: string
-  runtime?: AutonomyRunRuntime
   ownerKey?: string
-  flow?: AutonomyRunFlowRef
-  nowMs?: number
-}): Promise<AutonomyRunRecord> {
-  const rootDir = resolve(params.rootDir ?? getProjectRoot())
-  const currentDir = resolve(params.currentDir ?? rootDir)
-  const record: AutonomyRunRecord = {
+}): Promise<boolean> {
+  const runs = await listAutonomyRuns(params.rootDir)
+  return runs.some(
+    run =>
+      matchesActiveAutonomyRunSource(run, params) &&
+      !isStaleActiveAutonomyRun(run),
+  )
+}
+
+function buildAutonomyRunRecord(
+  params: CreateAutonomyRunParams,
+  rootDir: string,
+  currentDir: string,
+): AutonomyRunRecord {
+  const createdAt = params.nowMs ?? Date.now()
+  return {
     runId: randomUUID(),
     runtime: params.runtime ?? (params.flow ? 'flow_step' : 'automatic'),
     trigger: params.trigger,
@@ -231,13 +388,77 @@ export async function createAutonomyRun(params: {
         }
       : {}),
     promptPreview: truncatePromptPreview(params.prompt),
-    createdAt: params.nowMs ?? Date.now(),
+    createdAt,
+    ownerProcessId: process.pid,
+    ownerSessionId: getSessionId(),
   }
+}
+
+/**
+ * Store a new run record under the persistence lock for `rootDir`, optionally
+ * refusing to do so while another run for the same source is still active.
+ *
+ * When `skipWhenActiveSource` is true and the record has a source id, every
+ * active run with the same trigger, source id and owner key is examined:
+ * stale ones are replaced by a failed copy (ended at `record.createdAt`), and
+ * any other one blocks the new record. If blocked, the record is not stored,
+ * but stale recoveries made along the way are still written.
+ *
+ * @returns `created` — whether the record was stored; `recoveredStaleRuns` —
+ *   the failed copies of the stale runs that were recovered.
+ */
+async function persistAutonomyRunRecord(
+  record: AutonomyRunRecord,
+  rootDir: string,
+  skipWhenActiveSource: boolean,
+): Promise<{
+  created: boolean
+  recoveredStaleRuns: AutonomyRunRecord[]
+}> {
+  let created = false
+  const recoveredStaleRuns: AutonomyRunRecord[] = []
   await withAutonomyPersistenceLock(rootDir, async () => {
     const runs = await listAutonomyRuns(rootDir)
+    const sourceId = record.sourceId
+    if (skipWhenActiveSource && sourceId) {
+      let hasBlockingActiveRun = false
+      let staleRecoveriesApplied = false
+      for (let i = 0; i < runs.length; i++) {
+        const run = runs[i]!
+        if (
+          !matchesActiveAutonomyRunSource(run, {
+            trigger: record.trigger,
+            sourceId,
+            ownerKey: record.ownerKey,
+          })
+        ) {
+          continue
+        }
+        // A stale run does not block: swap in its failed copy and move on.
+        if (isStaleActiveAutonomyRun(run)) {
+          const recovered = recoverStaleActiveAutonomyRun(run, record.createdAt)
+          runs[i] = recovered
+          recoveredStaleRuns.push(recovered)
+          staleRecoveriesApplied = true
+          continue
+        }
+        // A run with no recorded owner is never classified as stale, so it
+        // blocks; log that once per run id so the operator can cancel it.
+        if (
+          run.ownerProcessId === undefined &&
+          !warnedLegacyBlockRunIds.has(run.runId)
+        ) {
+          warnedLegacyBlockRunIds.add(run.runId)
+          logError(
+            new Error(
+              `[autonomyRuns] blocked by legacy un-owned active run ${run.runId} (createdAt=${run.createdAt}); cancel manually if this is a stale upgrade artifact`,
+            ),
+          )
+        }
+        hasBlockingActiveRun = true
+      }
+      if (hasBlockingActiveRun) {
+        // Blocked: persist any recoveries, but leave `created` false.
+        if (staleRecoveriesApplied) {
+          await writeAutonomyRuns(runs, rootDir)
+        }
+        return
+      }
+    }
     runs.unshift(record)
     await writeAutonomyRuns(runs, rootDir)
+    created = true
   })
+  return { created, recoveredStaleRuns }
+}
+
+async function queueManagedFlowStepRunForRecord(
+  record: AutonomyRunRecord,
+  rootDir: string,
+): Promise<void> {
   if (
     record.parentFlowId &&
     record.flowStepId &&
@@ -258,9 +479,47 @@ export async function createAutonomyRun(params: {
       nowMs: record.createdAt,
     })
   }
+}
+
+/**
+ * Shared implementation behind the two public run-creation functions.
+ *
+ * Builds the record, persists it (with source dedup when `skipIfActiveSource`
+ * is true), then syncs every stale run recovered during persistence to its
+ * managed flow — this happens whether or not the new record was stored. When
+ * the record was stored, its managed flow step (if any) is queued.
+ *
+ * @returns The stored record, or `null` when persistence skipped it.
+ */
+async function createAutonomyRunCore(
+  params: CreateAutonomyRunParams,
+  skipIfActiveSource: boolean,
+): Promise<AutonomyRunRecord | null> {
+  const rootDir = resolve(params.rootDir ?? getProjectRoot())
+  const currentDir = resolve(params.currentDir ?? rootDir)
+  const record = buildAutonomyRunRecord(params, rootDir, currentDir)
+
+  const outcome = await persistAutonomyRunRecord(
+    record,
+    rootDir,
+    skipIfActiveSource,
+  )
+  for (const staleRun of outcome.recoveredStaleRuns) {
+    await syncFailedManagedFlowForRun(staleRun, rootDir)
+  }
+  if (outcome.created) {
+    await queueManagedFlowStepRunForRecord(record, rootDir)
+    return record
+  }
+  return null
+}
+
+/**
+ * Create and persist a new autonomy run without source dedup.
+ *
+ * @throws Error if the core helper reports the run as skipped, which is not
+ *   expected when dedup is disabled.
+ */
+export async function createAutonomyRun(
+  params: CreateAutonomyRunParams,
+): Promise<AutonomyRunRecord> {
+  const record = await createAutonomyRunCore(params, false)
+  if (!record) {
+    throw new Error('Autonomy run was unexpectedly skipped.')
+  }
   return record
 }
 
+/**
+ * Create a run unless an active run for the same source blocks it.
+ *
+ * @returns The new record, or `null` when creation was skipped.
+ */
+export async function createAutonomyRunIfNoActiveSource(
+  params: CreateAutonomyRunParams & { sourceId: string },
+): Promise<AutonomyRunRecord | null> {
+  const skipIfActiveSource = true
+  return createAutonomyRunCore(params, skipIfActiveSource)
+}
+
 function buildManagedFlowStepPrompt(
   flow: AutonomyFlowRecord,
   stepIndex: number,
@@ -336,6 +595,7 @@ async function createOrRecoverManagedFlowStepCommand(params: {
         workload: params.workload,
         autonomy: {
           runId: run.runId,
+          rootDir: run.rootDir,
           trigger: 'managed-flow-step',
           sourceId: run.sourceId,
           sourceLabel: run.sourceLabel,
@@ -426,11 +686,16 @@ export async function markAutonomyRunRunning(
 ): Promise<AutonomyRunRecord | null> {
   const updated = await updateAutonomyRun(
     runId,
-    current => ({
-      ...current,
-      status: 'running',
-      startedAt: nowMs ?? Date.now(),
-    }),
+    current =>
+      current.status === 'queued'
+        ? {
+            ...current,
+            status: 'running',
+            startedAt: nowMs ?? Date.now(),
+            ownerProcessId: process.pid,
+            ownerSessionId: getSessionId(),
+          }
+        : null,
     rootDir,
   )
   if (updated?.parentFlowId && updated.parentFlowSyncMode === 'managed') {
@@ -451,12 +716,15 @@ export async function markAutonomyRunCompleted(
 ): Promise<AutonomyRunRecord | null> {
   const updated = await updateAutonomyRun(
     runId,
-    current => ({
-      ...current,
-      status: 'completed',
-      endedAt: nowMs ?? Date.now(),
-      error: undefined,
-    }),
+    current =>
+      current.status === 'queued' || current.status === 'running'
+        ? {
+            ...current,
+            status: 'completed',
+            endedAt: nowMs ?? Date.now(),
+            error: undefined,
+          }
+        : null,
     rootDir,
   )
   if (updated?.parentFlowId && updated.parentFlowSyncMode === 'managed') {
@@ -476,24 +744,17 @@ export async function markAutonomyRunFailed(
   rootDir?: string,
   nowMs?: number,
 ): Promise<AutonomyRunRecord | null> {
+  const endedAt = nowMs ?? Date.now()
   const updated = await updateAutonomyRun(
     runId,
-    current => ({
-      ...current,
-      status: 'failed',
-      endedAt: nowMs ?? Date.now(),
-      error,
-    }),
+    current =>
+      isActiveAutonomyRunStatus(current.status)
+        ? failAutonomyRunRecord(current, error, endedAt)
+        : null,
     rootDir,
   )
-  if (updated?.parentFlowId && updated.parentFlowSyncMode === 'managed') {
-    await markManagedAutonomyFlowStepFailed({
-      flowId: updated.parentFlowId,
-      runId: updated.runId,
-      error,
-      rootDir,
-      nowMs: updated.endedAt,
-    })
+  if (updated) {
+    await syncFailedManagedFlowForRun(updated, rootDir ?? updated.rootDir)
   }
   return updated
 }
@@ -505,12 +766,15 @@ export async function markAutonomyRunCancelled(
 ): Promise<AutonomyRunRecord | null> {
   const updated = await updateAutonomyRun(
     runId,
-    current => ({
-      ...current,
-      status: 'cancelled',
-      endedAt: nowMs ?? Date.now(),
-      error: undefined,
-    }),
+    current =>
+      current.status === 'queued' || current.status === 'running'
+        ? {
+            ...current,
+            status: 'cancelled',
+            endedAt: nowMs ?? Date.now(),
+            error: undefined,
+          }
+        : null,
     rootDir,
   )
   if (updated?.parentFlowId && updated.parentFlowSyncMode === 'managed') {
@@ -612,6 +876,7 @@ export async function createAutonomyQueuedPrompt(params: {
   currentDir?: string
   sourceId?: string
   sourceLabel?: string
+  ownerKey?: string
   workload?: string
   priority?: 'now' | 'next' | 'later'
   shouldCreate?: () => boolean
@@ -634,39 +899,130 @@ export async function createAutonomyQueuedPrompt(params: {
     currentDir,
     sourceId: params.sourceId,
     sourceLabel: params.sourceLabel,
+    ownerKey: params.ownerKey,
     workload: params.workload,
     priority: params.priority,
     flow: params.flow,
   })
 }
 
+/**
+ * Prepare and commit a queued autonomy prompt for a source, unless that source
+ * already has an active run.
+ *
+ * Steps, in order: a read-only check for an active run, prompt preparation,
+ * the optional `shouldCreate` veto, and finally the deduplicating commit.
+ *
+ * @returns The queued command, or `null` when an active run blocks the source,
+ *   `shouldCreate` returns a falsy value, or the commit is skipped.
+ */
+export async function createAutonomyQueuedPromptIfNoActiveSource(params: {
+  trigger: AutonomyTriggerKind
+  basePrompt: string
+  rootDir?: string
+  currentDir?: string
+  sourceId: string
+  sourceLabel?: string
+  ownerKey?: string
+  workload?: string
+  priority?: 'now' | 'next' | 'later'
+  shouldCreate?: () => boolean
+}): Promise<QueuedCommand | null> {
+  const { trigger, sourceId, ownerKey } = params
+  const rootDir = resolve(params.rootDir ?? getProjectRoot())
+  const currentDir = resolve(params.currentDir ?? getCwd())
+
+  // Optimistic fast path for the storm case: when an active run already blocks
+  // this source, return before the AGENTS.md / HEARTBEAT.md disk reads and the
+  // prompt assembly. The check made under the lock inside
+  // persistAutonomyRunRecord remains the authoritative one.
+  const alreadyActive = await hasActiveAutonomyRunForSource({
+    trigger,
+    sourceId,
+    rootDir,
+    ownerKey,
+  })
+  if (alreadyActive) {
+    return null
+  }
+
+  const prepared = await prepareAutonomyTurnPrompt({
+    basePrompt: params.basePrompt,
+    trigger,
+    rootDir,
+    currentDir,
+  })
+  const vetoed = params.shouldCreate ? !params.shouldCreate() : false
+  if (vetoed) {
+    return null
+  }
+
+  return commitAutonomyQueuedPromptIfNoActiveSource({
+    prepared,
+    rootDir,
+    currentDir,
+    sourceId,
+    sourceLabel: params.sourceLabel,
+    ownerKey,
+    workload: params.workload,
+    priority: params.priority,
+  })
+}
+
+/**
+ * Commit a prepared autonomy prompt as a queued command, without source dedup.
+ *
+ * @throws Error if the internal helper reports the prompt as skipped, which is
+ *   not expected when dedup is disabled.
+ */
 export async function commitAutonomyQueuedPrompt(params: {
   prepared: Awaited<ReturnType<typeof prepareAutonomyTurnPrompt>>
   rootDir?: string
   currentDir?: string
   sourceId?: string
   sourceLabel?: string
+  ownerKey?: string
   workload?: string
   priority?: 'now' | 'next' | 'later'
   flow?: AutonomyRunFlowRef
 }): Promise<QueuedCommand> {
+  const command = await commitAutonomyQueuedPromptInternal(params, false)
+  if (!command) {
+    throw new Error('Autonomy queued prompt was unexpectedly skipped.')
+  }
+  return command
+}
+
+/**
+ * Commit a prepared autonomy prompt with source dedup requested.
+ *
+ * @returns The queued command, or `null` when the internal helper skipped it.
+ */
+async function commitAutonomyQueuedPromptIfNoActiveSource(params: {
+  prepared: Awaited<ReturnType<typeof prepareAutonomyTurnPrompt>>
+  rootDir?: string
+  currentDir?: string
+  sourceId: string
+  sourceLabel?: string
+  ownerKey?: string
+  workload?: string
+  priority?: 'now' | 'next' | 'later'
+}): Promise<QueuedCommand | null> {
+  const skipWhenActiveSource = true
+  return commitAutonomyQueuedPromptInternal(params, skipWhenActiveSource)
+}
+
+async function commitAutonomyQueuedPromptInternal(
+  params: {
+    prepared: Awaited<ReturnType<typeof prepareAutonomyTurnPrompt>>
+    rootDir?: string
+    currentDir?: string
+    sourceId?: string
+    sourceLabel?: string
+    ownerKey?: string
+    workload?: string
+    priority?: 'now' | 'next' | 'later'
+    flow?: AutonomyRunFlowRef
+  },
+  skipWhenActiveSource: boolean,
+): Promise<QueuedCommand | null> {
   const rootDir = resolve(
     params.rootDir ?? params.prepared.rootDir ?? getProjectRoot(),
   )
   const currentDir = resolve(
     params.currentDir ?? params.prepared.currentDir ?? getCwd(),
   )
-  commitPreparedAutonomyTurn(params.prepared)
   const value = params.prepared.prompt
-  const run = await createAutonomyRun({
+  const runParams: CreateAutonomyRunParams = {
     trigger: params.prepared.trigger,
     prompt: value,
     rootDir,
     currentDir,
     sourceId: params.sourceId,
     sourceLabel: params.sourceLabel,
+    ownerKey: params.ownerKey,
     flow: params.flow,
-  })
+  }
+  const useDedup = skipWhenActiveSource && Boolean(params.sourceId)
+  const run = await createAutonomyRunCore(runParams, useDedup)
+  if (!run) {
+    return null
+  }
+  commitPreparedAutonomyTurn(params.prepared)
   const origin = {
     kind: 'autonomy',
     trigger: params.prepared.trigger,
@@ -683,6 +1039,7 @@ export async function commitAutonomyQueuedPrompt(params: {
     workload: params.workload,
     autonomy: {
       runId: run.runId,
+      rootDir: run.rootDir,
       trigger: params.prepared.trigger,
       sourceId: params.sourceId,
       sourceLabel: params.sourceLabel,
diff --git a/src/utils/handlePromptSubmit.ts b/src/utils/handlePromptSubmit.ts
index 97b05758f1..e8c387167b 100644
--- a/src/utils/handlePromptSubmit.ts
+++ b/src/utils/handlePromptSubmit.ts
@@ -19,19 +19,20 @@ import {
 } from '../types/textInputTypes.js'
 import { createAbortController } from './abortController.js'
 import type { PastedContent } from './config.js'
+import { getCwd } from './cwd.js'
 import { logForDebugging } from './debug.js'
 import type { EffortValue } from './effort.js'
 import type { FileHistoryState } from './fileHistory.js'
 import { fileHistoryEnabled, fileHistoryMakeSnapshot } from './fileHistory.js'
 import { gracefulShutdownSync } from './gracefulShutdown.js'
+import { toError } from './errors.js'
+import { logError } from './log.js'
 import { enqueue } from './messageQueueManager.js'
 import { resolveSkillModelOverride } from './model/model.js'
 import {
-  finalizeAutonomyRunCompleted,
-  finalizeAutonomyRunFailed,
-  markAutonomyRunFailed,
-  markAutonomyRunRunning,
-} from './autonomyRuns.js'
+  claimConsumableQueuedAutonomyCommands,
+  finalizeAutonomyCommandsForTurn,
+} from './autonomyQueueLifecycle.js'
 import type { ProcessUserInputContext } from './processUserInput/processUserInput.js'
 import { processUserInput } from './processUserInput/processUserInput.js'
 import type { QueryGuard } from './QueryGuard.js'
@@ -75,7 +76,7 @@ type BaseExecutionParams = {
     onBeforeQuery?: (input: string, newMessages: Message[]) => Promise<boolean>,
     input?: string,
     effort?: EffortValue,
-  ) => Promise<void>
+  ) => Promise<boolean>
   setAppState: (updater: (prev: AppState) => AppState) => void
   onBeforeQuery?: (input: string, newMessages: Message[]) => Promise<boolean>
   canUseTool?: CanUseToolFn
@@ -459,7 +460,18 @@ async function executeUserInput(params: ExecuteUserInputParams): Promise<void> {
     // Iterate all commands uniformly. First command gets attachments +
     // ideSelection + pastedContents, rest skip attachments to avoid
     // duplicating turn-level context (IDE selection, todos, diffs).
-    const commands = queuedCommands ?? []
+    let commands = queuedCommands ?? []
+    const queuedAutonomyClaim =
+      await claimConsumableQueuedAutonomyCommands(commands)
+    commands = queuedAutonomyClaim.attachmentCommands
+    const claimedAutonomyCommands = queuedAutonomyClaim.claimedCommands
+    if (commands.length === 0) {
+      // Clear the abort controller published a few lines above so this turn's
+      // stale controller does not leak into the next turn when the claim step
+      // leaves nothing to run (e.g. every autonomy command was non-consumable).
+      setAbortController(null)
+      return
+    }
 
     // Compute the workload tag for this turn. queueProcessor can batch a
     // cron prompt with a same-tick human prompt; only tag when EVERY
@@ -471,7 +483,7 @@ async function executeUserInput(params: ExecuteUserInputParams): Promise<void> {
       commands.every(c => c.workload === firstWorkload)
         ? firstWorkload
         : undefined
-    let autonomyRunIds: string[] | undefined
+    const deferredAutonomyRunIds = new Set<string>()
 
     // Wrap the entire turn (processUserInput loop + onQuery) in an
     // AsyncLocalStorage context. This is the ONLY way to correctly
@@ -481,15 +493,13 @@ async function executeUserInput(params: ExecuteUserInputParams): Promise<void> {
     // context — isolated from the parent's continuation. A process-global
     // mutable slot would be clobbered at the detached closure's first
     // await by this function's synchronous return path. See state.ts.
+    let turnError: unknown
     try {
       await runWithWorkload(turnWorkload, async () => {
         for (let i = 0; i < commands.length; i++) {
           const cmd = commands[i]!
           const isFirst = i === 0
-          if (cmd.autonomy?.runId) {
-            ;(autonomyRunIds ??= []).push(cmd.autonomy.runId)
-            await markAutonomyRunRunning(cmd.autonomy.runId)
-          }
+          const runId = cmd.autonomy?.runId
           const result = await processUserInput({
             input: cmd.value,
             preExpansionInput: cmd.preExpansionValue,
@@ -510,7 +520,11 @@ async function executeUserInput(params: ExecuteUserInputParams): Promise<void> {
             bridgeOrigin: cmd.bridgeOrigin,
             isMeta: cmd.isMeta,
             skipAttachments: !isFirst,
+            autonomy: cmd.autonomy,
           })
+          if (runId && result.deferAutonomyCompletion) {
+            deferredAutonomyRunIds.add(runId)
+          }
           // Stamp origin here rather than threading another arg through
           // processUserInput → processUserInputBase → processTextPrompt → createUserMessage.
           // Derive origin from mode for task-notifications — mirrors the origin
@@ -611,28 +625,52 @@ async function executeUserInput(params: ExecuteUserInputParams): Promise<void> {
           }
         }
       }) // end runWithWorkload — ALS context naturally scoped, no finally needed
-      if (autonomyRunIds?.length) {
-        for (const runId of autonomyRunIds) {
-          const nextCommands = await finalizeAutonomyRunCompleted({
-            runId,
+    } catch (error) {
+      turnError = error
+    }
+
+    // Finalize claimed autonomy commands (minus those whose completion was
+    // deferred): `failed` if the turn body threw, `completed` otherwise. Each
+    // finalize call runs in its own try/catch so a failure there cannot
+    // double-finalize the same commands as `failed` (which previously
+    // cancelled follow-up queue state after a successful turn).
+    if (claimedAutonomyCommands.length) {
+      const finalizableCommands = claimedAutonomyCommands.filter(command => {
+        const runId = command.autonomy?.runId
+        return !runId || !deferredAutonomyRunIds.has(runId)
+      })
+      if (turnError) {
+        try {
+          await finalizeAutonomyCommandsForTurn({
+            commands: finalizableCommands,
+            outcome: { type: 'failed', error: turnError },
+            currentDir: getCwd(),
+            priority: 'later',
+            workload: turnWorkload,
+          })
+        } catch (finalizeError) {
+          logError(toError(finalizeError))
+        }
+      } else {
+        try {
+          const nextCommands = await finalizeAutonomyCommandsForTurn({
+            commands: finalizableCommands,
+            outcome: { type: 'completed' },
+            currentDir: getCwd(),
             priority: 'later',
             workload: turnWorkload,
           })
           for (const nextCommand of nextCommands) {
             enqueue(nextCommand)
           }
+        } catch (finalizeError) {
+          logError(toError(finalizeError))
         }
       }
-    } catch (error) {
-      if (autonomyRunIds?.length) {
-        for (const runId of autonomyRunIds) {
-          await finalizeAutonomyRunFailed({
-            runId,
-            error: String(error),
-          })
-        }
-      }
-      throw error
+    }
+
+    if (turnError) {
+      throw turnError
     }
   } finally {
     // Safety net: release the guard reservation if processUserInput threw
diff --git a/src/utils/model/__tests__/providers.test.ts b/src/utils/model/__tests__/providers.test.ts
index 0ed816f9e1..6790a3e6f9 100644
--- a/src/utils/model/__tests__/providers.test.ts
+++ b/src/utils/model/__tests__/providers.test.ts
@@ -1,173 +1,162 @@
-import { describe, expect, test, beforeEach, afterEach } from "bun:test";
-import { mock } from "bun:test";
+import { describe, expect, test, beforeEach, afterEach } from 'bun:test'
 
-let mockedModelType: "gemini" | undefined;
+const { getAPIProvider, isFirstPartyAnthropicBaseUrl } = await import(
+  '../providers'
+)
 
-mock.module("../../settings/settings.js", () => ({
-  getInitialSettings: () =>
-    mockedModelType ? { modelType: mockedModelType } : {},
-}));
-
-const { getAPIProvider, isFirstPartyAnthropicBaseUrl } =
-  await import("../providers");
-
-describe("getAPIProvider", () => {
+describe('getAPIProvider', () => {
   const envKeys = [
-    "CLAUDE_CODE_USE_GEMINI",
-    "CLAUDE_CODE_USE_BEDROCK",
-    "CLAUDE_CODE_USE_VERTEX",
-    "CLAUDE_CODE_USE_FOUNDRY",
-    "CLAUDE_CODE_USE_OPENAI",
-  ] as const;
-  const savedEnv: Record<string, string | undefined> = {};
-
+    'CLAUDE_CODE_USE_GEMINI',
+    'CLAUDE_CODE_USE_BEDROCK',
+    'CLAUDE_CODE_USE_VERTEX',
+    'CLAUDE_CODE_USE_FOUNDRY',
+    'CLAUDE_CODE_USE_OPENAI',
+    'CLAUDE_CODE_USE_GROK',
+  ] as const
+  const savedEnv: Record<string, string | undefined> = {}
 
   beforeEach(() => {
     // Save and clear environment variables
-    mockedModelType = undefined;
     for (const key of envKeys) {
-      savedEnv[key] = process.env[key];
-      delete process.env[key];
+      savedEnv[key] = process.env[key]
+      delete process.env[key]
     }
-  });
+  })
 
   afterEach(() => {
     // Restore environment variables
-    mockedModelType = undefined;
     for (const key of envKeys) {
       if (savedEnv[key] !== undefined) {
-        process.env[key] = savedEnv[key];
+        process.env[key] = savedEnv[key]
       } else {
-        delete process.env[key];
+        delete process.env[key]
       }
     }
-  });
+  })
 
   test('returns "firstParty" by default', () => {
-    expect(getAPIProvider()).toBe("firstParty");
-  });
+    expect(getAPIProvider({})).toBe('firstParty')
+  })
 
   test('returns "gemini" when modelType is gemini', () => {
-    mockedModelType = "gemini";
-    expect(getAPIProvider()).toBe("gemini");
-  });
+    expect(getAPIProvider({ modelType: 'gemini' })).toBe('gemini')
+  })
 
-  test("modelType takes precedence over environment variables", () => {
-    mockedModelType = "gemini";
-    process.env.CLAUDE_CODE_USE_BEDROCK = "1";
-    expect(getAPIProvider()).toBe("gemini");
-  });
+  test('modelType takes precedence over environment variables', () => {
+    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
+    expect(getAPIProvider({ modelType: 'gemini' })).toBe('gemini')
+  })
 
   test('returns "gemini" when CLAUDE_CODE_USE_GEMINI is set', () => {
-    process.env.CLAUDE_CODE_USE_GEMINI = "1";
-    expect(getAPIProvider()).toBe("gemini");
-  });
+    process.env.CLAUDE_CODE_USE_GEMINI = '1'
+    expect(getAPIProvider({})).toBe('gemini')
+  })
 
   test('returns "bedrock" when CLAUDE_CODE_USE_BEDROCK is set', () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "1";
-    expect(getAPIProvider()).toBe("bedrock");
-  });
+    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
+    expect(getAPIProvider({})).toBe('bedrock')
+  })
 
   test('returns "vertex" when CLAUDE_CODE_USE_VERTEX is set', () => {
-    process.env.CLAUDE_CODE_USE_VERTEX = "1";
-    expect(getAPIProvider()).toBe("vertex");
-  });
+    process.env.CLAUDE_CODE_USE_VERTEX = '1'
+    expect(getAPIProvider({})).toBe('vertex')
+  })
 
   test('returns "foundry" when CLAUDE_CODE_USE_FOUNDRY is set', () => {
-    process.env.CLAUDE_CODE_USE_FOUNDRY = "1";
-    expect(getAPIProvider()).toBe("foundry");
-  });
-
-  test("bedrock takes precedence over gemini", () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "1";
-    process.env.CLAUDE_CODE_USE_GEMINI = "1";
-    expect(getAPIProvider()).toBe("bedrock");
-  });
-
-  test("bedrock takes precedence over vertex", () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "1";
-    process.env.CLAUDE_CODE_USE_VERTEX = "1";
-    expect(getAPIProvider()).toBe("bedrock");
-  });
-
-  test("bedrock wins when all three env vars are set", () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "1";
-    process.env.CLAUDE_CODE_USE_VERTEX = "1";
-    process.env.CLAUDE_CODE_USE_FOUNDRY = "1";
-    expect(getAPIProvider()).toBe("bedrock");
-  });
+    process.env.CLAUDE_CODE_USE_FOUNDRY = '1'
+    expect(getAPIProvider({})).toBe('foundry')
+  })
+
+  test('bedrock takes precedence over gemini', () => {
+    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
+    process.env.CLAUDE_CODE_USE_GEMINI = '1'
+    expect(getAPIProvider({})).toBe('bedrock')
+  })
+
+  test('bedrock takes precedence over vertex', () => {
+    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
+    process.env.CLAUDE_CODE_USE_VERTEX = '1'
+    expect(getAPIProvider({})).toBe('bedrock')
+  })
+
+  test('bedrock wins when all three env vars are set', () => {
+    process.env.CLAUDE_CODE_USE_BEDROCK = '1'
+    process.env.CLAUDE_CODE_USE_VERTEX = '1'
+    process.env.CLAUDE_CODE_USE_FOUNDRY = '1'
+    expect(getAPIProvider({})).toBe('bedrock')
+  })
 
   test('"true" is truthy', () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "true";
-    expect(getAPIProvider()).toBe("bedrock");
-  });
+    process.env.CLAUDE_CODE_USE_BEDROCK = 'true'
+    expect(getAPIProvider({})).toBe('bedrock')
+  })
 
   test('"0" is not truthy', () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "0";
-    expect(getAPIProvider()).toBe("firstParty");
-  });
+    process.env.CLAUDE_CODE_USE_BEDROCK = '0'
+    expect(getAPIProvider({})).toBe('firstParty')
+  })
 
   test('empty string is not truthy', () => {
-    process.env.CLAUDE_CODE_USE_BEDROCK = "";
-    expect(getAPIProvider()).toBe("firstParty");
-  });
-});
+    process.env.CLAUDE_CODE_USE_BEDROCK = ''
+    expect(getAPIProvider({})).toBe('firstParty')
+  })
+})
 
-describe("isFirstPartyAnthropicBaseUrl", () => {
-  const originalBaseUrl = process.env.ANTHROPIC_BASE_URL;
-  const originalUserType = process.env.USER_TYPE;
+describe('isFirstPartyAnthropicBaseUrl', () => {
+  const originalBaseUrl = process.env.ANTHROPIC_BASE_URL
+  const originalUserType = process.env.USER_TYPE
 
   afterEach(() => {
     if (originalBaseUrl !== undefined) {
-      process.env.ANTHROPIC_BASE_URL = originalBaseUrl;
+      process.env.ANTHROPIC_BASE_URL = originalBaseUrl
     } else {
-      delete process.env.ANTHROPIC_BASE_URL;
+      delete process.env.ANTHROPIC_BASE_URL
     }
     if (originalUserType !== undefined) {
-      process.env.USER_TYPE = originalUserType;
+      process.env.USER_TYPE = originalUserType
     } else {
-      delete process.env.USER_TYPE;
+      delete process.env.USER_TYPE
     }
-  });
-
-  test("returns true when ANTHROPIC_BASE_URL is not set", () => {
-    delete process.env.ANTHROPIC_BASE_URL;
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(true);
-  });
-
-  test("returns true for api.anthropic.com", () => {
-    process.env.ANTHROPIC_BASE_URL = "https://api.anthropic.com";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(true);
-  });
-
-  test("returns false for custom URL", () => {
-    process.env.ANTHROPIC_BASE_URL = "https://my-proxy.com";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(false);
-  });
-
-  test("returns false for invalid URL", () => {
-    process.env.ANTHROPIC_BASE_URL = "not-a-url";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(false);
-  });
-
-  test("returns true for staging URL when USER_TYPE is ant", () => {
-    process.env.ANTHROPIC_BASE_URL = "https://api-staging.anthropic.com";
-    process.env.USER_TYPE = "ant";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(true);
-  });
-
-  test("returns true for URL with path", () => {
-    process.env.ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(true);
-  });
-
-  test("returns true for trailing slash", () => {
-    process.env.ANTHROPIC_BASE_URL = "https://api.anthropic.com/";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(true);
-  });
-
-  test("returns false for subdomain attack", () => {
-    process.env.ANTHROPIC_BASE_URL = "https://evil-api.anthropic.com";
-    expect(isFirstPartyAnthropicBaseUrl()).toBe(false);
-  });
-});
+  })
+
+  test('returns true when ANTHROPIC_BASE_URL is not set', () => {
+    delete process.env.ANTHROPIC_BASE_URL
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(true)
+  })
+
+  test('returns true for api.anthropic.com', () => {
+    process.env.ANTHROPIC_BASE_URL = 'https://api.anthropic.com'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(true)
+  })
+
+  test('returns false for custom URL', () => {
+    process.env.ANTHROPIC_BASE_URL = 'https://my-proxy.com'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(false)
+  })
+
+  test('returns false for invalid URL', () => {
+    process.env.ANTHROPIC_BASE_URL = 'not-a-url'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(false)
+  })
+
+  test('returns true for staging URL when USER_TYPE is ant', () => {
+    process.env.ANTHROPIC_BASE_URL = 'https://api-staging.anthropic.com'
+    process.env.USER_TYPE = 'ant'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(true)
+  })
+
+  test('returns true for URL with path', () => {
+    process.env.ANTHROPIC_BASE_URL = 'https://api.anthropic.com/v1'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(true)
+  })
+
+  test('returns true for trailing slash', () => {
+    process.env.ANTHROPIC_BASE_URL = 'https://api.anthropic.com/'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(true)
+  })
+
+  test('returns false for subdomain attack', () => {
+    process.env.ANTHROPIC_BASE_URL = 'https://evil-api.anthropic.com'
+    expect(isFirstPartyAnthropicBaseUrl()).toBe(false)
+  })
+})
diff --git a/src/utils/model/providers.ts b/src/utils/model/providers.ts
index 79572d42e8..d4784da844 100644
--- a/src/utils/model/providers.ts
+++ b/src/utils/model/providers.ts
@@ -1,5 +1,6 @@
 import type { AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS } from '../../services/analytics/index.js'
 import { getInitialSettings } from '../settings/settings.js'
+import type { SettingsJson } from '../settings/types.js'
 import { isEnvTruthy } from '../envUtils.js'
 
 export type APIProvider =
@@ -11,8 +12,10 @@ export type APIProvider =
   | 'gemini'
   | 'grok'
 
-export function getAPIProvider(): APIProvider {
-  const modelType = getInitialSettings().modelType
+export function getAPIProvider(
+  settings: Pick<SettingsJson, 'modelType'> = getInitialSettings(),
+): APIProvider {
+  const modelType = settings.modelType
   if (modelType === 'openai') return 'openai'
   if (modelType === 'gemini') return 'gemini'
   if (modelType === 'grok') return 'grok'
diff --git a/src/utils/processUserInput/__tests__/processSlashCommand.test.ts b/src/utils/processUserInput/__tests__/processSlashCommand.test.ts
new file mode 100644
index 0000000000..7ba0f3c2b3
--- /dev/null
+++ b/src/utils/processUserInput/__tests__/processSlashCommand.test.ts
@@ -0,0 +1,375 @@
+import { afterEach, beforeEach, describe, expect, mock, test } from 'bun:test'
+import type { QueuedCommand } from '../../../types/textInputTypes'
+import {
+  resetStateForTests,
+  setCwdState,
+  setOriginalCwd,
+  setProjectRoot,
+} from '../../../bootstrap/state'
+import {
+  createAutonomyQueuedPrompt,
+  getAutonomyRunById,
+  listAutonomyRuns,
+  markAutonomyRunRunning,
+} from '../../autonomyRuns'
+import { resetAutonomyAuthorityForTests } from '../../autonomyAuthority'
+import { createScheduledTaskQueuedCommand } from '../../../hooks/useScheduledTasks'
+import {
+  cleanupTempDir,
+  createTempDir,
+} from '../../../../tests/mocks/file-system'
+
+let runAgentBlocker: Promise<void> | null = null
+let releaseRunAgentBlocker: (() => void) | null = null
+let runAgentStartCount = 0
+let originalNodeEnv: string | undefined
+let originalAnthropicApiKey: string | undefined
+const commandQueue: QueuedCommand[] = []
+
+function enqueue(command: QueuedCommand): void {
+  commandQueue.push({ ...command, priority: command.priority ?? 'next' })
+}
+
+function enqueuePendingNotification(command: QueuedCommand): void {
+  commandQueue.push({ ...command, priority: command.priority ?? 'later' })
+}
+
+function getCommandQueue(): QueuedCommand[] {
+  return [...commandQueue]
+}
+
+function hasCommandsInQueue(): boolean {
+  return commandQueue.length > 0
+}
+
+function resetCommandQueue(): void {
+  commandQueue.length = 0
+}
+
+function createMessageQueueManagerMock() {
+  return {
+    enqueue,
+    enqueuePendingNotification,
+    getCommandQueue,
+    hasCommandsInQueue,
+    resetCommandQueue,
+  }
+}
+
+function holdRunAgent(): void {
+  runAgentBlocker = new Promise(resolve => {
+    releaseRunAgentBlocker = resolve
+  })
+}
+
+function releaseRunAgent(): void {
+  releaseRunAgentBlocker?.()
+  runAgentBlocker = null
+  releaseRunAgentBlocker = null
+}
+
+mock.module('bun:bundle', () => ({
+  feature: (name: string) => name === 'KAIROS',
+}))
+
+mock.module(
+  '@claude-code-best/builtin-tools/tools/AgentTool/runAgent.js',
+  () => ({
+    runAgent: async function* () {
+      runAgentStartCount += 1
+      if (runAgentBlocker) {
+        await runAgentBlocker
+      }
+      yield {
+        type: 'assistant',
+        uuid: 'assistant-1',
+        timestamp: new Date().toISOString(),
+        message: {
+          id: 'msg_1',
+          type: 'message',
+          role: 'assistant',
+          model: 'test-model',
+          content: [{ type: 'text', text: 'forked command done' }],
+          stop_reason: 'end_turn',
+          stop_sequence: null,
+          usage: {
+            input_tokens: 0,
+            output_tokens: 0,
+          },
+        },
+      }
+    },
+  }),
+)
+
+mock.module('@claude-code-best/builtin-tools/tools/AgentTool/UI.js', () => ({
+  AgentPromptDisplay: () => null,
+  AgentResponseDisplay: () => null,
+  extractLastToolInfo: () => null,
+  renderGroupedAgentToolUse: () => null,
+  renderToolResultMessage: () => null,
+  renderToolUseErrorMessage: () => null,
+  renderToolUseMessage: () => null,
+  renderToolUseProgressMessage: () => null,
+  renderToolUseRejectedMessage: () => null,
+  renderToolUseTag: () => null,
+  userFacingName: () => 'Agent',
+  userFacingNameBackgroundColor: () => 'gray',
+}))
+
+mock.module('../../messageQueueManager', createMessageQueueManagerMock)
+mock.module('../../messageQueueManager.js', createMessageQueueManagerMock)
+
+const { processSlashCommand } = await import('../processSlashCommand')
+
+let tempDir = ''
+
+function createScheduledTaskQueuedCommandForTest(task: {
+  id: string
+  prompt: string
+}) {
+  return createScheduledTaskQueuedCommand(task, {
+    rootDir: tempDir,
+    currentDir: tempDir,
+  })
+}
+
+async function waitForRunStatus(
+  runId: string,
+  status: 'queued' | 'running' | 'completed' | 'failed' | 'cancelled',
+): Promise<void> {
+  for (let i = 0; i < 200; i++) {
+    const run = await getAutonomyRunById(runId, tempDir)
+    if (run?.status === status) {
+      return
+    }
+    await new Promise(resolve => setTimeout(resolve, 10))
+  }
+  const run = await getAutonomyRunById(runId, tempDir)
+  throw new Error(`Expected ${runId} to be ${status}, got ${run?.status}`)
+}
+
+async function waitForRunAgentStarts(expected: number): Promise<void> {
+  for (let i = 0; i < 200; i++) {
+    if (runAgentStartCount >= expected) {
+      return
+    }
+    await new Promise(resolve => setTimeout(resolve, 10))
+  }
+  throw new Error(
+    `Expected runAgent to start ${expected} time(s), got ${runAgentStartCount}`,
+  )
+}
+
+async function waitForCommandQueueLength(expected: number): Promise<void> {
+  for (let i = 0; i < 200; i++) {
+    if (getCommandQueue().length === expected) {
+      return
+    }
+    await new Promise(resolve => setTimeout(resolve, 10))
+  }
+  throw new Error(
+    `Expected command queue length ${expected}, got ${getCommandQueue().length}`,
+  )
+}
+
+beforeEach(async () => {
+  tempDir = await createTempDir('process-slash-command-')
+  originalNodeEnv = process.env.NODE_ENV
+  originalAnthropicApiKey = process.env.ANTHROPIC_API_KEY
+  process.env.NODE_ENV = 'test'
+  process.env.ANTHROPIC_API_KEY = 'test-key'
+  runAgentBlocker = null
+  releaseRunAgentBlocker = null
+  runAgentStartCount = 0
+  resetStateForTests()
+  resetAutonomyAuthorityForTests()
+  resetCommandQueue()
+  setOriginalCwd(tempDir)
+  setProjectRoot(tempDir)
+  setCwdState(tempDir)
+})
+
+afterEach(async () => {
+  releaseRunAgent()
+  if (originalNodeEnv === undefined) {
+    delete process.env.NODE_ENV
+  } else {
+    process.env.NODE_ENV = originalNodeEnv
+  }
+  if (originalAnthropicApiKey === undefined) {
+    delete process.env.ANTHROPIC_API_KEY
+  } else {
+    process.env.ANTHROPIC_API_KEY = originalAnthropicApiKey
+  }
+  resetStateForTests()
+  resetAutonomyAuthorityForTests()
+  resetCommandQueue()
+  if (tempDir) {
+    await cleanupTempDir(tempDir)
+  }
+  mock.restore()
+})
+
+describe('processSlashCommand', () => {
+  const forkedCommand = {
+    type: 'prompt',
+    name: 'forked',
+    description: 'test forked command',
+    progressMessage: 'forking',
+    contentLength: 0,
+    source: 'builtin',
+    context: 'fork',
+    getPromptForCommand: async () => [
+      { type: 'text', text: 'review from fork' },
+    ],
+  } as const
+
+  function createContext() {
+    return {
+      getAppState: () => ({
+        kairosEnabled: true,
+        mcp: { clients: [] },
+        toolPermissionContext: {
+          mode: 'default',
+          alwaysAllowRules: {},
+        },
+      }),
+      options: {
+        commands: [forkedCommand],
+        allowBackgroundForkedSlashCommands: true,
+        tools: [],
+        refreshTools: () => [],
+        agentDefinitions: {
+          activeAgents: [{ agentType: 'general-purpose' }],
+        },
+      },
+      setResponseLength: mock((_updater: (length: number) => number) => {}),
+    } as any
+  }
+
+  test('defers autonomy completion until a KAIROS background forked command completes', async () => {
+    const queued = await createAutonomyQueuedPrompt({
+      basePrompt: '/forked review',
+      trigger: 'scheduled-task',
+      rootDir: tempDir,
+      currentDir: tempDir,
+      sourceId: 'cron-1',
+    })
+    expect(queued).not.toBeNull()
+    const runId = queued!.autonomy!.runId
+    await markAutonomyRunRunning(runId, tempDir, 100)
+
+    const result = await processSlashCommand(
+      '/forked review',
+      [],
+      [],
+      [],
+      createContext(),
+      mock(() => {}),
+      undefined,
+      false,
+      async () => ({ behavior: 'allow', updatedInput: {} }) as any,
+      queued!.autonomy,
+    )
+
+    expect(result).toMatchObject({
+      messages: [],
+      shouldQuery: false,
+      deferAutonomyCompletion: true,
+    })
+
+    await waitForRunStatus(runId, 'completed')
+    await waitForCommandQueueLength(1)
+    expect(getCommandQueue()).toEqual([
+      expect.objectContaining({
+        mode: 'prompt',
+        isMeta: true,
+        skipSlashCommands: true,
+        value: expect.stringContaining(
+          '<scheduled-task-result command="/forked">',
+        ),
+      }),
+    ])
+  })
+
+  test('keeps repeated /loop scheduled fires bounded while a background fork is running', async () => {
+    const task = {
+      id: 'cron-loop',
+      prompt: '/forked review',
+    }
+    const first = await createScheduledTaskQueuedCommandForTest(task)
+    expect(first?.autonomy?.runId).toBeDefined()
+    const runId = first!.autonomy!.runId
+    await markAutonomyRunRunning(runId, tempDir, 100)
+
+    holdRunAgent()
+    const result = await processSlashCommand(
+      '/forked review',
+      [],
+      [],
+      [],
+      createContext(),
+      mock(() => {}),
+      undefined,
+      false,
+      async () => ({ behavior: 'allow', updatedInput: {} }) as any,
+      first!.autonomy,
+    )
+
+    expect(result.deferAutonomyCompletion).toBe(true)
+    await waitForRunAgentStarts(1)
+
+    const repeatedFires = await Promise.all(
+      Array.from({ length: 200 }, () =>
+        createScheduledTaskQueuedCommandForTest(task),
+      ),
+    )
+    expect(repeatedFires.every(command => command === null)).toBe(true)
+    expect(
+      (await listAutonomyRuns(tempDir)).filter(
+        run => run.sourceId === 'cron-loop',
+      ),
+    ).toHaveLength(1)
+    expect(getCommandQueue()).toHaveLength(0)
+
+    releaseRunAgent()
+    await waitForRunStatus(runId, 'completed')
+    await waitForCommandQueueLength(1)
+    expect(getCommandQueue()).toHaveLength(1)
+
+    const next = await createScheduledTaskQueuedCommandForTest(task)
+    expect(next?.autonomy?.runId).toBeDefined()
+    expect(
+      (await listAutonomyRuns(tempDir)).filter(
+        run => run.sourceId === 'cron-loop',
+      ),
+    ).toHaveLength(2)
+  })
+
+  test('rejects the background fork test override outside test runtime', async () => {
+    process.env.NODE_ENV = 'production'
+
+    const result = await processSlashCommand(
+      '/forked review',
+      [],
+      [],
+      [],
+      createContext(),
+      mock(() => {}),
+      undefined,
+      false,
+      async () => ({ behavior: 'allow', updatedInput: {} }) as any,
+    )
+
+    expect(result.shouldQuery).toBe(false)
+    expect(
+      result.messages.some(message =>
+        JSON.stringify(message).includes(
+          'allowBackgroundForkedSlashCommands is test-only',
+        ),
+      ),
+    ).toBe(true)
+    expect(runAgentStartCount).toBe(0)
+  })
+})
diff --git a/src/utils/processUserInput/processSlashCommand.tsx b/src/utils/processUserInput/processSlashCommand.tsx
index 6ee4bfe93b..da6763f7e6 100644
--- a/src/utils/processUserInput/processSlashCommand.tsx
+++ b/src/utils/processUserInput/processSlashCommand.tsx
@@ -1,10 +1,7 @@
-import { feature } from 'bun:bundle'
-import type {
-  ContentBlockParam,
-  TextBlockParam,
-} from '@anthropic-ai/sdk/resources'
-import { randomUUID } from 'crypto'
-import { setPromptId } from 'src/bootstrap/state.js'
+import { feature } from 'bun:bundle';
+import type { ContentBlockParam, TextBlockParam } from '@anthropic-ai/sdk/resources';
+import { randomUUID } from 'crypto';
+import { setPromptId } from 'src/bootstrap/state.js';
 import {
   builtInCommandNames,
   type Command,
@@ -14,9 +11,9 @@ import {
   getCommandName,
   hasCommand,
   type PromptCommand,
-} from 'src/commands.js'
-import { NO_CONTENT_MESSAGE } from 'src/constants/messages.js'
-import type { SetToolJSXFn, ToolUseContext } from 'src/Tool.js'
+} from 'src/commands.js';
+import { NO_CONTENT_MESSAGE } from 'src/constants/messages.js';
+import type { SetToolJSXFn, ToolUseContext } from 'src/Tool.js';
 import type {
   AssistantMessage,
   AttachmentMessage,
@@ -24,42 +21,37 @@ import type {
   NormalizedUserMessage,
   ProgressMessage,
   UserMessage,
-} from 'src/types/message.js'
-import { addInvokedSkill, getSessionId } from '../../bootstrap/state.js'
-import { COMMAND_MESSAGE_TAG, COMMAND_NAME_TAG } from '../../constants/xml.js'
-import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
+} from 'src/types/message.js';
+import type { QueuedCommand } from 'src/types/textInputTypes.js';
+import { addInvokedSkill, getSessionId } from '../../bootstrap/state.js';
+import { COMMAND_MESSAGE_TAG, COMMAND_NAME_TAG } from '../../constants/xml.js';
+import type { CanUseToolFn } from '../../hooks/useCanUseTool.js';
 import {
   type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
   type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
   logEvent,
-} from '../../services/analytics/index.js'
-import { getDumpPromptsPath } from '../../services/api/dumpPrompts.js'
-import { buildPostCompactMessages } from '../../services/compact/compact.js'
-import { resetMicrocompactState } from '../../services/compact/microCompact.js'
-import type { Progress as AgentProgress } from '@claude-code-best/builtin-tools/tools/AgentTool/AgentTool.js'
-import { runAgent } from '@claude-code-best/builtin-tools/tools/AgentTool/runAgent.js'
-import { renderToolUseProgressMessage } from '@claude-code-best/builtin-tools/tools/AgentTool/UI.js'
-import type { CommandResultDisplay } from '../../types/command.js'
-import { createAbortController } from '../abortController.js'
-import { getAgentContext } from '../agentContext.js'
-import {
-  createAttachmentMessage,
-  getAttachmentMessages,
-} from '../attachments.js'
-import { logForDebugging } from '../debug.js'
-import { isEnvTruthy } from '../envUtils.js'
-import { AbortError, MalformedCommandError } from '../errors.js'
-import { getDisplayPath } from '../file.js'
-import {
-  extractResultText,
-  prepareForkedCommandContext,
-} from '../forkedAgent.js'
-import { getFsImplementation } from '../fsOperations.js'
-import { isFullscreenEnvEnabled } from '../fullscreen.js'
-import { toArray } from '../generators.js'
-import { registerSkillHooks } from '../hooks/registerSkillHooks.js'
-import { logError } from '../log.js'
-import { enqueuePendingNotification } from '../messageQueueManager.js'
+} from '../../services/analytics/index.js';
+import { getDumpPromptsPath } from '../../services/api/dumpPrompts.js';
+import { buildPostCompactMessages } from '../../services/compact/compact.js';
+import { resetMicrocompactState } from '../../services/compact/microCompact.js';
+import type { Progress as AgentProgress } from '@claude-code-best/builtin-tools/tools/AgentTool/AgentTool.js';
+import { runAgent } from '@claude-code-best/builtin-tools/tools/AgentTool/runAgent.js';
+import { renderToolUseProgressMessage } from '@claude-code-best/builtin-tools/tools/AgentTool/UI.js';
+import type { CommandResultDisplay } from '../../types/command.js';
+import { createAbortController } from '../abortController.js';
+import { getAgentContext } from '../agentContext.js';
+import { createAttachmentMessage, getAttachmentMessages } from '../attachments.js';
+import { logForDebugging } from '../debug.js';
+import { isEnvTruthy } from '../envUtils.js';
+import { AbortError, MalformedCommandError } from '../errors.js';
+import { getDisplayPath } from '../file.js';
+import { extractResultText, prepareForkedCommandContext } from '../forkedAgent.js';
+import { getFsImplementation } from '../fsOperations.js';
+import { isFullscreenEnvEnabled } from '../fullscreen.js';
+import { toArray } from '../generators.js';
+import { registerSkillHooks } from '../hooks/registerSkillHooks.js';
+import { logError } from '../log.js';
+import { enqueue, enqueuePendingNotification } from '../messageQueueManager.js';
 import {
   createCommandInputMessage,
   createSyntheticUserCaveatMessage,
@@ -71,40 +63,44 @@ import {
   isSystemLocalCommandMessage,
   normalizeMessages,
   prepareUserContent,
-} from '../messages.js'
-import type { ModelAlias } from '../model/aliases.js'
-import { parseToolListFromCLI } from '../permissions/permissionSetup.js'
-import { hasPermissionsToUseTool } from '../permissions/permissions.js'
-import {
-  isOfficialMarketplaceName,
-  parsePluginIdentifier,
-} from '../plugins/pluginIdentifier.js'
-import {
-  isRestrictedToPluginOnly,
-  isSourceAdminTrusted,
-} from '../settings/pluginOnlyPolicy.js'
-import { parseSlashCommand } from '../slashCommandParsing.js'
-import { sleep } from '../sleep.js'
-import { recordSkillUsage } from '../suggestions/skillUsageTracking.js'
-import { logOTelEvent, redactIfDisabled } from '../telemetry/events.js'
-import { buildPluginCommandTelemetryFields } from '../telemetry/pluginTelemetry.js'
-import { getAssistantMessageContentLength } from '../tokens.js'
-import { createAgentId } from '../uuid.js'
-import { getWorkload } from '../workloadContext.js'
-import type {
-  ProcessUserInputBaseResult,
-  ProcessUserInputContext,
-} from './processUserInput.js'
+} from '../messages.js';
+import type { ModelAlias } from '../model/aliases.js';
+import { parseToolListFromCLI } from '../permissions/permissionSetup.js';
+import { hasPermissionsToUseTool } from '../permissions/permissions.js';
+import { isOfficialMarketplaceName, parsePluginIdentifier } from '../plugins/pluginIdentifier.js';
+import { isRestrictedToPluginOnly, isSourceAdminTrusted } from '../settings/pluginOnlyPolicy.js';
+import { parseSlashCommand } from '../slashCommandParsing.js';
+import { sleep } from '../sleep.js';
+import { recordSkillUsage } from '../suggestions/skillUsageTracking.js';
+import { logOTelEvent, redactIfDisabled } from '../telemetry/events.js';
+import { buildPluginCommandTelemetryFields } from '../telemetry/pluginTelemetry.js';
+import { getAssistantMessageContentLength } from '../tokens.js';
+import { createAgentId } from '../uuid.js';
+import { finalizeAutonomyRunCompleted, finalizeAutonomyRunFailed } from '../autonomyRuns.js';
+import { getWorkload } from '../workloadContext.js';
+import type { ProcessUserInputBaseResult, ProcessUserInputContext } from './processUserInput.js';
 
 type SlashCommandResult = ProcessUserInputBaseResult & {
-  command: Command
-}
+  command: Command;
+};
 
 // Poll interval and deadline for MCP settle before launching a background
 // forked subagent. MCP servers typically connect within 1-3s of startup;
 // 10s headroom covers slow SSE handshakes.
-const MCP_SETTLE_POLL_MS = 200
-const MCP_SETTLE_TIMEOUT_MS = 10_000
+const MCP_SETTLE_POLL_MS = 200;
+const MCP_SETTLE_TIMEOUT_MS = 10_000;
+
+function isTestRuntime(): boolean {
+  return process.env.NODE_ENV === 'test'; // true only when NODE_ENV is exactly 'test'
+}
+
+// Throws unless isTestRuntime() holds; guards the test-only allowBackgroundForkedSlashCommands option.
+function assertBackgroundForkedSlashCommandTestOverrideAllowed(): void {
+  if (isTestRuntime()) return;
+  throw new Error(
+    'ToolUseContext.options.allowBackgroundForkedSlashCommands is test-only and cannot be enabled outside NODE_ENV=test.',
+  );
+}
 
 /**
  * Executes a slash command with context: fork in a sub-agent.
@@ -116,40 +112,35 @@ async function executeForkedSlashCommand(
   precedingInputBlocks: ContentBlockParam[],
   setToolJSX: SetToolJSXFn,
   canUseTool: CanUseToolFn,
+  autonomy?: QueuedCommand['autonomy'],
 ): Promise<SlashCommandResult> {
-  const agentId = createAgentId()
+  const agentId = createAgentId();
 
   const pluginMarketplace = command.pluginInfo
     ? parsePluginIdentifier(command.pluginInfo.repository).marketplace
-    : undefined
+    : undefined;
   logEvent('tengu_slash_command_forked', {
-    command_name:
-      command.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
-    invocation_trigger:
-      'user-slash' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    command_name: command.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    invocation_trigger: 'user-slash' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
     ...(command.pluginInfo && {
-      _PROTO_plugin_name: command.pluginInfo.pluginManifest
-        .name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+      _PROTO_plugin_name: command.pluginInfo.pluginManifest.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
       ...(pluginMarketplace && {
-        _PROTO_marketplace_name:
-          pluginMarketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+        _PROTO_marketplace_name: pluginMarketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
       }),
       ...buildPluginCommandTelemetryFields(command.pluginInfo),
     }),
-  })
+  });
 
-  const { skillContent, modifiedGetAppState, baseAgent, promptMessages } =
-    await prepareForkedCommandContext(command, args, context)
+  const { skillContent, modifiedGetAppState, baseAgent, promptMessages } = await prepareForkedCommandContext(
+    command,
+    args,
+    context,
+  );
 
   // Merge skill's effort into the agent definition so runAgent applies it
-  const agentDefinition =
-    command.effort !== undefined
-      ? { ...baseAgent, effort: command.effort }
-      : baseAgent
+  const agentDefinition = command.effort !== undefined ? { ...baseAgent, effort: command.effort } : baseAgent;
 
-  logForDebugging(
-    `Executing forked slash command /${command.name} with agent ${agentDefinition.agentType}`,
-  )
+  logForDebugging(`Executing forked slash command /${command.name} with agent ${agentDefinition.agentType}`);
 
   // Assistant mode: fire-and-forget. Launch subagent in background, return
   // immediately, re-enqueue the result as an isMeta prompt when done.
@@ -163,12 +154,25 @@ async function executeForkedSlashCommand(
   // isMeta prompts are hidden. Outside assistant mode, context:fork commands
   // are user-invoked skills (/commit etc.) that should run synchronously
   // with the progress UI.
-  if (feature('KAIROS') && (await context.getAppState()).kairosEnabled) {
+  const appState = await context.getAppState();
+  const allowBackgroundForkedSlashCommands = context.options.allowBackgroundForkedSlashCommands === true;
+  if (allowBackgroundForkedSlashCommands) {
+    assertBackgroundForkedSlashCommandTestOverrideAllowed();
+  }
+  let canRunBackgroundForkedSlashCommand = false;
+  if (appState.kairosEnabled) {
+    if (feature('KAIROS')) {
+      canRunBackgroundForkedSlashCommand = true;
+    } else if (allowBackgroundForkedSlashCommands) {
+      canRunBackgroundForkedSlashCommand = true;
+    }
+  }
+  if (canRunBackgroundForkedSlashCommand) {
     // Standalone abortController — background subagents survive main-thread
     // ESC (same policy as AgentTool's async path). They're cron-driven; if
     // killed mid-run they just re-fire on the next schedule.
-    const bgAbortController = createAbortController()
-    const commandName = getCommandName(command)
+    const bgAbortController = createAbortController();
+    const commandName = getCommandName(command);
 
     // Workload: handlePromptSubmit wraps the entire turn in runWithWorkload
     // (AsyncLocalStorage). ALS context is captured when this `void` fires
@@ -179,7 +183,7 @@ async function executeForkedSlashCommand(
     // handlePromptSubmit → fresh runWithWorkload boundary (which always
     // establishes a new context, even for `undefined`) → so it needs its
     // own QueuedCommand.workload tag to preserve attribution.
-    const spawnTimeWorkload = getWorkload()
+    const spawnTimeWorkload = getWorkload();
 
     // Re-enter the queue as a hidden prompt. isMeta: hides from queue
     // preview + placeholder + transcript. skipSlashCommands: prevents
@@ -195,7 +199,31 @@ async function executeForkedSlashCommand(
         isMeta: true,
         skipSlashCommands: true,
         workload: spawnTimeWorkload,
-      })
+      });
+    // Completes the originating autonomy run, if there is one, and then
+    // enqueues every follow-up command the finalizer hands back.
+    const finalizeDeferredAutonomyRunCompleted = async (): Promise<void> => {
+      if (!autonomy?.runId) return;
+      const followUps = await finalizeAutonomyRunCompleted({
+        runId: autonomy.runId,
+        rootDir: autonomy.rootDir,
+        priority: 'later',
+        workload: spawnTimeWorkload,
+      });
+      for (const followUp of followUps) {
+        enqueue(followUp);
+      }
+    };
+    // Marks the run failed. Logs instead of rethrowing: the caller is a void'ed chain with no further handler.
+    const finalizeDeferredAutonomyRunFailed = async (error: unknown): Promise<void> => {
+      if (!autonomy?.runId) return; // nothing to finalize when no autonomy run id was passed in
+      const message = error instanceof Error ? error.message : String(error);
+      try {
+        await finalizeAutonomyRunFailed({ runId: autonomy.runId, rootDir: autonomy.rootDir, error: message });
+      } catch (finalizeError) {
+        logError(finalizeError);
+      }
+    };
 
     void (async () => {
       // Wait for MCP servers to settle. Scheduled tasks fire at startup and
@@ -204,16 +232,15 @@ async function executeForkedSlashCommand(
       // accidentally avoided this — tasks serialized, so task N's drain
       // happened after task N-1's 30s run, by which time MCP was up.
       // Poll until no 'pending' clients remain, then refresh.
-      const deadline = Date.now() + MCP_SETTLE_TIMEOUT_MS
+      const deadline = Date.now() + MCP_SETTLE_TIMEOUT_MS;
       while (Date.now() < deadline) {
-        const s = context.getAppState()
-        if (!s.mcp.clients.some(c => c.type === 'pending')) break
-        await sleep(MCP_SETTLE_POLL_MS)
+        const s = context.getAppState();
+        if (!s.mcp.clients.some(c => c.type === 'pending')) break;
+        await sleep(MCP_SETTLE_POLL_MS);
       }
-      const freshTools =
-        context.options.refreshTools?.() ?? context.options.tools
+      const freshTools = context.options.refreshTools?.() ?? context.options.tools;
 
-      const agentMessages: Message[] = []
+      const agentMessages: Message[] = [];
       for await (const message of runAgent({
         agentDefinition,
         promptMessages,
@@ -229,40 +256,53 @@ async function executeForkedSlashCommand(
         availableTools: freshTools,
         override: { agentId },
       })) {
-        agentMessages.push(message)
+        agentMessages.push(message);
       }
-      const resultText = extractResultText(agentMessages, 'Command completed')
-      logForDebugging(
-        `Background forked command /${commandName} completed (agent ${agentId})`,
-      )
-      enqueueResult(
-        `<scheduled-task-result command="/${commandName}">\n${resultText}\n</scheduled-task-result>`,
-      )
-    })().catch(err => {
-      logError(err)
+      const resultText = extractResultText(agentMessages, 'Command completed');
+      logForDebugging(`Background forked command /${commandName} completed (agent ${agentId})`);
+      // Enqueue the worker's result before finalizing the autonomy run so the
+      // <scheduled-task-result> notification is observed before any follow-up
+      // autonomy commands the finalizer enqueues at the same priority. Without
+      // this ordering, both land at `priority: 'later'` and the next autonomy
+      // step can run before the main thread sees this worker's output.
+      enqueueResult(`<scheduled-task-result command="/${commandName}">\n${resultText}\n</scheduled-task-result>`);
+      // The slash command itself succeeded; an error from the finalize call
+      // must not surface as a contradictory <scheduled-task-result status="failed">
+      // via the outer catch below. Log it locally and stop.
+      try {
+        await finalizeDeferredAutonomyRunCompleted();
+      } catch (finalizeError) {
+        logError(finalizeError);
+      }
+    })().catch(async err => {
+      logError(err);
       enqueueResult(
         `<scheduled-task-result command="/${commandName}" status="failed">\n${err instanceof Error ? err.message : String(err)}\n</scheduled-task-result>`,
-      )
-    })
+      );
+      await finalizeDeferredAutonomyRunFailed(err);
+    });
 
     // Nothing to render, nothing to query — the background runner re-enters
     // the queue on its own schedule.
-    return { messages: [], shouldQuery: false, command }
+    return {
+      messages: [],
+      shouldQuery: false,
+      command,
+      deferAutonomyCompletion: Boolean(autonomy?.runId),
+    };
   }
 
   // Collect messages from the forked agent
-  const agentMessages: Message[] = []
+  const agentMessages: Message[] = [];
 
   // Build progress messages for the agent progress UI
-  const progressMessages: ProgressMessage<AgentProgress>[] = []
-  const parentToolUseID = `forked-command-${command.name}`
-  let toolUseCounter = 0
+  const progressMessages: ProgressMessage<AgentProgress>[] = [];
+  const parentToolUseID = `forked-command-${command.name}`;
+  let toolUseCounter = 0;
 
   // Helper to create a progress message from an agent message
-  const createProgressMessage = (
-    message: AssistantMessage | NormalizedUserMessage,
-  ): ProgressMessage<AgentProgress> => {
-    toolUseCounter++
+  const createProgressMessage = (message: AssistantMessage | NormalizedUserMessage): ProgressMessage<AgentProgress> => {
+    toolUseCounter++;
     return {
       type: 'progress',
       data: {
@@ -275,8 +315,8 @@ async function executeForkedSlashCommand(
       toolUseID: `${parentToolUseID}-${toolUseCounter}`,
       timestamp: new Date().toISOString(),
       uuid: randomUUID(),
-    }
-  }
+    };
+  };
 
   // Helper to update progress display using agent progress UI
   const updateProgress = (): void => {
@@ -288,11 +328,11 @@ async function executeForkedSlashCommand(
       shouldHidePromptInput: false,
       shouldContinueAnimation: true,
       showSpinner: true,
-    })
-  }
+    });
+  };
 
   // Show initial "Initializing…" state
-  updateProgress()
+  updateProgress();
 
   // Run the sub-agent
   try {
@@ -309,47 +349,45 @@ async function executeForkedSlashCommand(
       model: command.model as ModelAlias | undefined,
       availableTools: context.options.tools,
     })) {
-      agentMessages.push(message)
-      const normalizedNew = normalizeMessages([message])
+      agentMessages.push(message);
+      const normalizedNew = normalizeMessages([message]);
 
       // Add progress message for assistant messages (which contain tool uses)
       if (message.type === 'assistant') {
         // Increment token count in spinner for assistant messages
-        const contentLength = getAssistantMessageContentLength(message as AssistantMessage)
+        const contentLength = getAssistantMessageContentLength(message as AssistantMessage);
         if (contentLength > 0) {
-          context.setResponseLength(len => len + contentLength)
+          context.setResponseLength(len => len + contentLength);
         }
 
-        const normalizedMsg = normalizedNew[0]
+        const normalizedMsg = normalizedNew[0];
         if (normalizedMsg && normalizedMsg.type === 'assistant') {
-          progressMessages.push(createProgressMessage(message as AssistantMessage))
-          updateProgress()
+          progressMessages.push(createProgressMessage(message as AssistantMessage));
+          updateProgress();
         }
       }
 
       // Add progress message for user messages (which contain tool results)
       if (message.type === 'user') {
-        const normalizedMsg = normalizedNew[0]
+        const normalizedMsg = normalizedNew[0];
         if (normalizedMsg && normalizedMsg.type === 'user') {
-          progressMessages.push(createProgressMessage(normalizedMsg as AssistantMessage))
-          updateProgress()
+          progressMessages.push(createProgressMessage(normalizedMsg as AssistantMessage));
+          updateProgress();
         }
       }
     }
   } finally {
     // Clear the progress display
-    setToolJSX(null)
+    setToolJSX(null);
   }
 
-  let resultText = extractResultText(agentMessages, 'Command completed')
+  let resultText = extractResultText(agentMessages, 'Command completed');
 
-  logForDebugging(
-    `Forked slash command /${command.name} completed with agent ${agentId}`,
-  )
+  logForDebugging(`Forked slash command /${command.name} completed with agent ${agentId}`);
 
   // Prepend debug log for ant users so it appears inside the command output
   if (process.env.USER_TYPE === 'ant') {
-    resultText = `[ANT-ONLY] API calls: ${getDisplayPath(getDumpPromptsPath(agentId))}\n${resultText}`
+    resultText = `[ANT-ONLY] API calls: ${getDisplayPath(getDumpPromptsPath(agentId))}\n${resultText}`;
   }
 
   // Return the result as a user message (simulates the agent's output)
@@ -363,14 +401,14 @@ async function executeForkedSlashCommand(
     createUserMessage({
       content: `<local-command-stdout>\n${resultText}\n</local-command-stdout>`,
     }),
-  ]
+  ];
 
   return {
     messages,
     shouldQuery: false,
     command,
     resultText,
-  }
+  };
 }
 
 /**
@@ -383,7 +421,7 @@ async function executeForkedSlashCommand(
 export function looksLikeCommand(commandName: string): boolean {
   // Command names should only contain [a-zA-Z0-9:_-]
   // If it contains other characters, it's probably a file path or other input
-  return !/[^a-zA-Z0-9:\-_]/.test(commandName)
+  return !/[^a-zA-Z0-9:\-_]/.test(commandName);
 }
 
 export async function processSlashCommand(
@@ -396,11 +434,12 @@ export async function processSlashCommand(
   uuid?: string,
   isAlreadyProcessing?: boolean,
   canUseTool?: CanUseToolFn,
+  autonomy?: QueuedCommand['autonomy'],
 ): Promise<ProcessUserInputBaseResult> {
-  const parsed = parseSlashCommand(inputString)
+  const parsed = parseSlashCommand(inputString);
   if (!parsed) {
-    logEvent('tengu_input_slash_missing', {})
-    const errorMessage = 'Commands are in the form `/command [args]`'
+    logEvent('tengu_input_slash_missing', {});
+    const errorMessage = 'Commands are in the form `/command [args]`';
     return {
       messages: [
         createSyntheticUserCaveatMessage(),
@@ -414,35 +453,30 @@ export async function processSlashCommand(
       ],
       shouldQuery: false,
       resultText: errorMessage,
-    }
+    };
   }
 
-  const { commandName, args: parsedArgs, isMcp } = parsed
+  const { commandName, args: parsedArgs, isMcp } = parsed;
 
-  const sanitizedCommandName = isMcp
-    ? 'mcp'
-    : !builtInCommandNames().has(commandName)
-      ? 'custom'
-      : commandName
+  const sanitizedCommandName = isMcp ? 'mcp' : !builtInCommandNames().has(commandName) ? 'custom' : commandName;
 
   // Check if it's a real command before processing
   if (!hasCommand(commandName, context.options.commands)) {
     // Check if this looks like a command name vs a file path or other input
     // Also check if it's an actual file path that exists
-    let isFilePath = false
+    let isFilePath = false;
     try {
-      await getFsImplementation().stat(`/${commandName}`)
-      isFilePath = true
+      await getFsImplementation().stat(`/${commandName}`);
+      isFilePath = true;
     } catch {
       // Not a file path — treat as command name
     }
     if (looksLikeCommand(commandName) && !isFilePath) {
       logEvent('tengu_input_slash_invalid', {
-        input:
-          commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
-      })
+        input: commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      });
 
-      const unknownMessage = `Unknown skill: ${commandName}`
+      const unknownMessage = `Unknown skill: ${commandName}`;
       return {
         messages: [
           createSyntheticUserCaveatMessage(),
@@ -455,29 +489,22 @@ export async function processSlashCommand(
           }),
           // gh-32591: preserve args so the user can copy/resubmit without
           // retyping. System warning is UI-only (filtered before API).
-          ...(parsedArgs
-            ? [
-                createSystemMessage(
-                  `Args from unknown skill: ${parsedArgs}`,
-                  'warning',
-                ),
-              ]
-            : []),
+          ...(parsedArgs ? [createSystemMessage(`Args from unknown skill: ${parsedArgs}`, 'warning')] : []),
         ],
         shouldQuery: false,
         resultText: unknownMessage,
-      }
+      };
     }
 
-    const promptId = randomUUID()
-    setPromptId(promptId)
-    logEvent('tengu_input_prompt', {})
+    const promptId = randomUUID();
+    setPromptId(promptId);
+    logEvent('tengu_input_prompt', {});
     // Log user prompt event for OTLP
     void logOTelEvent('user_prompt', {
       prompt_length: String(inputString.length),
       prompt: redactIfDisabled(inputString),
       'prompt.id': promptId,
-    })
+    });
     return {
       messages: [
         createUserMessage({
@@ -487,7 +514,7 @@ export async function processSlashCommand(
         ...attachmentMessages,
       ],
       shouldQuery: true,
-    }
+    };
   }
 
   // Track slash command usage for feature discovery
@@ -502,6 +529,7 @@ export async function processSlashCommand(
     resultText,
     nextInput,
     submitNextInput,
+    deferAutonomyCompletion,
   } = await getMessagesForSlashCommand(
     commandName,
     parsedArgs,
@@ -512,66 +540,55 @@ export async function processSlashCommand(
     isAlreadyProcessing,
     canUseTool,
     uuid,
-  )
+    autonomy,
+  );
 
   // Local slash commands that skip messages
   if (newMessages.length === 0) {
     const eventData: Record<string, boolean | number | undefined> = {
-      input:
-        sanitizedCommandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
-    }
+      input: sanitizedCommandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    };
 
     // Add plugin metadata if this is a plugin command
     if (returnedCommand.type === 'prompt' && returnedCommand.pluginInfo) {
-      const { pluginManifest, repository } = returnedCommand.pluginInfo
-      const { marketplace } = parsePluginIdentifier(repository)
-      const isOfficial = isOfficialMarketplaceName(marketplace)
+      const { pluginManifest, repository } = returnedCommand.pluginInfo;
+      const { marketplace } = parsePluginIdentifier(repository);
+      const isOfficial = isOfficialMarketplaceName(marketplace);
       // _PROTO_* routes to PII-tagged plugin_name/marketplace_name BQ columns
       // (unredacted, all users); plugin_name/plugin_repository stay in
       // additional_metadata as redacted variants for general-access dashboards.
-      eventData._PROTO_plugin_name =
-        pluginManifest.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED
+      eventData._PROTO_plugin_name = pluginManifest.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED;
       if (marketplace) {
-        eventData._PROTO_marketplace_name =
-          marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED
+        eventData._PROTO_marketplace_name = marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED;
       }
       eventData.plugin_repository = (
         isOfficial ? repository : 'third-party'
-      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS;
       eventData.plugin_name = (
         isOfficial ? pluginManifest.name : 'third-party'
-      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+      ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS;
       if (isOfficial && pluginManifest.version) {
-        eventData.plugin_version =
-          pluginManifest.version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+        eventData.plugin_version = pluginManifest.version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS;
       }
-      Object.assign(
-        eventData,
-        buildPluginCommandTelemetryFields(returnedCommand.pluginInfo),
-      )
+      Object.assign(eventData, buildPluginCommandTelemetryFields(returnedCommand.pluginInfo));
     }
 
     logEvent('tengu_input_command', {
       ...eventData,
-      invocation_trigger:
-        'user-slash' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      invocation_trigger: 'user-slash' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
       ...(process.env.USER_TYPE === 'ant' && {
-        skill_name:
-          commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        skill_name: commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
         ...(returnedCommand.type === 'prompt' && {
-          skill_source:
-            returnedCommand.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+          skill_source: returnedCommand.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
         }),
         ...(returnedCommand.loadedFrom && {
-          skill_loaded_from:
-            returnedCommand.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+          skill_loaded_from: returnedCommand.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
         }),
         ...(returnedCommand.kind && {
-          skill_kind:
-            returnedCommand.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+          skill_kind: returnedCommand.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
         }),
       }),
-    })
+    });
     return {
       messages: [],
       shouldQuery: false,
@@ -579,7 +596,8 @@ export async function processSlashCommand(
       model,
       nextInput,
       submitNextInput,
-    }
+      deferAutonomyCompletion,
+    };
   }
 
   // For invalid commands, preserve both the user message and error
@@ -591,15 +609,12 @@ export async function processSlashCommand(
   ) {
     // Don't log as invalid if it looks like a common file path
     const looksLikeFilePath =
-      inputString.startsWith('/var') ||
-      inputString.startsWith('/tmp') ||
-      inputString.startsWith('/private')
+      inputString.startsWith('/var') || inputString.startsWith('/tmp') || inputString.startsWith('/private');
 
     if (!looksLikeFilePath) {
       logEvent('tengu_input_slash_invalid', {
-        input:
-          commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
-      })
+        input: commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      });
     }
 
     return {
@@ -608,75 +623,58 @@ export async function processSlashCommand(
       allowedTools,
 
       model,
-    }
+    };
   }
 
   // A valid command
   const eventData: Record<string, boolean | number | undefined> = {
-    input:
-      sanitizedCommandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
-  }
+    input: sanitizedCommandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  };
 
   // Add plugin metadata if this is a plugin command
   if (returnedCommand.type === 'prompt' && returnedCommand.pluginInfo) {
-    const { pluginManifest, repository } = returnedCommand.pluginInfo
-    const { marketplace } = parsePluginIdentifier(repository)
-    const isOfficial = isOfficialMarketplaceName(marketplace)
-    eventData._PROTO_plugin_name =
-      pluginManifest.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED
+    const { pluginManifest, repository } = returnedCommand.pluginInfo;
+    const { marketplace } = parsePluginIdentifier(repository);
+    const isOfficial = isOfficialMarketplaceName(marketplace);
+    eventData._PROTO_plugin_name = pluginManifest.name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED;
     if (marketplace) {
-      eventData._PROTO_marketplace_name =
-        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED
+      eventData._PROTO_marketplace_name = marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED;
     }
     eventData.plugin_repository = (
       isOfficial ? repository : 'third-party'
-    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS;
     eventData.plugin_name = (
       isOfficial ? pluginManifest.name : 'third-party'
-    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+    ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS;
     if (isOfficial && pluginManifest.version) {
-      eventData.plugin_version =
-        pluginManifest.version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+      eventData.plugin_version = pluginManifest.version as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS;
     }
-    Object.assign(
-      eventData,
-      buildPluginCommandTelemetryFields(returnedCommand.pluginInfo),
-    )
+    Object.assign(eventData, buildPluginCommandTelemetryFields(returnedCommand.pluginInfo));
   }
 
   logEvent('tengu_input_command', {
     ...eventData,
-    invocation_trigger:
-      'user-slash' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    invocation_trigger: 'user-slash' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
     ...(process.env.USER_TYPE === 'ant' && {
-      skill_name:
-        commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      skill_name: commandName as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
       ...(returnedCommand.type === 'prompt' && {
-        skill_source:
-          returnedCommand.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        skill_source: returnedCommand.source as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
       }),
       ...(returnedCommand.loadedFrom && {
-        skill_loaded_from:
-          returnedCommand.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        skill_loaded_from: returnedCommand.loadedFrom as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
       }),
       ...(returnedCommand.kind && {
-        skill_kind:
-          returnedCommand.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        skill_kind: returnedCommand.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
       }),
     }),
-  })
+  });
 
   // Check if this is a compact result which handle their own synthetic caveat message ordering
-  const isCompactResult =
-    newMessages.length > 0 &&
-    newMessages[0] &&
-    isCompactBoundaryMessage(newMessages[0])
+  const isCompactResult = newMessages.length > 0 && newMessages[0] && isCompactBoundaryMessage(newMessages[0]);
 
   return {
     messages:
-      messageShouldQuery ||
-      newMessages.every(isSystemLocalCommandMessage) ||
-      isCompactResult
+      messageShouldQuery || newMessages.every(isSystemLocalCommandMessage) || isCompactResult
         ? newMessages
         : [createSyntheticUserCaveatMessage(), ...newMessages],
     shouldQuery: messageShouldQuery,
@@ -686,7 +684,8 @@ export async function processSlashCommand(
     resultText,
     nextInput,
     submitNextInput,
-  }
+    deferAutonomyCompletion,
+  };
 }
 
 async function getMessagesForSlashCommand(
@@ -699,12 +698,13 @@ async function getMessagesForSlashCommand(
   _isAlreadyProcessing?: boolean,
   canUseTool?: CanUseToolFn,
   uuid?: string,
+  autonomy?: QueuedCommand['autonomy'],
 ): Promise<SlashCommandResult> {
-  const command = getCommand(commandName, context.options.commands)
+  const command = getCommand(commandName, context.options.commands);
 
   // Track skill usage for ranking (only for prompt commands that are user-invocable)
   if (command.type === 'prompt' && command.userInvocable !== false) {
-    recordSkillUsage(commandName)
+    recordSkillUsage(commandName);
   }
 
   // Check if the command is user-invocable
@@ -724,25 +724,25 @@ async function getMessagesForSlashCommand(
       ],
       shouldQuery: false,
       command,
-    }
+    };
   }
 
   try {
     switch (command.type) {
       case 'local-jsx': {
         return new Promise<SlashCommandResult>(resolve => {
-          let doneWasCalled = false
+          let doneWasCalled = false;
           const onDone = (
             result?: string,
             options?: {
-              display?: CommandResultDisplay
-              shouldQuery?: boolean
-              metaMessages?: string[]
-              nextInput?: string
-              submitNextInput?: boolean
+              display?: CommandResultDisplay;
+              shouldQuery?: boolean;
+              metaMessages?: string[];
+              nextInput?: string;
+              submitNextInput?: boolean;
             },
           ) => {
-            doneWasCalled = true
+            doneWasCalled = true;
             // If display is 'skip', don't add any messages to the conversation
             if (options?.display === 'skip') {
               void resolve({
@@ -751,14 +751,14 @@ async function getMessagesForSlashCommand(
                 command,
                 nextInput: options?.nextInput,
                 submitNextInput: options?.submitNextInput,
-              })
-              return
+              });
+              return;
             }
 
             // Meta messages are model-visible but hidden from the user
-            const metaMessages = (options?.metaMessages ?? []).map(
-              (content: string) => createUserMessage({ content, isMeta: true }),
-            )
+            const metaMessages = (options?.metaMessages ?? []).map((content: string) =>
+              createUserMessage({ content, isMeta: true }),
+            );
 
             // In fullscreen the command just showed as a centered modal
             // pane — the transient notification is enough feedback. The
@@ -771,9 +771,7 @@ async function getMessagesForSlashCommand(
             // usage, /rename, /proactive) use display:system for actual
             // output that must reach the transcript.
             const skipTranscript =
-              isFullscreenEnvEnabled() &&
-              typeof result === 'string' &&
-              result.endsWith(' dismissed')
+              isFullscreenEnvEnabled() && typeof result === 'string' && result.endsWith(' dismissed');
 
             void resolve({
               messages:
@@ -781,12 +779,8 @@ async function getMessagesForSlashCommand(
                   ? skipTranscript
                     ? metaMessages
                     : [
-                        createCommandInputMessage(
-                          formatCommandInput(command, args),
-                        ),
-                        createCommandInputMessage(
-                          `<local-command-stdout>${result}</local-command-stdout>`,
-                        ),
+                        createCommandInputMessage(formatCommandInput(command, args)),
+                        createCommandInputMessage(`<local-command-stdout>${result}</local-command-stdout>`),
                         ...metaMessages,
                       ]
                   : [
@@ -809,21 +803,21 @@ async function getMessagesForSlashCommand(
               command,
               nextInput: options?.nextInput,
               submitNextInput: options?.submitNextInput,
-            })
-          }
+            });
+          };
 
           void command
             .load()
             .then(mod => mod.call(onDone, { ...context, canUseTool }, args))
             .then(jsx => {
-              if (jsx == null) return
+              if (jsx == null) return;
               if (context.options.isNonInteractiveSession) {
                 void resolve({
                   messages: [],
                   shouldQuery: false,
                   command,
-                })
-                return
+                });
+                return;
               }
               // Guard: if onDone fired during mod.call() (early-exit path
               // that calls onDone then returns JSX), skip setToolJSX. This
@@ -832,51 +826,51 @@ async function getMessagesForSlashCommand(
               // its setToolJSX({clearLocalJSX: true}) before we get here.
               // Setting isLocalJSXCommand after clear leaves it stuck true,
               // blocking useQueueProcessor and TextInput focus.
-              if (doneWasCalled) return
+              if (doneWasCalled) return;
               setToolJSX({
                 jsx,
                 shouldHidePromptInput: true,
                 showSpinner: false,
                 isLocalJSXCommand: true,
                 isImmediate: command.immediate === true,
-              })
+              });
             })
             .catch(e => {
               // If load()/call() throws and onDone never fired, the outer
               // Promise hangs forever, leaving queryGuard stuck in
               // 'dispatching' and deadlocking the queue processor.
-              logError(e)
-              if (doneWasCalled) return
-              doneWasCalled = true
+              logError(e);
+              if (doneWasCalled) return;
+              doneWasCalled = true;
               setToolJSX({
                 jsx: null,
                 shouldHidePromptInput: false,
                 clearLocalJSX: true,
-              })
-              void resolve({ messages: [], shouldQuery: false, command })
-            })
-        })
+              });
+              void resolve({ messages: [], shouldQuery: false, command });
+            });
+        });
       }
       case 'local': {
-        const displayArgs = command.isSensitive && args.trim() ? '***' : args
+        const displayArgs = command.isSensitive && args.trim() ? '***' : args;
         const userMessage = createUserMessage({
           content: prepareUserContent({
             inputString: formatCommandInput(command, displayArgs),
             precedingInputBlocks,
           }),
-        })
+        });
 
         try {
-          const syntheticCaveatMessage = createSyntheticUserCaveatMessage()
-          const mod = await command.load()
-          const result = await mod.call(args, context)
+          const syntheticCaveatMessage = createSyntheticUserCaveatMessage();
+          const mod = await command.load();
+          const result = await mod.call(args, context);
 
           if (result.type === 'skip') {
             return {
               messages: [],
               shouldQuery: false,
               command,
-            }
+            };
           }
 
           // Use discriminated union to handle different result types
@@ -899,52 +893,43 @@ async function getMessagesForSlashCommand(
                     }),
                   ]
                 : []),
-            ]
+            ];
             const compactionResultWithSlashMessages = {
               ...result.compactionResult,
-              messagesToKeep: [
-                ...(result.compactionResult.messagesToKeep ?? []),
-                ...slashCommandMessages,
-              ],
-            }
+              messagesToKeep: [...(result.compactionResult.messagesToKeep ?? []), ...slashCommandMessages],
+            };
             // Reset microcompact state since full compact replaces all
             // messages — old tool IDs are no longer relevant. Budget state
             // (on toolUseContext) needs no reset: stale entries are inert
             // (UUIDs never repeat, so they're never looked up).
-            resetMicrocompactState()
+            resetMicrocompactState();
             return {
-              messages: buildPostCompactMessages(
-                compactionResultWithSlashMessages,
-              ) as AssistantMessage[],
+              messages: buildPostCompactMessages(compactionResultWithSlashMessages) as AssistantMessage[],
               shouldQuery: false,
               command,
-            }
+            };
           }
 
           // Text result — use system message so it doesn't render as a user bubble
           return {
             messages: [
               userMessage,
-              createCommandInputMessage(
-                `<local-command-stdout>${result.value}</local-command-stdout>`,
-              ),
+              createCommandInputMessage(`<local-command-stdout>${result.value}</local-command-stdout>`),
             ],
             shouldQuery: false,
             command,
             resultText: result.value,
-          }
+          };
         } catch (e) {
-          logError(e)
+          logError(e);
           return {
             messages: [
               userMessage,
-              createCommandInputMessage(
-                `<local-command-stderr>${String(e)}</local-command-stderr>`,
-              ),
+              createCommandInputMessage(`<local-command-stderr>${String(e)}</local-command-stderr>`),
             ],
             shouldQuery: false,
             command,
-          }
+          };
         }
       }
       case 'prompt': {
@@ -958,7 +943,8 @@ async function getMessagesForSlashCommand(
               precedingInputBlocks,
               setToolJSX,
               canUseTool ?? hasPermissionsToUseTool,
-            )
+              autonomy,
+            );
           }
 
           return await getMessagesForPromptSlashCommand(
@@ -968,7 +954,7 @@ async function getMessagesForSlashCommand(
             precedingInputBlocks,
             imageContentBlocks,
             uuid,
-          )
+          );
         } catch (e) {
           // Handle abort errors specially to show proper "Interrupted" message
           if (e instanceof AbortError) {
@@ -984,7 +970,7 @@ async function getMessagesForSlashCommand(
               ],
               shouldQuery: false,
               command,
-            }
+            };
           }
           return {
             messages: [
@@ -1000,7 +986,7 @@ async function getMessagesForSlashCommand(
             ],
             shouldQuery: false,
             command,
-          }
+          };
         }
       }
     }
@@ -1017,46 +1003,40 @@ async function getMessagesForSlashCommand(
         ],
         shouldQuery: false,
         command,
-      }
+      };
     }
-    throw e
+    throw e;
   }
 }
 
 function formatCommandInput(command: CommandBase, args: string): string {
-  return formatCommandInputTags(getCommandName(command), args)
+  return formatCommandInputTags(getCommandName(command), args);
 }
 
 /**
  * Formats the metadata for a skill loading message.
  * Used by the Skill tool and for subagent skill preloading.
  */
-export function formatSkillLoadingMetadata(
-  skillName: string,
-  _progressMessage: string = 'loading',
-): string {
+export function formatSkillLoadingMetadata(skillName: string, _progressMessage: string = 'loading'): string {
   // Use skill name only - UserCommandMessage renders as "Skill(name)"
   return [
     `<${COMMAND_MESSAGE_TAG}>${skillName}</${COMMAND_MESSAGE_TAG}>`,
     `<${COMMAND_NAME_TAG}>${skillName}</${COMMAND_NAME_TAG}>`,
     `<skill-format>true</skill-format>`,
-  ].join('\n')
+  ].join('\n');
 }
 
 /**
  * Formats the metadata for a slash command loading message.
  */
-function formatSlashCommandLoadingMetadata(
-  commandName: string,
-  args?: string,
-): string {
+function formatSlashCommandLoadingMetadata(commandName: string, args?: string): string {
   return [
     `<${COMMAND_MESSAGE_TAG}>${commandName}</${COMMAND_MESSAGE_TAG}>`,
     `<${COMMAND_NAME_TAG}>/${commandName}</${COMMAND_NAME_TAG}>`,
     args ? `<command-args>${args}</command-args>` : null,
   ]
     .filter(Boolean)
-    .join('\n')
+    .join('\n');
 }
 
 /**
@@ -1064,26 +1044,19 @@ function formatSlashCommandLoadingMetadata(
  * User-invocable skills use slash command format (/name), while model-only
  * skills use the skill format ("The X skill is running").
  */
-function formatCommandLoadingMetadata(
-  command: CommandBase & PromptCommand,
-  args?: string,
-): string {
+function formatCommandLoadingMetadata(command: CommandBase & PromptCommand, args?: string): string {
   // Use command.name (the qualified name including plugin prefix, e.g.
   // "product-management:feature-spec") instead of userFacingName() which may
   // strip the plugin prefix via displayName fallback.
   // User-invocable skills should show as /command-name like regular slash commands
   if (command.userInvocable !== false) {
-    return formatSlashCommandLoadingMetadata(command.name, args)
+    return formatSlashCommandLoadingMetadata(command.name, args);
   }
   // Model-only skills (userInvocable: false) show as "The X skill is running"
-  if (
-    command.loadedFrom === 'skills' ||
-    command.loadedFrom === 'plugin' ||
-    command.loadedFrom === 'mcp'
-  ) {
-    return formatSkillLoadingMetadata(command.name, command.progressMessage)
+  if (command.loadedFrom === 'skills' || command.loadedFrom === 'plugin' || command.loadedFrom === 'mcp') {
+    return formatSkillLoadingMetadata(command.name, command.progressMessage);
   }
-  return formatSlashCommandLoadingMetadata(command.name, args)
+  return formatSlashCommandLoadingMetadata(command.name, args);
 }
 
 export async function processPromptSlashCommand(
@@ -1093,22 +1066,16 @@ export async function processPromptSlashCommand(
   context: ToolUseContext,
   imageContentBlocks: ContentBlockParam[] = [],
 ): Promise<SlashCommandResult> {
-  const command = findCommand(commandName, commands)
+  const command = findCommand(commandName, commands);
   if (!command) {
-    throw new MalformedCommandError(`Unknown command: ${commandName}`)
+    throw new MalformedCommandError(`Unknown command: ${commandName}`);
   }
   if (command.type !== 'prompt') {
     throw new Error(
       `Unexpected ${command.type} command. Expected 'prompt' command. Use /${commandName} directly in the main conversation.`,
-    )
+    );
   }
-  return getMessagesForPromptSlashCommand(
-    command,
-    args,
-    context,
-    [],
-    imageContentBlocks,
-  )
+  return getMessagesForPromptSlashCommand(command, args, context, [], imageContentBlocks);
 }
 
 async function getMessagesForPromptSlashCommand(
@@ -1128,33 +1095,23 @@ async function getMessagesForPromptSlashCommand(
   // parent env, so we also check !context.agentId: agentId is only set for
   // subagents, letting workers fall through to getPromptForCommand and receive
   // the real skill content when they invoke the Skill tool.
-  if (
-    feature('COORDINATOR_MODE') &&
-    isEnvTruthy(process.env.CLAUDE_CODE_COORDINATOR_MODE) &&
-    !context.agentId
-  ) {
-    const metadata = formatCommandLoadingMetadata(command, args)
-    const parts: string[] = [
-      `Skill "/${command.name}" is available for workers.`,
-    ]
+  if (feature('COORDINATOR_MODE') && isEnvTruthy(process.env.CLAUDE_CODE_COORDINATOR_MODE) && !context.agentId) {
+    const metadata = formatCommandLoadingMetadata(command, args);
+    const parts: string[] = [`Skill "/${command.name}" is available for workers.`];
     if (command.description) {
-      parts.push(`Description: ${command.description}`)
+      parts.push(`Description: ${command.description}`);
     }
     if (command.whenToUse) {
-      parts.push(`When to use: ${command.whenToUse}`)
+      parts.push(`When to use: ${command.whenToUse}`);
     }
-    const skillAllowedTools = command.allowedTools ?? []
+    const skillAllowedTools = command.allowedTools ?? [];
     if (skillAllowedTools.length > 0) {
-      parts.push(
-        `This skill grants workers additional tool permissions: ${skillAllowedTools.join(', ')}`,
-      )
+      parts.push(`This skill grants workers additional tool permissions: ${skillAllowedTools.join(', ')}`);
     }
     parts.push(
       `\nInstruct a worker to use this skill by including "Use the /${command.name} skill" in your Agent prompt. The worker has access to the Skill tool and will receive the skill's content and permissions when it invokes it.`,
-    )
-    const summaryContent: ContentBlockParam[] = [
-      { type: 'text', text: parts.join('\n') },
-    ]
+    );
+    const summaryContent: ContentBlockParam[] = [{ type: 'text', text: parts.join('\n') }];
     return {
       messages: [
         createUserMessage({ content: metadata, uuid }),
@@ -1164,55 +1121,45 @@ async function getMessagesForPromptSlashCommand(
       model: command.model,
       effort: command.effort,
       command,
-    }
+    };
   }
 
-  const result = await command.getPromptForCommand(args, context)
+  const result = await command.getPromptForCommand(args, context);
 
   // Register skill hooks if defined. Under ["hooks"]-only (skills not locked),
   // user skills still load and reach this point — block hook REGISTRATION here
   // where source is known. Mirrors the agent frontmatter gate in runAgent.ts.
-  const hooksAllowedForThisSkill =
-    !isRestrictedToPluginOnly('hooks') || isSourceAdminTrusted(command.source)
+  const hooksAllowedForThisSkill = !isRestrictedToPluginOnly('hooks') || isSourceAdminTrusted(command.source);
   if (command.hooks && hooksAllowedForThisSkill) {
-    const sessionId = getSessionId()
+    const sessionId = getSessionId();
     registerSkillHooks(
       context.setAppState,
       sessionId,
       command.hooks,
       command.name,
       command.type === 'prompt' ? command.skillRoot : undefined,
-    )
+    );
   }
 
   // Record skill invocation for compaction preservation, scoped by agent context.
   // Skills are tagged with their agentId so only skills belonging to the current
   // agent are restored during compaction (preventing cross-agent leaks).
-  const skillPath = command.source
-    ? `${command.source}:${command.name}`
-    : command.name
+  const skillPath = command.source ? `${command.source}:${command.name}` : command.name;
   const skillContent = result
     .filter((b): b is TextBlockParam => b.type === 'text')
     .map(b => b.text)
-    .join('\n\n')
-  addInvokedSkill(
-    command.name,
-    skillPath,
-    skillContent,
-    getAgentContext()?.agentId ?? null,
-  )
+    .join('\n\n');
+  addInvokedSkill(command.name, skillPath, skillContent, getAgentContext()?.agentId ?? null);
 
-  const metadata = formatCommandLoadingMetadata(command, args)
+  const metadata = formatCommandLoadingMetadata(command, args);
 
-  const additionalAllowedTools = parseToolListFromCLI(
-    command.allowedTools ?? [],
-  )
+  const additionalAllowedTools = parseToolListFromCLI(command.allowedTools ?? []);
 
   // Create content for the main message, including any pasted images
   const mainMessageContent: ContentBlockParam[] =
     imageContentBlocks.length > 0 || precedingInputBlocks.length > 0
       ? [...imageContentBlocks, ...precedingInputBlocks, ...result]
-      : result
+      : result;
 
   // Extract attachments from command arguments (@-mentions, MCP resources,
   // agent mentions in SKILL.md). skipSkillDiscovery prevents the SKILL.md
@@ -1232,7 +1179,7 @@ async function getMessagesForPromptSlashCommand(
       'repl_main_thread',
       { skipSkillDiscovery: true },
     ),
-  )
+  );
 
   const messages = [
     createUserMessage({
@@ -1249,7 +1196,7 @@ async function getMessagesForPromptSlashCommand(
       allowedTools: additionalAllowedTools,
       model: command.model,
     }),
-  ]
+  ];
 
   return {
     messages,
@@ -1258,5 +1205,5 @@ async function getMessagesForPromptSlashCommand(
     model: command.model,
     effort: command.effort,
     command,
-  }
+  };
 }
diff --git a/src/utils/processUserInput/processUserInput.ts b/src/utils/processUserInput/processUserInput.ts
index 94682aebfb..e625eeea4c 100644
--- a/src/utils/processUserInput/processUserInput.ts
+++ b/src/utils/processUserInput/processUserInput.ts
@@ -28,6 +28,7 @@ import type {
 import type { PermissionMode } from '../../types/permissions.js'
 import {
   isValidImagePaste,
+  type QueuedCommand,
   type PromptInputMode,
 } from '../../types/textInputTypes.js'
 import {
@@ -80,6 +81,9 @@ export type ProcessUserInputBaseResult = {
   // Used by /discover to chain into the selected feature's command
   nextInput?: string
   submitNextInput?: boolean
+  // When true, the command started detached work that will finalize its
+  // autonomy run after the background work completes.
+  deferAutonomyCompletion?: boolean
 }
 
 export async function processUserInput({
@@ -100,6 +104,7 @@ export async function processUserInput({
   bridgeOrigin,
   isMeta,
   skipAttachments,
+  autonomy,
 }: {
   input: string | Array<ContentBlockParam>
   /**
@@ -137,6 +142,7 @@ export async function processUserInput({
    */
   isMeta?: boolean
   skipAttachments?: boolean
+  autonomy?: QueuedCommand['autonomy']
 }): Promise<ProcessUserInputBaseResult> {
   const inputString = typeof input === 'string' ? input : null
   // Immediately show the user input prompt while we are still processing the input.
@@ -168,6 +174,7 @@ export async function processUserInput({
     isMeta,
     skipAttachments,
     preExpansionInput,
+    autonomy,
   )
   queryCheckpoint('query_process_user_input_base_end')
 
@@ -296,6 +303,7 @@ async function processUserInputBase(
   isMeta?: boolean,
   skipAttachments?: boolean,
   preExpansionInput?: string,
+  autonomy?: QueuedCommand['autonomy'],
 ): Promise<ProcessUserInputBaseResult> {
   let inputString: string | null = null
   let precedingInputBlocks: ContentBlockParam[] = []
@@ -491,6 +499,7 @@ async function processUserInputBase(
       uuid,
       isAlreadyProcessing,
       canUseTool,
+      autonomy,
     )
     return addImageMetadataMessage(slashResult, imageMetadataTexts)
   }
@@ -549,6 +558,7 @@ async function processUserInputBase(
       uuid,
       isAlreadyProcessing,
       canUseTool,
+      autonomy,
     )
     return addImageMetadataMessage(slashResult, imageMetadataTexts)
   }
diff --git a/src/utils/swarm/inProcessRunner.ts b/src/utils/swarm/inProcessRunner.ts
index 5320fd2940..f01582ea16 100644
--- a/src/utils/swarm/inProcessRunner.ts
+++ b/src/utils/swarm/inProcessRunner.ts
@@ -424,8 +424,7 @@ function createInProcessCanUseTool(
                     feedback: parsed.error,
                   })
                 }
-                cleanup()
-                return
+                return // Callback already resolves the promise
               }
             }
           }
@@ -675,6 +674,7 @@ type WaitResult =
       type: 'new_message'
       message: string
       autonomyRunId?: string
+      autonomyRootDir?: string
       from: string
       color?: string
       summary?: string
@@ -739,12 +739,16 @@ async function waitForNextPromptOrShutdown(
         `[inProcessRunner] ${identity.agentName} found pending user message (poll #${pollCount})`,
       )
       if (pending.autonomyRunId) {
-        await markAutonomyRunRunning(pending.autonomyRunId)
+        await markAutonomyRunRunning(
+          pending.autonomyRunId,
+          pending.autonomyRootDir,
+        )
       }
       return {
         type: 'new_message',
         message: pending.message,
         autonomyRunId: pending.autonomyRunId,
+        autonomyRootDir: pending.autonomyRootDir,
         from: 'user',
       }
     }
@@ -1022,6 +1026,7 @@ export async function runInProcessTeammate(
   )
   let currentPrompt = wrappedInitialPrompt
   let currentAutonomyRunId: string | undefined
+  let currentAutonomyRootDir: string | undefined
   let shouldExit = false
 
   // Try to claim an available task immediately so the UI can show activity
@@ -1319,12 +1324,21 @@ export async function runInProcessTeammate(
           setAppState,
         )
         if (currentAutonomyRunId) {
-          await markAutonomyRunFailed(currentAutonomyRunId, ERROR_MESSAGE_USER_ABORT)
+          await markAutonomyRunFailed(
+            currentAutonomyRunId,
+            ERROR_MESSAGE_USER_ABORT,
+            currentAutonomyRootDir,
+          )
           currentAutonomyRunId = undefined
+          currentAutonomyRootDir = undefined
         }
       } else if (currentAutonomyRunId) {
-        await markAutonomyRunCompleted(currentAutonomyRunId)
+        await markAutonomyRunCompleted(
+          currentAutonomyRunId,
+          currentAutonomyRootDir,
+        )
         currentAutonomyRunId = undefined
+        currentAutonomyRootDir = undefined
       }
 
       // Check if already idle before updating (to skip duplicate notification)
@@ -1398,6 +1412,7 @@ export async function runInProcessTeammate(
             setAppState,
           )
           currentAutonomyRunId = undefined
+          currentAutonomyRootDir = undefined
           break
 
         case 'new_message':
@@ -1410,6 +1425,7 @@ export async function runInProcessTeammate(
           if (waitResult.from === 'user') {
             currentPrompt = waitResult.message
             currentAutonomyRunId = waitResult.autonomyRunId
+            currentAutonomyRootDir = waitResult.autonomyRootDir
           } else {
             currentPrompt = formatAsTeammateMessage(
               waitResult.from,
@@ -1426,6 +1442,7 @@ export async function runInProcessTeammate(
               setAppState,
             )
             currentAutonomyRunId = undefined
+            currentAutonomyRootDir = undefined
           }
           break
 
@@ -1533,7 +1550,11 @@ export async function runInProcessTeammate(
       })
     }
     if (currentAutonomyRunId) {
-      await markAutonomyRunFailed(currentAutonomyRunId, errorMessage)
+      await markAutonomyRunFailed(
+        currentAutonomyRunId,
+        errorMessage,
+        currentAutonomyRootDir,
+      )
     }
 
     // Send idle notification with failure via file-based mailbox
diff --git a/src/utils/swarm/spawnInProcess.ts b/src/utils/swarm/spawnInProcess.ts
index 5cfa0ab5aa..77768b67ca 100644
--- a/src/utils/swarm/spawnInProcess.ts
+++ b/src/utils/swarm/spawnInProcess.ts
@@ -234,7 +234,7 @@ export function killInProcessTeammate(
   let agentId: string | null = null
   let toolUseId: string | undefined
   let description: string | undefined
-  let pendingAutonomyRunIds: string[] = []
+  let pendingAutonomyRuns: Array<{ runId: string; rootDir?: string }> = []
 
   setAppState((prev: AppState) => {
     const task = prev.tasks[taskId]
@@ -255,9 +255,18 @@ export function killInProcessTeammate(
     description = teammateTask.description
 
     // Capture pending autonomy run IDs before clearing them
-    pendingAutonomyRunIds = teammateTask.pendingUserMessages
-      .map(message => message.autonomyRunId)
-      .filter((runId): runId is string => runId !== undefined)
+    pendingAutonomyRuns = teammateTask.pendingUserMessages.flatMap(message =>
+      message.autonomyRunId
+        ? [
+            {
+              runId: message.autonomyRunId,
+              ...(message.autonomyRootDir
+                ? { rootDir: message.autonomyRootDir }
+                : {}),
+            },
+          ]
+        : [],
+    )
 
     // Abort the controller to stop execution
     teammateTask.abortController?.abort()
@@ -311,10 +320,11 @@ export function killInProcessTeammate(
   }
 
   if (killed) {
-    for (const runId of pendingAutonomyRunIds) {
+    for (const run of pendingAutonomyRuns) {
       void markAutonomyRunFailed(
-        runId,
+        run.runId,
         `Teammate ${agentId ?? taskId} was stopped before it could consume the queued autonomy prompt.`,
+        run.rootDir,
       )
     }
     void evictTaskOutput(taskId)
diff --git a/tests/integration/autonomy-lifecycle-user-flow.test.ts b/tests/integration/autonomy-lifecycle-user-flow.test.ts
new file mode 100644
index 0000000000..b9e7bd172e
--- /dev/null
+++ b/tests/integration/autonomy-lifecycle-user-flow.test.ts
@@ -0,0 +1,148 @@
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { existsSync, mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join, resolve } from 'node:path'
+import {
+  resetStateForTests,
+  setOriginalCwd,
+  setProjectRoot,
+} from '../../src/bootstrap/state'
+import {
+  listAutonomyRuns,
+  startManagedAutonomyFlowFromHeartbeatTask,
+} from '../../src/utils/autonomyRuns'
+import { listAutonomyFlows } from '../../src/utils/autonomyFlows'
+
+const CLI_ENTRYPOINT = resolve(import.meta.dir, '../../src/entrypoints/cli.tsx')
+
+let tempDir = ''
+let configDir = ''
+let previousConfigDir: string | undefined
+
+async function runAutonomyCli(args: string[]): Promise<string> {
+  const proc = Bun.spawn({
+    cmd: [process.execPath, CLI_ENTRYPOINT, 'autonomy', ...args],
+    cwd: tempDir,
+    env: {
+      ...process.env,
+      CLAUDE_CONFIG_DIR: configDir,
+      CI: 'true',
+      GITHUB_ACTIONS: 'true',
+      NODE_ENV: 'development',
+      NO_COLOR: '1',
+    },
+    stdin: 'ignore',
+    stdout: 'pipe',
+    stderr: 'pipe',
+  })
+
+  const [stdout, stderr, exitCode] = await Promise.all([
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text(),
+    proc.exited,
+  ])
+
+  expect(stderr, `unexpected stderr output:\n${stderr}`).toBe('')
+  expect(exitCode, `non-zero exit ${exitCode}; stderr:\n${stderr}`).toBe(0)
+  return stdout
+}
+
+beforeEach(() => {
+  tempDir = mkdtempSync(join(tmpdir(), 'autonomy-user-flow-'))
+  configDir = join(tempDir, 'config')
+  previousConfigDir = process.env.CLAUDE_CONFIG_DIR
+  process.env.CLAUDE_CONFIG_DIR = configDir
+  resetStateForTests()
+  setOriginalCwd(tempDir)
+  setProjectRoot(tempDir)
+})
+
+afterEach(() => {
+  resetStateForTests()
+  if (previousConfigDir === undefined) {
+    delete process.env.CLAUDE_CONFIG_DIR
+  } else {
+    process.env.CLAUDE_CONFIG_DIR = previousConfigDir
+  }
+  if (tempDir) {
+    rmSync(tempDir, { recursive: true, force: true })
+  }
+})
+
+describe('autonomy lifecycle user-equivalent CLI flow', () => {
+  test('status --deep works from a clean project without creating autonomy state', async () => {
+    const output = await runAutonomyCli(['status', '--deep'])
+
+    expect(output).toContain('# Autonomy Deep Status')
+    expect(output).toContain('Autonomy runs: 0')
+    expect(output).toContain('Autonomy flows: 0')
+    expect(existsSync(join(tempDir, '.claude', 'autonomy', 'runs.json'))).toBe(
+      false,
+    )
+    expect(existsSync(join(tempDir, '.claude', 'autonomy', 'flows.json'))).toBe(
+      false,
+    )
+  })
+
+  test('real CLI can inspect, resume, and cancel a persisted managed flow', async () => {
+    await startManagedAutonomyFlowFromHeartbeatTask({
+      rootDir: tempDir,
+      currentDir: tempDir,
+      task: {
+        name: 'manual-user-flow',
+        interval: '1h',
+        prompt: 'Manual lifecycle acceptance',
+        steps: [
+          {
+            name: 'approve',
+            prompt: 'Wait for manual approval',
+            waitFor: 'manual',
+          },
+          {
+            name: 'execute',
+            prompt: 'Execute approved work',
+          },
+        ],
+      },
+    })
+    const [waitingFlow] = await listAutonomyFlows(tempDir)
+    expect(waitingFlow?.status).toBe('waiting')
+
+    const status = await runAutonomyCli(['status', '--deep'])
+    expect(status).toContain('Autonomy flows: 1')
+    expect(status).toContain('Waiting: 1')
+
+    const flows = await runAutonomyCli(['flows', '5'])
+    expect(flows).toContain(waitingFlow!.flowId)
+    expect(flows).toContain('waiting')
+
+    const detailBefore = await runAutonomyCli(['flow', waitingFlow!.flowId])
+    expect(detailBefore).toContain('Status: waiting')
+    expect(detailBefore).toContain('Current step: approve')
+
+    const resume = await runAutonomyCli(['flow', 'resume', waitingFlow!.flowId])
+    expect(resume).toContain('Prepared the next managed step')
+    expect(resume).toContain('Prompt:')
+
+    const detailAfterResume = await runAutonomyCli([
+      'flow',
+      waitingFlow!.flowId,
+    ])
+    expect(detailAfterResume).toContain('Status: queued')
+    expect(detailAfterResume).toContain('Latest run:')
+
+    const cancel = await runAutonomyCli(['flow', 'cancel', waitingFlow!.flowId])
+    expect(cancel).toContain('Cancelled flow')
+
+    const [cancelledRun] = await listAutonomyRuns(tempDir)
+    const [cancelledFlow] = await listAutonomyFlows(tempDir)
+    expect(cancelledRun?.status).toBe('cancelled')
+    expect(cancelledFlow?.status).toBe('cancelled')
+
+    const detailAfterCancel = await runAutonomyCli([
+      'flow',
+      waitingFlow!.flowId,
+    ])
+    expect(detailAfterCancel).toContain('Status: cancelled')
+  }, 30000)
+})
diff --git a/tests/integration/dependency-overrides.test.ts b/tests/integration/dependency-overrides.test.ts
index b1549e82b3..65a6679527 100644
--- a/tests/integration/dependency-overrides.test.ts
+++ b/tests/integration/dependency-overrides.test.ts
@@ -2,13 +2,42 @@ import { describe, expect, test } from 'bun:test'
 import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'
 import { createRequire } from 'node:module'
 import { tmpdir } from 'node:os'
-import { join, resolve } from 'node:path'
+import { dirname, join, resolve } from 'node:path'
 import { pathToFileURL } from 'node:url'
 
 const repoRoot = resolve(import.meta.dir, '..', '..')
 const uuidV4Pattern =
   /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/
 
+async function findPackageJson(
+  startPath: string,
+  expectedName: string,
+): Promise<string> {
+  let current = dirname(startPath)
+  for (let depth = 0; depth < 10; depth++) {
+    const candidate = join(current, 'package.json')
+    const file = Bun.file(candidate)
+    if (await file.exists()) {
+      try {
+        const parsed = JSON.parse(await file.text()) as { name?: unknown }
+        if (parsed.name === expectedName) {
+          return candidate
+        }
+      } catch {
+        // ignore parse errors and keep walking up
+      }
+    }
+    const parent = dirname(current)
+    if (parent === current) {
+      break
+    }
+    current = parent
+  }
+  throw new Error(
+    `package.json with name "${expectedName}" not found above ${startPath}`,
+  )
+}
+
 describe('dependency security overrides', () => {
   test('mcpb can load patched inquirer prompts from its package context', async () => {
     const mcpbRequire = createRequire(import.meta.resolve('@anthropic-ai/mcpb'))
@@ -28,10 +57,7 @@ describe('dependency security overrides', () => {
     )
     const gaxios = vertexRequire('gaxios') as {
       request(options: {
-        adapter(options: {
-          headers: Headers
-          url: string
-        }): Promise<{
+        adapter(options: { headers: Headers; url: string }): Promise<{
           config: unknown
           data: string
           headers: Record<string, string>
@@ -39,7 +65,7 @@ describe('dependency security overrides', () => {
           status: number
           statusText: string
         }>
-      multipart: Array<{ body: string; headers: Record<string, string> }>
+        multipart: Array<{ body: string; headers: Record<string, string> }>
         url: string
       }): Promise<{ status: number }>
     }
@@ -47,8 +73,10 @@ describe('dependency security overrides', () => {
 
     const response = await gaxios.request({
       url: 'https://example.com/upload',
-      multipart: [{ body: 'payload', headers: { 'Content-Type': 'text/plain' } }],
-      adapter: async (options) => {
+      multipart: [
+        { body: 'payload', headers: { 'Content-Type': 'text/plain' } },
+      ],
+      adapter: async options => {
         contentType = options.headers.get('content-type') ?? undefined
         return {
           config: options,
@@ -62,14 +90,14 @@ describe('dependency security overrides', () => {
     })
 
     expect(response.status).toBe(200)
-    expect(contentType).toMatch(
-      /^multipart\/related; boundary=[0-9a-f-]{36}$/,
-    )
+    expect(contentType).toMatch(/^multipart\/related; boundary=[0-9a-f-]{36}$/)
     expect(contentType?.split('boundary=')[1]).toMatch(uuidV4Pattern)
   })
 
   test('azure identity msal guid generation works through its package context', () => {
-    const identityRequire = createRequire(import.meta.resolve('@azure/identity'))
+    const identityRequire = createRequire(
+      import.meta.resolve('@azure/identity'),
+    )
     const msal = identityRequire('@azure/msal-node') as {
       CryptoProvider: new () => { createNewGuid(): string }
     }
@@ -78,7 +106,7 @@ describe('dependency security overrides', () => {
     expect(cryptoProvider.createNewGuid()).toMatch(uuidV4Pattern)
   })
 
-  test('remote control markdown renderer loads streamdown and mermaid', async () => {
+  test('remote control markdown renderer resolves streamdown and mermaid', async () => {
     const rcsRequire = createRequire(
       join(repoRoot, 'packages/remote-control-server/package.json'),
     )
@@ -90,13 +118,26 @@ describe('dependency security overrides', () => {
     const uuid = (await import(
       pathToFileURL(streamdownRequire.resolve('uuid')).href
     )) as { v4(): string }
-    const mermaid = (await import(
-      pathToFileURL(streamdownRequire.resolve('mermaid')).href
-    )) as { default?: { initialize?: unknown } }
+    const mermaidPath = streamdownRequire.resolve('mermaid')
+    // mermaid does not export ./package.json in its exports map, so resolving
+    // 'mermaid/package.json' throws ERR_PACKAGE_PATH_NOT_EXPORTED in runtimes
+    // that honor exports semantics. Walk up from the resolved entry until a
+    // package.json with name === 'mermaid' is found.
+    const mermaidPackagePath = await findPackageJson(mermaidPath, 'mermaid')
+    const mermaidPackage = JSON.parse(
+      await Bun.file(mermaidPackagePath).text(),
+    ) as {
+      name?: unknown
+      exports?: { '.'?: { import?: unknown } }
+    }
 
     expect(streamdown.Streamdown).toBeDefined()
     expect(uuid.v4()).toMatch(uuidV4Pattern)
-    expect(typeof mermaid.default?.initialize).toBe('function')
+    expect(mermaidPackage.name).toBe('mermaid')
+    expect(mermaidPath).toContain('mermaid.core.mjs')
+    expect(mermaidPackage.exports?.['.']?.import).toBe(
+      './dist/mermaid.core.mjs',
+    )
   })
 
   test('grpc proto-loader keeps its protobuf 7 parser path working', () => {
diff --git a/tests/mocks/auth.ts b/tests/mocks/auth.ts
new file mode 100644
index 0000000000..7c0da17a75
--- /dev/null
+++ b/tests/mocks/auth.ts
@@ -0,0 +1,31 @@
+/**
+ * Shared mock for `src/utils/auth.js`. Use it via:
+ *
+ *   import { authMock } from '../../tests/mocks/auth'
+ *   mock.module('src/utils/auth.js', authMock)
+ *
+ * Tests that need different return values can wrap this factory (e.g. spread
+ * `authMock()` into a new factory's result and re-register via mock.module).
+ * Always extend here rather than inlining a different shape per test, so the
+ * surface stays consistent when `auth.ts` exports change.
+ */
+export const authMock = () => ({
+  // Mirrors the production contract: src/utils/auth.ts returns
+  // Promise<boolean> ("did the access token change") and a token object that
+  // carries scopes, subscriptionType, expiresAt, etc. Tests that branch on
+  // these values must see the full shape so they cannot silently drift away
+  // from production.
+  checkAndRefreshOAuthTokenIfNeeded: async () => false,
+  getClaudeAIOAuthTokens: () => ({
+    accessToken: 'token',
+    refreshToken: null,
+    expiresAt: null,
+    scopes: ['user:inference'],
+    subscriptionType: null,
+    rateLimitTier: null,
+  }),
+  isClaudeAISubscriber: () => true,
+  isProSubscriber: () => false,
+  isMaxSubscriber: () => false,
+  isTeamSubscriber: () => false,
+})
diff --git a/tests/mocks/file-system.ts b/tests/mocks/file-system.ts
index e356ec0155..c46defc6c7 100644
--- a/tests/mocks/file-system.ts
+++ b/tests/mocks/file-system.ts
@@ -30,3 +30,21 @@ export async function createTempSubdir(
   await mkdir(path, { recursive: true })
   return path
 }
+
+/**
+ * Read a file under the test temp dir as utf-8 text. Mirrors the node:fs
+ * `readFileSync(path, 'utf-8')` ergonomics but uses Bun's native file API so
+ * tests stay on the Bun-only runtime contract.
+ */
+export async function readTempFile(path: string): Promise<string> {
+  return Bun.file(path).text()
+}
+
+/**
+ * Best-effort existence check for a file under the test temp dir. Uses Bun's
+ * native file API: `Bun.file(path).exists()` resolves true for regular files
+ * (and FIFOs) but false for directories, so do not use it to probe a dir.
+ */
+export async function tempPathExists(path: string): Promise<boolean> {
+  return Bun.file(path).exists()
+}