From d00822b6b3477e96dee6c85c69cb6f9f53cae4f9 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 17:42:08 +0000 Subject: [PATCH 01/15] fix(supervisor): exit container when last tmux session ends Stop containers automatically when all agent sessions exit, eliminating the need for `jackin eject` after a clean session. - supervisor.sh now monitors /tmp/tmux-$(id -u)/default with a 1-second poll loop; exits 0 when the socket disappears (last session ended), exits 1 if no socket appears within a 60-second startup grace period - launch.rs: when docker exec returns with the container still Running, call inspect_agent_sessions; if no sessions remain treat it as Stopped/0 and run the full DinD/network/certs teardown path immediately - Roadmap: mark jackin-container-binary as Partially implemented; update Phase 1 to describe the bash approach and defer the Rust binary to Phase 2 Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- docker/runtime/supervisor.sh | 51 ++++++++++++++----- docs/src/content/docs/reference/roadmap.mdx | 2 +- .../roadmap/jackin-container-binary.mdx | 15 +++--- src/runtime/launch.rs | 34 +++++++++++-- 4 files changed, 77 insertions(+), 25 deletions(-) diff --git a/docker/runtime/supervisor.sh b/docker/runtime/supervisor.sh index a04aa77c4..ee01c8d7c 100644 --- a/docker/runtime/supervisor.sh +++ b/docker/runtime/supervisor.sh @@ -1,17 +1,22 @@ #!/bin/bash # Container supervisor — PID 1. # -# Keeps the container alive while agent sessions run via `docker exec`. -# Forwards SIGTERM and SIGINT so `docker stop` / `docker kill` terminate -# cleanly without a 10-second timeout. +# Exits 0 when the last tmux session ends so host-side cleanup fires +# automatically — no manual `jackin eject` needed. Exits 1 if no session +# appears within the startup grace period so diagnose_premature_exit can +# surface the container logs. 
# -# Interim placeholder: this script will be replaced by the `jackin-container` -# Rust binary (see reference/roadmap/jackin-container-binary) which adds -# last-session detection via inotify and a Unix socket status interface. +# The tmux server creates its socket at /tmp/tmux-<uid>/default when the +# first session starts and removes it when the last session ends and all +# clients have disconnected. Watching the socket file is reliable and +# requires no tmux hooks or configuration. # -# No `set -e`: `wait` returns the exit code of the child it waited on; -# a signal-killed sleep exits non-zero, and `set -e` would misread that -# as a supervisor failure on every clean `docker stop`. +# Will be replaced by the `jackin-container` Rust binary once Phase 2 +# (Unix socket status interface) justifies the build/distribution overhead. +# See reference/roadmap/jackin-container-binary for the full plan. +# +# No `set -e`: signal-killed `wait` exits non-zero; `set -e` would misread +# that as a supervisor failure on every clean `docker stop`. _cleanup() { kill "$!" 2>/dev/null || true @@ -19,9 +24,29 @@ _cleanup() { } trap '_cleanup' TERM INT -# Wait in a background-sleep loop so the trap fires promptly. -# `|| true` guards against a signal-killed sleep triggering an exit. -while true; do - sleep 3600 & +TMUX_SOCKET="/tmp/tmux-$(id -u)/default" + +# Grace period: wait up to 60 s for the first tmux session socket to +# appear. Without this the supervisor exits before `docker exec tmux +# new-session` has a chance to create it. +deadline=$((SECONDS + 60)) +while [ $SECONDS -lt $deadline ] && [ ! -S "$TMUX_SOCKET" ]; do + sleep 1 & wait $! || true done + +# No session appeared — something went wrong at startup. Exit non-zero so +# diagnose_premature_exit surfaces the container logs rather than returning +# a cryptic "container is not running" error. +if [ ! -S "$TMUX_SOCKET" ]; then + exit 1 +fi + +# Wait for the last session to end. 
The tmux server removes the socket +# immediately after the last session closes and all clients disconnect. +while [ -S "$TMUX_SOCKET" ]; do + sleep 1 & + wait $! || true +done + +exit 0 diff --git a/docs/src/content/docs/reference/roadmap.mdx b/docs/src/content/docs/reference/roadmap.mdx index c35147242..a8b4012d7 100644 --- a/docs/src/content/docs/reference/roadmap.mdx +++ b/docs/src/content/docs/reference/roadmap.mdx @@ -58,6 +58,7 @@ jackin' is a functional proof of concept. **`Claude Code`, `Codex`, `Amp`, `Kimi - [Workspace Claude token setup](/reference/roadmap/workspace-claude-token-setup/) — shipped token commands are documented in standard docs; roadmap tracks the canonical auth slot, TUI generate action, Apple Keychain backend, validity probe, and bulk migration - [Config versioning and migration framework](/reference/roadmap/config-versioning-migration/) — shipped per-file schema gates for config, workspace files, and role manifests, plus automatic config/workspace migration, desktop role manifest migration through `jackin role migrate`, and CI migration through `jackin-role migrate`; roadmap tracks deferred `--pr` automation and the Renovate-style auto-migration GitHub Action - [Console agent session control](/reference/roadmap/console-agent-session-control/) — Phases 1–3 shipped: instance discovery, console Instances panel, `hardline --shell`, tmux-backed primary session (supervisor as PID 1, `tmux new-session -A`), secondary agent sessions via `hardline --new`, and console `a`/`x` keybindings; Phase 4 (agent runtime status + resource panel integration) remains open +- [jackin-container: in-container supervisor binary](/reference/roadmap/jackin-container-binary/) — Phase 1 shipped: improved bash supervisor exits when the last tmux session ends, closing the automatic container teardown gap; Rust binary with inotify, Unix socket status interface, and daemon event streaming remain ## Planned @@ -77,7 +78,6 @@ jackin' is a functional proof of concept. 
**`Claude Code`, `Codex`, `Amp`, `Kimi ### Reactive daemon program -- **[jackin-container: in-container supervisor binary](/reference/roadmap/jackin-container-binary/)** — replace the interim bash supervisor with a Rust binary that acts as PID 1, watches for last-session exit via inotify on the tmux server socket, exits cleanly to trigger host-side cleanup, and exposes a Unix socket interface for session inventory queries; foundation for jackin daemon communication with running containers (status: open — design proposal) - **[jackin daemon — umbrella](/reference/roadmap/jackin-daemon/)** — introduce the long-running per-operator-user host process jackin will use for reactive features. One umbrella item that decides lifecycle, install method, control socket, security posture, and log redaction once so each reactive feature plugs into one daemon shape. The full list of phase-2/phase-3 reactive adapters lives in the program doc and the sidebar under **Reference → Roadmap → Reactive daemon program** (status: open — design proposal) - **[Jackin Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/)** — native macOS menu bar and desktop companion for active Jackin workspaces, isolated agent sessions, PR jump links, and Claude/Codex/Amp account state. Keeps the agent TUIs as the primary agent UI while using the daemon as the shared state/event backend (status: open — design proposal) - **[Live bidirectional auth sync](/reference/roadmap/live-auth-sync/)** — Phase 2 adapter. Keep host and every running container in lock-step on each auth axis (`gh`, Claude, Codex, Amp, …). 
Subsumes the launch-time `sync` mode's bidirectional follow-up; reconsiders the `sync` name in the process (status: open — design proposal) diff --git a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx index a53a037c8..5b26b21b7 100644 --- a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx +++ b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx @@ -3,7 +3,7 @@ title: "jackin-container: In-Container Supervisor Binary" --- import RepoFile from '../../../../components/RepoFile.astro' -**Status**: Open — design proposal +**Status**: Partially implemented — Phase 1 cleanup gap closed via improved bash supervisor; Rust binary with inotify, Unix socket interface, and daemon integration remain ## Problem @@ -114,14 +114,13 @@ When `docker run` starts a role container, jackin adds the socket mount and swit ## Implementation plan -### Phase 1 — Binary skeleton as PID 1 *(fixes the cleanup gap)* +### Phase 1 — Cleanup gap closed *(shipped — bash supervisor with socket polling)* -- Create `crates/jackin-container/` workspace member. -- Implement PID 1 bootstrap: zombie reaping via `SIGCHLD` handler, `SIGTERM`/`SIGINT` → clean exit. -- Implement tmux socket watch via `inotify`. Exit 0 when socket is deleted. -- Add CI job: cross-compile for `linux/amd64` and `linux/arm64`, publish to GitHub Releases. -- Replace `supervisor.sh` in the derived `Dockerfile` with the binary download and install. -- Remove (or keep as fallback for images built before this lands — pre-release, so no migration shim needed). +The cleanup gap is fixed. now monitors the tmux server socket (`/tmp/tmux-/default`) with a 1-second polling loop. When the socket disappears (last session ended and all clients disconnected), the supervisor exits 0 → container exits 0 → the host-side cleanup path runs automatically. 
A 60-second startup grace period prevents the supervisor from exiting before the first `docker exec tmux new-session` creates the socket. + + was updated alongside it: when `docker exec` returns and the container is still `Running`, the code now calls `inspect_agent_sessions` to distinguish a clean agent exit (no sessions — run teardown) from a detached terminal (sessions active — keep the container alive). + +The Rust binary with `inotify`-based watching is deferred. The bash polling approach closes the operator-facing gap at low implementation cost; the binary becomes worth the cross-compilation and distribution overhead once Phase 2 (Unix socket status interface) lands. ### Phase 2 — Unix socket + status command *(structured session inventory)* diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index b22c722ce..abc255cd1 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -13,7 +13,10 @@ use fs2::FileExt; use owo_colors::OwoColorize; use std::io::IsTerminal; -use super::attach::{ContainerState, hardline_agent, inspect_container_state, wait_for_dind}; +use super::attach::{ + AgentSessionInventory, ContainerState, hardline_agent, inspect_agent_sessions, + inspect_container_state, wait_for_dind, +}; use super::cleanup::{gc_orphaned_resources, run_cleanup_command}; use super::discovery::list_running_agent_display_names; use super::identity::{GitIdentity, build_config_rows, load_git_identity, load_host_identity}; @@ -2023,13 +2026,38 @@ fn load_role_with( // Classify how the interactive session ended so we know whether to // tear the container down or preserve it for `jackin hardline` to // restart: - // - Running → terminal was closed (user detached). Keep it. + // - Running + active sessions → terminal closed (user detached). Keep it. + // - Running + no sessions → agent exited; supervisor will stop the + // container within ≤1 s but inspect raced + // ahead. Treat the same as Stopped/0. 
// - Stopped / 0 → user exited cleanly inside Claude Code. Tear down. // - Stopped / ≠0 or OOM-killed → crash. Preserve so `jackin hardline` // can restart the existing container + DinD sidecar. #[allow(clippy::match_same_arms)] match inspect_container_state(runner, &container_name) { - ContainerState::Running => cleanup.disarm(), + ContainerState::Running => { + let sessions = inspect_agent_sessions( + runner, + &container_name, + &ContainerState::Running, + ); + if matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()) { + if !matches!( + decision, + crate::isolation::finalize::FinalizeDecision::Preserved + ) { + write_instance_status( + paths, + &container_state, + &mut instance_manifest, + InstanceStatus::CleanExited, + )?; + } + cleanup.run(runner); + } else { + cleanup.disarm(); + } + } ContainerState::Stopped { exit_code: 0, oom_killed: false, From b41e5ae414aa874aac1e3a89cd65be413a9b7bae Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 17:58:01 +0000 Subject: [PATCH 02/15] docs(roadmap): expand jackin-container to full multiplexer server vision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scope the roadmap item to its long-term goal: jackin-container evolves from bash supervisor → Rust PID 1 → in-container multiplexer server that replaces tmux entirely. Phase 3 adds PTY session management, agent-state inference (working / blocked / done / idle via PTY output), and a full session control API (session.create, session.kill, session.title, session.attach, events). Phase 4 connects the multiplexer to the daemon and desktop app. References Herdr (herdr.dev) as design prior art for agent-state inference and Unix socket semantics. No Herdr source reused (AGPL-3.0 conflict). 
Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- .../roadmap/jackin-container-binary.mdx | 143 ++++++++++-------- 1 file changed, 79 insertions(+), 64 deletions(-) diff --git a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx index 5b26b21b7..aa1223a1e 100644 --- a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx +++ b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx @@ -1,26 +1,25 @@ --- -title: "jackin-container: In-Container Supervisor Binary" +title: "jackin-container: In-Container Multiplexer Server" --- import RepoFile from '../../../../components/RepoFile.astro' -**Status**: Partially implemented — Phase 1 cleanup gap closed via improved bash supervisor; Rust binary with inotify, Unix socket interface, and daemon integration remain +**Status**: Partially implemented — Phase 1 cleanup gap closed via improved bash supervisor; Rust binary, multiplexer server, and desktop bridge remain ## Problem -The current container supervisor is a bash wait loop () that keeps the container alive while agent sessions run via `docker exec`. It has two structural limitations: +The current container supervisor is a bash wait loop () that keeps the container alive while agent sessions run via `docker exec`. It has two immediate limitations and one deeper architectural gap: -**Last-session cleanup does not fire.** When a tmux session exits, the supervisor keeps the container running. Because the container state is `Running` rather than `Stopped`, the host jackin cleanup path (`finalize_foreground_session`, container teardown, DinD/network/certs removal) never fires automatically. The operator must explicitly run `jackin eject` to clean up a container after all sessions end. This is wrong for the normal lifecycle — the container should stop and clean up when the last session exits cleanly. 
+**Last-session cleanup does not fire.** When a tmux session exits, the supervisor keeps the container running. Because the container state is `Running` rather than `Stopped`, the host jackin cleanup path (`finalize_foreground_session`, container teardown, DinD/network/certs removal) never fires automatically. The operator must explicitly run `jackin eject` to clean up a container after all sessions end. -**Session inventory requires shelling out.** Querying which sessions are active requires `docker exec sh -c 'tmux list-sessions ...'` from the host. There is no structured interface for the host CLI or the future jackin daemon to ask "what is running in this container right now?" The tmux command output is a formatting detail, not a protocol. +**Session inventory requires shelling out.** Querying which sessions are active requires `docker exec sh -c 'tmux list-sessions ...'` from the host. There is no structured interface for the host CLI or the future jackin daemon to ask "what is running in this container right now?" -Both problems are symptoms of the same root cause: there is no jackin-owned process inside the container that understands session lifecycle and can communicate it to the host. +**tmux is not designed for the container-per-agent model.** tmux has no concept of agent state, no structured control plane, and no event stream. Every interaction requires `docker exec tmux ...` round-trips. Jackin cannot ask "is this agent blocked or still working?" without reading raw terminal output from the outside. This is the root reason observability, attention prompts, and desktop app integration are hard to build on top of the current architecture. A purpose-built process inside the container — one that owns the session lifecycle from the start — can expose exactly the information Jackin needs over a structured protocol. ## Goals -- Container stops automatically when the last agent session exits cleanly, triggering normal host-side cleanup. 
-- Host CLI and jackin daemon can query active sessions through a structured interface without parsing tmux output. -- The binary is small, self-contained, and built from the jackin workspace — no new runtime dependencies inside the container. -- The binary is a foundation for the jackin daemon to communicate with running containers, following the same Unix socket pattern Docker itself uses. +**Short-term (Phases 1–2):** fix the cleanup gap and replace `docker exec tmux list-sessions` with a structured socket query. + +**Long-term (Phases 3–4):** evolve `jackin-container` into the in-container multiplexer server. It replaces tmux as the session substrate, manages PTY processes directly, tracks agent state per session, and becomes the structured backend that the host CLI, the jackin daemon, and eventually the Jackin desktop app all talk to via its Unix socket API. Each container becomes a self-contained server: spawn a session, kill a session, query status, watch events. The operator's terminal, the console, and the desktop companion all drive the same control plane. ## Design @@ -43,25 +42,9 @@ During the derived image build, jackin downloads the `jackin-container` binary f As PID 1, `jackin-container` must: 1. **Reap zombie children.** Processes whose parent exits become children of PID 1. The binary must call `waitpid(-1, WNOHANG)` in a loop on `SIGCHLD` to reap them. `tokio` does not do this automatically. -2. **Forward signals cleanly.** `SIGTERM` from `docker stop` → exit 0. `SIGINT` → exit 0. Both trigger the session-end path described below. +2. **Forward signals cleanly.** `SIGTERM` from `docker stop` → exit 0. `SIGINT` → exit 0. Both trigger the session-end path. 3. **Never crash on unexpected input.** PID 1 death kills the entire container. All error paths either log and continue or exit deliberately. -### Session lifecycle tracking — tmux socket watch - -The binary tracks active sessions by watching the tmux server socket via `inotify` (Linux). 
The tmux server creates its socket at `/tmp/tmux-/default` when the first session starts and removes it when the last session ends. The binary watches for the socket's deletion: - -- Socket exists → at least one session running → binary stays alive -- Socket deleted → all sessions ended → binary runs the end-of-sessions path - -This is event-driven, not polled. No tmux modification or hook configuration is needed. - -**End-of-sessions path:** -1. Log that all sessions have ended -2. Run the finalize hook if present (reserved for future use) -3. Exit with code 0 - -The container stops with `exit 0`. The host jackin cleanup logic already handles this case: `finalize_foreground_session` runs, the container is torn down, DinD/network/certs are removed. No new host-side logic is required — the gap is closed by the binary exiting cleanly at the right moment. - ### Unix socket interface The binary listens on a Unix domain socket at `/run/jackin/jackin.sock` inside the container. The host mounts the socket from its own filesystem: @@ -72,45 +55,56 @@ docker run ... -v /run/jackin/.sock:/run/jackin/jackin.sock ... The socket directory on the host (`/run/jackin/`) is created by jackin. One socket file per instance, named by instance ID. -**Protocol:** Newline-delimited JSON. Each request is one JSON object followed by `\n`. Each response is one JSON object followed by `\n`. No framing, no length prefix — connection-per-request is acceptable for v1. +**Protocol:** Newline-delimited JSON. Each request is one JSON object followed by `\n`. Each response is one JSON object followed by `\n`. No framing, no length prefix — connection-per-request is fine for v1. + +### Session management API (evolution across phases) -**v1 commands:** +The API grows with each phase. The full target API is: -`status` — returns the active session list. This is the only command needed to fix the current gap and replace `docker exec tmux list-sessions` calls. 
+**Phase 2 — status query:** -Request: ```json {"method": "status"} ``` +Response: list of active sessions with name, agent type, created-at timestamp, and status. -Response: -```json -{ - "sessions": [ - {"name": "jackin-claude-1a2b3c", "agent": "claude", "created_at": "2026-05-17T13:00:00Z", "status": "running"}, - {"name": "jackin-codex-4d5e6f", "agent": "codex", "created_at": "2026-05-17T13:04:11Z", "status": "running"} - ], - "container_uptime_secs": 3721 -} -``` +**Phase 3 — full multiplexer control:** -`agent` is derived from the session name prefix (`jackin--` → ``). +| Method | Description | +|---|---| +| `status` | List all sessions with metadata and agent state | +| `session.create` | Spawn a new agent session inside the container | +| `session.kill` | Terminate a session by ID | +| `session.title` | Read the current title of a session (terminal title seq or process name) | +| `session.attach` | Request a PTY attachment handle so the client can attach to a running session | +| `events` | Upgrade connection to an event stream | -**Future commands (deferred — daemon phase):** +**Event stream (Phase 3):** `session-started`, `session-ended`, `all-sessions-ended`, `agent-state-changed {session, state}`. -- `attach {"session": "jackin-claude-1a2b3c"}` — instruct the binary to run the attach command on behalf of the daemon -- `events` — upgrade connection to an event stream: `session-started`, `session-ended`, `all-sessions-ended` +**Agent states per session:** `working` (output flowing, foreground process active), `blocked` (waiting for input — detected via output silence + process state), `done` (work finished, operator not yet reviewed), `idle` (reviewed or no work in progress). The two-stage `done` / `idle` split — validated by [Herdr research](/reference/roadmap/herdr-research/) — matters for the autonomous queue and the desktop app's "ready for review" indicator. 
-### Host side changes +Agent state is inferred from PTY output activity and foreground process state inside the container. No agent hooks or configuration changes are required. The heuristic design draws on concepts from the Herdr research (see [Herdr research](/reference/roadmap/herdr-research/)), rewritten from scratch without copying any Herdr source (Herdr is AGPL-3.0; Jackin is Apache-2.0). -When `docker run` starts a role container, jackin adds the socket mount and switches the entrypoint: +### Session lifecycle tracking -``` ---entrypoint /usr/local/bin/jackin-container --v /run/jackin/.sock:/run/jackin/jackin.sock -``` +**Phase 2** (Rust binary, tmux still present): binary watches the tmux server socket at `/tmp/tmux-/default` via `inotify`. Socket deleted → all sessions ended → exit 0. This replaces the 1-second bash polling loop from Phase 1. + +**Phase 3** (multiplexer): binary owns session lifecycle directly. No tmux. When the last session exits cleanly, the binary exits 0. When SIGTERM arrives, the binary terminates all sessions, then exits. + +### Why replace tmux + +- No structured API. Every tmux interaction from the host requires `docker exec tmux ...` — a subprocess round-trip with string output that must be parsed. +- No agent-state awareness. tmux reports session names and windows; it has no concept of whether the process inside is blocked, working, or done. +- No event stream. The host cannot subscribe to "a session ended" without polling. +- tmux adds size (binary in the image), startup overhead, and a dependency Jackin does not control. + +A purpose-built multiplexer in `jackin-container` gives Jackin a clean control plane: structured socket, typed events, agent-state inference, and no external process to coordinate with. -`inspect_agent_sessions` in is updated to connect to the socket and issue a `status` request instead of running `docker exec tmux list-sessions`. The existing `AgentSession` struct is populated from the response. 
+### Host-side changes per phase + +**Phase 2:** add socket mount to `docker run`; update `inspect_agent_sessions` in to connect to socket instead of `docker exec tmux list-sessions`. + +**Phase 3:** remove `tmux` from the derived image; update `docker exec tmux new-session` in and to issue `session.create` via the socket; update all other tmux shell-out call sites. ## Implementation plan @@ -120,23 +114,44 @@ The cleanup gap is fixed. now m was updated alongside it: when `docker exec` returns and the container is still `Running`, the code now calls `inspect_agent_sessions` to distinguish a clean agent exit (no sessions — run teardown) from a detached terminal (sessions active — keep the container alive). -The Rust binary with `inotify`-based watching is deferred. The bash polling approach closes the operator-facing gap at low implementation cost; the binary becomes worth the cross-compilation and distribution overhead once Phase 2 (Unix socket status interface) lands. +The Rust binary with `inotify`-based watching is deferred. The bash polling approach closes the operator-facing gap at low implementation cost; the binary is justified once Phase 2 lands. + +### Phase 2 — Rust binary skeleton + Unix socket status command *(structured session inventory)* + +- Create `crates/jackin-container/` workspace member. +- Implement PID 1 bootstrap: zombie reaping via `SIGCHLD`, `SIGTERM`/`SIGINT` → clean exit. +- Implement tmux socket watch via `inotify` (replaces bash polling). Exit 0 when socket deleted. +- Implement Unix socket listener and `status` command. +- Add CI job: cross-compile for `linux/amd64` and `linux/arm64`, publish to GitHub Releases. +- Add socket mount to `docker run` in . +- Update `inspect_agent_sessions` in to query socket instead of `docker exec tmux list-sessions`. +- Update `hardline --inspect` and console session inventory to use socket path. +- Remove (pre-release; no migration shim needed). 
+ +### Phase 3 — In-container multiplexer *(replace tmux)* -### Phase 2 — Unix socket + status command *(structured session inventory)* +- Implement PTY session management in `jackin-container`: spawn sessions, manage PTY lifecycle, reap processes. +- Implement agent state inference from PTY output and foreground process state. +- Expand socket API: `session.create`, `session.kill`, `session.title`, `session.attach`, `events`. +- Implement event stream: `session-started`, `session-ended`, `all-sessions-ended`, `agent-state-changed`. +- Remove `tmux` from the derived image (`docker/construct/Dockerfile`). +- Replace all `docker exec tmux ...` call sites in the host CLI with socket API calls. +- Update console session panel to consume agent-state events rather than polling. -- Implement the Unix socket listener and `status` command in `jackin-container`. -- Add the socket mount (`-v /run/jackin/.sock:/run/jackin/jackin.sock`) to the `docker run` call in . -- Update `inspect_agent_sessions` in to query the socket instead of running `docker exec tmux list-sessions`. -- Update `hardline --inspect` and console session inventory to use the socket path. +This phase is the prerequisite for the desktop app: once `jackin-container` manages sessions directly and streams events, the desktop companion can subscribe to the socket and render live agent state without any polling or `docker exec` overhead. -### Phase 3 — Daemon integration *(deferred — depends on jackin daemon)* +### Phase 4 — Daemon integration and desktop app bridge *(deferred — depends on jackin daemon and desktop app)* -- Add `events` command: the binary streams `session-started`, `session-ended`, and `all-sessions-ended` events over the socket. -- Daemon subscribes to events per running container and updates the instance index reactively. -- See [jackin daemon](/reference/roadmap/jackin-daemon/) for the daemon architecture. 
+- Add daemon subscription: the jackin daemon connects to each running container's socket and maintains a live session index. +- Desktop app reads from the daemon's aggregated view; each container's `jackin-container` socket is the per-container data source. +- Advanced session commands: session snapshots, resource usage, log streaming. +- See [jackin daemon](/reference/roadmap/jackin-daemon/) and [Jackin Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/). ## Relationship to other roadmap items -- **[Console agent session control](/reference/roadmap/console-agent-session-control/)** — Phase 4 (session reconciliation in the console) unblocks once Phase 2 of this item ships: the binary exposes live session state, eliminating the need to reconcile from a manifest snapshot. +- **[Herdr research](/reference/roadmap/herdr-research/)** — Herdr is the closest prior art for the multiplexer vision (agent-state inference, Unix socket API, session persistence). The two-stage done/idle state, workspace-level roll-up, and PTY inference heuristics from that research inform Phase 3 design. No Herdr source is reused (AGPL-3.0 license conflict). +- **[Console agent session control](/reference/roadmap/console-agent-session-control/)** — Phase 4 of that item (session reconciliation in the console) unblocks once Phase 2 of this item ships: the binary exposes live session state, eliminating manifest-snapshot reconciliation. - **[Agent runtime status](/reference/roadmap/agent-runtime-status/)** — idle/busy indicators per session require the binary to track agent process state inside the container. Phase 3 events are the natural delivery mechanism. -- **[jackin daemon](/reference/roadmap/jackin-daemon/)** — the binary is the per-container endpoint the daemon talks to. Phase 3 of this item and the daemon's container-watch phase are designed together. 
+- **[Agent attention prompts](/reference/roadmap/agent-attention-prompts/)** — `agent-state-changed` events from Phase 3 replace the PTY polling approach planned for that item. +- **[jackin daemon](/reference/roadmap/jackin-daemon/)** — the binary is the per-container endpoint the daemon talks to. Phase 4 of this item and the daemon's container-watch phase are designed together. +- **[Jackin Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/)** — Phase 3 of this item is the prerequisite: the desktop app's live session view is driven by `jackin-container` socket events, aggregated by the daemon. From b0f98b15659912d3b20e7cb81dccc170e6a3fed4 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 18:01:49 +0000 Subject: [PATCH 03/15] docs(roadmap): merge herdr research into jackin-container multiplexer item MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidate herdr-research.mdx into jackin-container-binary.mdx — the research informs the multiplexer design directly, so a separate page adds navigation overhead without clarity benefit. The merged item now covers: - Current state: tmux-based minimal approach, why it's temporary - Research motivation: Herdr validated agent-aware multiplexer concept - What to implement independently (two-stage done/idle, PTY inference, workspace roll-up, notification suppression, socket wait semantics) - What not to borrow (AGPL-3.0 license conflict, architecture mismatch) - Full 4-phase implementation plan through desktop app bridge Move sidebar entry from Infrastructure to Reactive daemon program; update agent-orchestrator-research to point to the combined item. 
Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- docs/astro.config.ts | 3 +- .../roadmap/agent-orchestrator-research.mdx | 2 +- .../docs/reference/roadmap/herdr-research.mdx | 88 ----------- .../roadmap/jackin-container-binary.mdx | 142 ++++++++++++------ 4 files changed, 94 insertions(+), 141 deletions(-) delete mode 100644 docs/src/content/docs/reference/roadmap/herdr-research.mdx diff --git a/docs/astro.config.ts b/docs/astro.config.ts index 134f975f5..308f4a5c4 100644 --- a/docs/astro.config.ts +++ b/docs/astro.config.ts @@ -172,7 +172,6 @@ export default defineConfig({ collapsed: true, items: [ { label: 'Overview', slug: 'reference/roadmap/agent-orchestrator-research' }, - { label: 'Herdr research', slug: 'reference/roadmap/herdr-research' }, { label: 'Fleet phase 1 — Foundation gaps', collapsed: true, @@ -286,6 +285,7 @@ export default defineConfig({ collapsed: true, items: [ { label: 'Overview', slug: 'reference/roadmap/jackin-daemon' }, + { label: 'jackin-container: in-container multiplexer server', slug: 'reference/roadmap/jackin-container-binary' }, { label: 'Jackin Desktop Agent Hub', slug: 'reference/roadmap/jackin-desktop-agent-hub' }, { label: 'Phase 2 — First reactive adapters', @@ -334,7 +334,6 @@ export default defineConfig({ items: [ { label: 'Bollard migration', slug: 'reference/roadmap/bollard-migration' }, { label: 'Construct user creation', slug: 'reference/roadmap/construct-user-creation' }, - { label: 'jackin-container: in-container supervisor binary', slug: 'reference/roadmap/jackin-container-binary' }, { label: 'Workspace registry cache', slug: 'reference/roadmap/workspace-registry-cache' }, ], }, diff --git a/docs/src/content/docs/reference/roadmap/agent-orchestrator-research.mdx b/docs/src/content/docs/reference/roadmap/agent-orchestrator-research.mdx index ffd85649b..442ce0611 100644 --- a/docs/src/content/docs/reference/roadmap/agent-orchestrator-research.mdx +++ 
b/docs/src/content/docs/reference/roadmap/agent-orchestrator-research.mdx @@ -27,7 +27,7 @@ the same problem: - Docker Sandboxes is the strongest commercial benchmark for **microVM sandboxing**: per-sandbox VM boundary, private Docker daemon, scoped workspace sharing, host-side network policy, and credential proxying. -- [Herdr](https://github.com/ogulcancelik/herdr) is the strongest reference for **terminal-multiplexer UX for multi-agent workflows**: four-state agent status (Blocked / Working / Done / Idle) detected with zero configuration via PTY heuristics, workspace-level status roll-up, notification suppression when the operator is already looking, sound escalation as opt-in, and blocking `wait` semantics on a Unix socket API. Its architecture (bare host processes, AGPL-3.0) rules out embedding, but its UX decisions directly inform [agent runtime status](/reference/roadmap/agent-runtime-status/), [agent attention prompts](/reference/roadmap/agent-attention-prompts/), and the daemon socket design. See [Herdr research](/reference/roadmap/herdr-research/) for the full evaluation. +- [Herdr](https://github.com/ogulcancelik/herdr) is the strongest reference for **terminal-multiplexer UX for multi-agent workflows**: four-state agent status (Blocked / Working / Done / Idle) detected with zero configuration via PTY heuristics, workspace-level status roll-up, notification suppression when the operator is already looking, sound escalation as opt-in, and blocking `wait` semantics on a Unix socket API. Its architecture (bare host processes, AGPL-3.0) rules out embedding, but its UX decisions directly inform [agent runtime status](/reference/roadmap/agent-runtime-status/), [agent attention prompts](/reference/roadmap/agent-attention-prompts/), and the daemon socket design. 
See [jackin-container: in-container multiplexer server](/reference/roadmap/jackin-container-binary/) for the full evaluation and the implementation plan that applies these concepts to Jackin's container model. - Conductor, Claude devcontainers, Trail of Bits' devcontainer, and private internal tools remain useful comparison points, but they are not the center of this program. diff --git a/docs/src/content/docs/reference/roadmap/herdr-research.mdx b/docs/src/content/docs/reference/roadmap/herdr-research.mdx deleted file mode 100644 index 4ae304f1c..000000000 --- a/docs/src/content/docs/reference/roadmap/herdr-research.mdx +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: "Herdr — Terminal Multiplexer Research" ---- - -**Status**: Open — research captured, no implementation committed - -## What Herdr is - -[Herdr](https://github.com/ogulcancelik/herdr) is a Rust terminal multiplexer purpose-built for managing multiple AI coding agents. It organises panes into workspaces scoped to a folder or git repo, tracks a four-state agent status (Blocked / Working / Done / Idle) by watching PTY output and foreground process state — zero configuration required, no agent hooks needed — and delivers host-side OS notifications with sound escalation. It exposes a Unix socket API that agent orchestration scripts can call to create workspaces, split panes, run commands, wait for status transitions, and read pane output. Sessions survive client detach and restore after a full restart. License: AGPL-3.0. - -## Why it is relevant - -Herdr and Jackin share the same problem space: multiple AI agents running in parallel, per-project workspace grouping, operator observability over which agent is blocked or waiting, and host-side notifications. 
That surface overlap is large enough to make Herdr the closest prior art to several open Jackin roadmap items — specifically [agent runtime status](/reference/roadmap/agent-runtime-status/), [agent attention prompts](/reference/roadmap/agent-attention-prompts/), and [console agent session control](/reference/roadmap/console-agent-session-control/) Phase 2. - -The fundamental difference is the containment model. Herdr wraps bare host processes; Jackin wraps Docker containers. That single difference shapes every downstream design decision: Herdr detects agent status by reading foreground process and PTY output directly, but those heuristics degrade when the agent runs inside a container and the terminal Herdr sees is `docker exec` rather than the agent itself. Herdr also has no concept of mount isolation, auth forwarding, role manifests, or any of the operator-contract surfaces Jackin is built around. - -The right relationship is research, not dependency. Herdr's design decisions validate and sharpen several open Jackin items; its code must not be embedded or distributed with Jackin. Ideas, concepts, and algorithms are not protected by copyright; the code that expresses them is. All reimplementation in Jackin must be written from scratch without copying Herdr source. 
- -## Evaluation against Jackin values - -| Value | Herdr | Notes for Jackin | -|---|---|---| -| Terminal-first | Yes — pure terminal, no Electron, no web dashboard | Validates jackin's own terminal-first position | -| Isolation before convenience | No — no container or process boundary | Herdr's whole approach collapses here; Jackin must not copy it | -| Runtime-neutral agents | Partial — Claude, Codex, Amp, Kimi, OpenCode, Gemini, Copilot all supported | Good runtime breadth, but via PTY wrapping rather than structured adapters | -| Role repos over ad-hoc setup | No — no role concept; agents run directly | Out of scope for Herdr by design | -| Explicit contracts | No — no launch summary, no mount visibility, no auth audit | Herdr assumes the host is already prepared | -| Real engineering environments | Partial — SSH remote tunneling works; no Kubernetes | Herdr's remote model is simpler than what `jackin-remote` needs | - -## Why not embed or depend on Herdr - -Three concrete blockers: - -1. **AGPL-3.0 license.** Any Jackin binary that contains or links Herdr source must be distributed under AGPL-3.0 or a compatible license. Jackin ships Apache 2.0. Embedding Herdr is a license conflict. -2. **Architecture mismatch.** Herdr detects agent status by reading the foreground process and PTY output of the agent's own terminal. When the agent runs inside a Docker container, the terminal Herdr wraps is `docker attach` or `docker exec` — not the agent. The status heuristics were designed for bare-host processes and degrade in the container-wrapped case. -3. **Redundancy with Jackin's own surfaces.** Herdr's agent status overlay is exactly what Jackin's own TUI console is building. Bringing Herdr in as an outer layer would mean maintaining two agent-status UIs with different data models. 
- -## Concepts to research and potentially reimplement independently - -### Two-stage done state - -Herdr distinguishes **Done** (work finished, operator has not reviewed it yet) from **Idle** (done and reviewed — the operator has seen the output and the slot is free). Jackin's current `AgentStatus` plan in [agent runtime status](/reference/roadmap/agent-runtime-status/) has a single `Idle` state. The two-stage split matters for the Desktop Agent Hub's "ready for review" surface and for the autonomous queue's dispatch logic: a `Done` slot should not be refilled until the operator has acknowledged the output. Research task: evaluate whether adding a `ReadyForReview` (or equivalent `Done`) variant to `AgentStatus` is worth the extra state, or whether the `Idle` + a separate operator-acknowledgement flag achieves the same effect with less complexity. - -### Workspace-level status roll-up - -Herdr's workspace sidebar shows the most urgent child status at the workspace level — one blocked agent makes the whole workspace show Blocked. Jackin's console Instances panel currently has no aggregation across instances within a workspace. The rule Herdr validates is simple: `most_urgent(children)` where the urgency order is `Blocked > Working > Done > Idle`. When Jackin's agent runtime status lands, the console workspace row should adopt this roll-up so the operator can scan a multi-workspace list and see at a glance which workspace needs attention. - -### Notification suppression when the operator is already looking - -Herdr suppresses sound and toast notifications when the relevant terminal tab or pane is already focused. Without this rule, a notification fires even when the operator is staring at the agent output — pure spam. Jackin's [agent attention prompts](/reference/roadmap/agent-attention-prompts/) design should include the equivalent: if the operator's console currently has that workspace visible and focused, skip or downgrade the OS notification. 
The precise triggering condition needs design — Jackin's TUI knows which workspace row is selected, but "focused" in the console sense is coarser than Herdr's pane-level focus. - -### Sound escalation as opt-in - -Herdr ships with configurable sound escalation (silent toast by default, sound after N seconds if the operator doesn't respond). Jackin's attention prompts design already has the `attention.sound_escalation_after` knob. Herdr's live usage confirms this is a real operator need and not over-engineering — the opt-in escalation pattern is validated and should stay in V1. - -### Click-to-focus routing from OS notification - -Herdr routes OS notification clicks to the specific agent pane. Jackin's attention prompts design already plans click-to-focus for Ghostty Phase 1. Herdr validates that the UX is worth the implementation effort and that operators expect clicking a notification to do something useful, not just open the terminal at whatever was last focused. - -### Unix socket API for external orchestration - -Herdr exposes a Unix socket API that orchestration scripts call to query workspace/pane state, run commands, and wait on status transitions (`herdr wait agent-status 1-1 --status done`). Jackin's daemon design already plans a similar socket (`~/.jackin/run/jackin-daemon.sock`); Herdr's API surface is useful prior art for the daemon's query endpoints. Specifically the `wait` semantics — blocking until a status transition rather than polling — are worth considering for Jackin's daemon event subscription protocol. - -### PTY inference fallback heuristic - -Herdr's zero-config status detection reads foreground process state plus absence of PTY output over a threshold. Jackin's [agent attention prompts](/reference/roadmap/agent-attention-prompts/) plans the same PTY inference fallback for agents that do not call the `jackin-attention` MCP server. 
Herdr's implementation is the closest available reference for how to tune the heuristic's false-positive rate before shipping. Research task: read Herdr's status detection source and evaluate whether the heuristic generalises to the container-attach case (where Jackin reads PTY from `docker exec`, not directly). - -## What to defer - -- **Replacing tmux with Herdr as the session substrate.** Phase 2 of [console agent session control](/reference/roadmap/console-agent-session-control/) plans reconnectable sessions using tmux inside containers. Herdr is an alternative approach for bare-host session persistence, but it cannot serve as the substrate for container-resident sessions. -- **Adopting Herdr's theme system.** Herdr ships 17 built-in themes. Jackin's TUI uses a fixed palette; adding themes is not a priority. -- **Herdr's SSH remote tunneling.** Herdr supports attaching through SSH. Jackin has a separate [jackin-remote](/reference/roadmap/jackin-remote/) item with a different threat model (explicit operator-controlled tunneling, not transparent PTY forwarding). -- **Herdr's socket API protocol verbatim.** The wire format and command vocabulary are Herdr-specific; Jackin's daemon protocol should be designed for Jackin's data model. Herdr is inspiration for the `wait` semantics, not a drop-in spec. - -## Research tasks - -- Study Herdr's PTY status heuristic implementation — understand how it determines the transition thresholds and what signals it uses beyond output absence. Evaluate how those signals degrade when the observed process is `docker attach` rather than the agent itself. -- Study Herdr's `herdr wait agent-status` blocking semantics — specifically how the socket API delivers the status-change event to the waiting caller. This is directly relevant to the daemon event subscription protocol design. 
-- Evaluate Herdr's two-stage done state (Done vs Idle) against Jackin's `AgentStatus` enum and propose a concrete update to [agent runtime status](/reference/roadmap/agent-runtime-status/) if the extra state is net-positive. -- Map Herdr's notification suppression rule (suppress when tab focused) to Jackin's attention prompts design and verify the "suppress when workspace focused in console" case is explicitly covered. - -## See Also - -- [Agent Orchestrator Research Program](/reference/roadmap/agent-orchestrator-research/) — parent program; Herdr should be added to the research map table -- [Agent runtime status](/reference/roadmap/agent-runtime-status/) — two-stage done state and workspace roll-up research applies here -- [Agent attention prompts](/reference/roadmap/agent-attention-prompts/) — notification suppression, sound escalation validation, and PTY inference fallback apply here -- [Console agent session control](/reference/roadmap/console-agent-session-control/) — Phase 2 reconnectable sessions; session persistence research applies here -- [jackin daemon](/reference/roadmap/jackin-daemon/) — daemon socket design; Herdr socket API semantics apply here diff --git a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx index aa1223a1e..0a5582990 100644 --- a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx +++ b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx @@ -7,7 +7,7 @@ import RepoFile from '../../../../components/RepoFile.astro' ## Problem -The current container supervisor is a bash wait loop () that keeps the container alive while agent sessions run via `docker exec`. It has two immediate limitations and one deeper architectural gap: +The current container supervisor is a bash wait loop () that keeps the container alive while agent sessions run via `docker exec`. It has two immediate limitations and one deeper architectural gap. 
**Last-session cleanup does not fire.** When a tmux session exits, the supervisor keeps the container running. Because the container state is `Running` rather than `Stopped`, the host jackin cleanup path (`finalize_foreground_session`, container teardown, DinD/network/certs removal) never fires automatically. The operator must explicitly run `jackin eject` to clean up a container after all sessions end. @@ -15,11 +15,20 @@ The current container supervisor is a bash wait loop (.sock:/run/jackin/jackin.sock ... ``` -The socket directory on the host (`/run/jackin/`) is created by jackin. One socket file per instance, named by instance ID. +One socket file per instance on the host, named by instance ID. -**Protocol:** Newline-delimited JSON. Each request is one JSON object followed by `\n`. Each response is one JSON object followed by `\n`. No framing, no length prefix — connection-per-request is fine for v1. +**Protocol:** Newline-delimited JSON. Each request is one JSON object followed by `\n`. Each response is one JSON object followed by `\n`. No framing, no length prefix — connection-per-request for v1. -### Session management API (evolution across phases) +### Session management API The API grows with each phase. The full target API is: -**Phase 2 — status query:** - -```json -{"method": "status"} -``` -Response: list of active sessions with name, agent type, created-at timestamp, and status. 
+| Method | Phase | Description | +|---|---|---| +| `status` | 2 | List all sessions with name, agent type, created-at, and state | +| `session.create` | 3 | Spawn a new agent or shell session | +| `session.kill` | 3 | Terminate a session by ID | +| `session.title` | 3 | Read the current terminal title or process name for a session | +| `session.attach` | 3 | Return a PTY attachment handle so the client can connect | +| `events` | 3 | Upgrade connection to a streaming event channel | -**Phase 3 — full multiplexer control:** +**Event stream (Phase 3):** `session-started`, `session-ended`, `all-sessions-ended`, `agent-state-changed {session_id, state}`. -| Method | Description | -|---|---| -| `status` | List all sessions with metadata and agent state | -| `session.create` | Spawn a new agent session inside the container | -| `session.kill` | Terminate a session by ID | -| `session.title` | Read the current title of a session (terminal title seq or process name) | -| `session.attach` | Request a PTY attachment handle so the client can attach to a running session | -| `events` | Upgrade connection to an event stream | +### Agent state model -**Event stream (Phase 3):** `session-started`, `session-ended`, `all-sessions-ended`, `agent-state-changed {session, state}`. +Each session tracks one of four states. These states are inferred from PTY output activity and foreground process state — no agent hooks or configuration required. -**Agent states per session:** `working` (output flowing, foreground process active), `blocked` (waiting for input — detected via output silence + process state), `done` (work finished, operator not yet reviewed), `idle` (reviewed or no work in progress). The two-stage `done` / `idle` split — validated by [Herdr research](/reference/roadmap/herdr-research/) — matters for the autonomous queue and the desktop app's "ready for review" indicator. 
+| State | Meaning | +|---|---| +| `working` | Output flowing or foreground process actively running | +| `blocked` | Silent for N seconds with a foreground process present — waiting for operator input | +| `done` | Work finished; the operator has not yet reviewed the output | +| `idle` | Reviewed or no work in progress | -Agent state is inferred from PTY output activity and foreground process state inside the container. No agent hooks or configuration changes are required. The heuristic design draws on concepts from the Herdr research (see [Herdr research](/reference/roadmap/herdr-research/)), rewritten from scratch without copying any Herdr source (Herdr is AGPL-3.0; Jackin is Apache-2.0). +The two-stage `done` / `idle` split is important: a `done` slot should not be automatically refilled by the autonomous task queue or cleaned up from the console until the operator has acknowledged the output. This distinction drives the "ready for review" indicator in the desktop app and the dispatch logic in future autonomous queue work. ### Session lifecycle tracking -**Phase 2** (Rust binary, tmux still present): binary watches the tmux server socket at `/tmp/tmux-<uid>/default` via `inotify`. Socket deleted → all sessions ended → exit 0. This replaces the 1-second bash polling loop from Phase 1. +**Phase 2** (Rust binary, tmux still present): the binary watches the tmux server socket at `/tmp/tmux-<uid>/default` via `inotify`. Socket deleted → all sessions ended → exit 0. This replaces the 1-second bash polling loop from Phase 1. -**Phase 3** (multiplexer): binary owns session lifecycle directly. No tmux. When the last session exits cleanly, the binary exits 0. When SIGTERM arrives, the binary terminates all sessions, then exits. +**Phase 3** (multiplexer phase): the binary owns session lifecycle directly. No tmux. When the last session exits cleanly, the binary exits 0. When `SIGTERM` arrives, all sessions are terminated, then the binary exits.
### Why replace tmux -- No structured API. Every tmux interaction from the host requires `docker exec tmux ...` — a subprocess round-trip with string output that must be parsed. -- No agent-state awareness. tmux reports session names and windows; it has no concept of whether the process inside is blocked, working, or done. -- No event stream. The host cannot subscribe to "a session ended" without polling. -- tmux adds size (binary in the image), startup overhead, and a dependency Jackin does not control. +- No structured API — every tmux interaction from the host requires `docker exec tmux ...`, a subprocess round-trip with string output that must be parsed. +- No agent-state awareness — tmux reports session names and windows; it has no concept of whether the process inside is blocked, working, or done. +- No event stream — the host cannot subscribe to "a session ended" without polling. +- tmux adds binary size to the image, startup overhead, and a dependency Jackin does not control. A purpose-built multiplexer in `jackin-container` gives Jackin a clean control plane: structured socket, typed events, agent-state inference, and no external process to coordinate with. @@ -104,17 +113,48 @@ A purpose-built multiplexer in `jackin-container` gives Jackin a clean control p **Phase 2:** add socket mount to `docker run`; update `inspect_agent_sessions` in to connect to socket instead of `docker exec tmux list-sessions`. -**Phase 3:** remove `tmux` from the derived image; update `docker exec tmux new-session` in and to issue `session.create` via the socket; update all other tmux shell-out call sites. +**Phase 3:** remove `tmux` from the derived image (); replace all `docker exec tmux ...` call sites in and with socket API calls. + +## Prior art: Herdr + +[Herdr](https://github.com/ogulcancelik/herdr) (`ogulcancelik/herdr`) is the closest public reference for the multiplexer server concept. 
It is a single Rust binary, built for exactly the same problem: managing multiple AI coding agents with per-project workspace grouping, four-state status tracking, a Unix socket API, and session persistence across client detach. + +Herdr cannot be embedded in or linked from Jackin: it is **AGPL-3.0**, which conflicts with Jackin's **Apache-2.0** license. Any reimplementation in `jackin-container` must be written from scratch. Ideas, concepts, and algorithms are not protected by copyright; the code that expresses them is. + +The deeper architectural difference: Herdr wraps **bare host processes**. Jackin wraps **Docker containers**. Herdr's status heuristics read foreground process state and PTY output from the agent's own terminal. When the agent runs inside a container and Herdr sees `docker attach` rather than the agent, the heuristics degrade. `jackin-container` runs *inside* the container — it sees the agent's PTY output directly, making the same heuristic approach reliable. + +### Concepts to implement independently + +**Two-stage done state.** Herdr distinguishes `Done` (work finished, not yet reviewed) from `Idle` (reviewed or empty). Jackin's `AgentStatus` model should adopt the same split for the same reasons: the autonomous task queue must not refill a `done` slot until the operator acknowledges, and the desktop app needs a distinct "ready for review" surface. Whether this lands as a `ReadyForReview` enum variant or as `Idle` + a separate acknowledgement flag is a design detail for Phase 3. + +**Workspace-level status roll-up.** Herdr's workspace sidebar shows the most urgent child status at the workspace level — one blocked agent makes the whole workspace show `Blocked`. The urgency order is `blocked > working > done > idle`. The console Instances panel should adopt this roll-up rule once agent-state events from Phase 3 are available. 
+ +**Notification suppression when already looking.** Herdr suppresses sound and toast notifications when the relevant pane is focused. Jackin's attention prompts design should include the equivalent: if the console currently has that workspace row focused, downgrade or skip the OS notification. The precise condition needs design (console selection is coarser than Herdr's pane-level focus). + +**Sound escalation as opt-in.** Herdr ships silent toast by default, with opt-in sound escalation after N seconds. This pattern is validated by Herdr's live usage and should stay in the attention prompts V1 design. + +**Blocking `wait` semantics on the socket.** Herdr's socket API lets callers block until a status transition (`herdr wait agent-status 1-1 --status done`). This is a better interface than polling for automation scripts and for the daemon's event subscription. The `events` stream in Phase 3 should support the same pattern: a subscriber blocks on the stream and receives the event when the transition occurs. + +**PTY inference heuristic.** Herdr detects agent state with zero configuration by reading foreground process state plus output-silence duration. `jackin-container` will use the same approach — with the advantage that it runs inside the container and reads the agent's PTY directly rather than through a `docker attach` wrapper. The specific threshold tuning (how many seconds of silence = `blocked`) is a Phase 3 research task. + +### What not to borrow from Herdr + +- **Herdr as the session substrate.** Herdr manages bare-host PTY processes. `jackin-container` needs a PTY multiplexer that runs inside a container, is statically linked, and has no host-side dependencies. Herdr's internals are the wrong shape for this. +- **Herdr's theme system.** Jackin's TUI uses a fixed palette. Themes are not a priority. 
+- **Herdr's SSH remote tunneling.** Herdr's transparent PTY forwarding over SSH has a different threat model than the explicit operator-controlled tunneling planned for [jackin-remote](/reference/roadmap/jackin-remote/). +- **Herdr's wire protocol verbatim.** The socket API vocabulary should be designed for Jackin's data model. Herdr is inspiration for the `wait` semantics, not a drop-in spec. ## Implementation plan ### Phase 1 — Cleanup gap closed *(shipped — bash supervisor with socket polling)* -The cleanup gap is fixed. <RepoFile path="docker/runtime/supervisor.sh" /> now monitors the tmux server socket (`/tmp/tmux-<uid>/default`) with a 1-second polling loop. When the socket disappears (last session ended and all clients disconnected), the supervisor exits 0 → container exits 0 → the host-side cleanup path runs automatically. A 60-second startup grace period prevents the supervisor from exiting before the first `docker exec tmux new-session` creates the socket. +**Current state:** containers still use tmux as the session layer. This is a temporary arrangement: `docker exec tmux new-session` creates sessions, `docker exec tmux attach-session` reconnects, and `docker exec tmux list-sessions` queries what is running. The bash supervisor wraps tmux from the outside and has no understanding of what is happening inside sessions. This is the minimal viable approach — it works, but it gives Jackin no visibility into agent state, no structured event stream, and no way to manage sessions without shelling into the container. + +**What shipped in Phase 1:** <RepoFile path="docker/runtime/supervisor.sh" /> now monitors the tmux server socket (`/tmp/tmux-<uid>/default`) with a 1-second polling loop. When the socket disappears (last session ended and all clients disconnected), the supervisor exits 0 → container exits 0 → the host-side cleanup path runs automatically. A 60-second startup grace period prevents the supervisor from exiting before the first `docker exec tmux new-session` creates the socket.
<RepoFile path="src/runtime/launch.rs" /> was updated alongside it: when `docker exec` returns and the container is still `Running`, the code now calls `inspect_agent_sessions` to distinguish a clean agent exit (no sessions — run teardown) from a detached terminal (sessions active — keep the container alive). -The Rust binary with `inotify`-based watching is deferred. The bash polling approach closes the operator-facing gap at low implementation cost; the binary is justified once Phase 2 lands. +**Why this is temporary and why a purpose-built multiplexer makes sense:** researching [Herdr](https://github.com/ogulcancelik/herdr) — a Rust terminal multiplexer purpose-built for AI coding agents — confirmed that the missing piece is a session layer that understands AI agents natively. Herdr tracks four agent states (blocked / working / done / idle) with zero configuration, exposes a Unix socket API for session control, and streams status-change events. It is designed for bare host processes (not containers) and is AGPL-3.0, so it cannot be embedded in Jackin. But the core concept is exactly right: a multiplexer that knows about agents, not just terminals. `jackin-container` will become that multiplexer — running inside the container where it can see the agent's PTY output directly, exposed through a Unix socket that the host and desktop app both drive. The tmux layer goes away in Phase 3. ### Phase 2 — Rust binary skeleton + Unix socket status command *(structured session inventory)* @@ -130,28 +170,30 @@ The Rust binary with `inotify`-based watching is deferred. The bash polling appr ### Phase 3 — In-container multiplexer *(replace tmux)* -- Implement PTY session management in `jackin-container`: spawn sessions, manage PTY lifecycle, reap processes. -- Implement agent state inference from PTY output and foreground process state. +- Implement PTY session management in `jackin-container`: spawn PTY sessions, manage their lifecycle, reap child processes.
+- Implement agent state inference from PTY output and foreground process state (working / blocked / done / idle). Design the silence threshold and process-state heuristic. +- Implement the two-stage `done` / `idle` split and the operator-acknowledgement mechanism. - Expand socket API: `session.create`, `session.kill`, `session.title`, `session.attach`, `events`. - Implement event stream: `session-started`, `session-ended`, `all-sessions-ended`, `agent-state-changed`. -- Remove `tmux` from the derived image (`docker/construct/Dockerfile`). +- Implement workspace-level status roll-up (`blocked > working > done > idle`) on the host side, consuming events from the socket. +- Remove `tmux` from the derived image (<RepoFile path="docker/construct/Dockerfile" />). - Replace all `docker exec tmux ...` call sites in the host CLI with socket API calls. -- Update console session panel to consume agent-state events rather than polling. - -This phase is the prerequisite for the desktop app: once `jackin-container` manages sessions directly and streams events, the desktop companion can subscribe to the socket and render live agent state without any polling or `docker exec` overhead. +- Update console session panel to consume `agent-state-changed` events rather than polling. +- Update attention prompts to subscribe to `blocked` state events rather than doing PTY polling from the host. -### Phase 4 — Daemon integration and desktop app bridge *(deferred — depends on jackin daemon and desktop app)* +### Phase 4 — Daemon integration and desktop app bridge *(deferred)* -- Add daemon subscription: the jackin daemon connects to each running container's socket and maintains a live session index. -- Desktop app reads from the daemon's aggregated view; each container's `jackin-container` socket is the per-container data source. -- Advanced session commands: session snapshots, resource usage, log streaming. +- Daemon connects to each running container's socket and maintains a live session index across all containers.
+- Desktop app reads from the daemon's aggregated view; each `jackin-container` socket is the per-container data source. The multiplexer IS the server; the desktop companion reads what is happening inside the container through it. +- Advanced commands: session snapshots, resource usage per session, log streaming. - See [jackin daemon](/reference/roadmap/jackin-daemon/) and [Jackin Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/). ## Relationship to other roadmap items -- **[Herdr research](/reference/roadmap/herdr-research/)** — Herdr is the closest prior art for the multiplexer vision (agent-state inference, Unix socket API, session persistence). The two-stage done/idle state, workspace-level roll-up, and PTY inference heuristics from that research inform Phase 3 design. No Herdr source is reused (AGPL-3.0 license conflict). -- **[Console agent session control](/reference/roadmap/console-agent-session-control/)** — Phase 4 of that item (session reconciliation in the console) unblocks once Phase 2 of this item ships: the binary exposes live session state, eliminating manifest-snapshot reconciliation. -- **[Agent runtime status](/reference/roadmap/agent-runtime-status/)** — idle/busy indicators per session require the binary to track agent process state inside the container. Phase 3 events are the natural delivery mechanism. -- **[Agent attention prompts](/reference/roadmap/agent-attention-prompts/)** — `agent-state-changed` events from Phase 3 replace the PTY polling approach planned for that item. -- **[jackin daemon](/reference/roadmap/jackin-daemon/)** — the binary is the per-container endpoint the daemon talks to. Phase 4 of this item and the daemon's container-watch phase are designed together. -- **[Jackin Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/)** — Phase 3 of this item is the prerequisite: the desktop app's live session view is driven by `jackin-container` socket events, aggregated by the daemon. 
+- **[Agent Orchestrator Research Program](/reference/roadmap/agent-orchestrator-research/)** — Herdr is evaluated there as the strongest prior art for the multiplexer vision. The full comparative table (Herdr vs. Jackin values) lives in the research overview. +- **[Console agent session control](/reference/roadmap/console-agent-session-control/)** — Phase 4 of that item unblocks once Phase 2 of this item ships: the binary exposes live session state, eliminating manifest-snapshot reconciliation. +- **[Agent runtime status](/reference/roadmap/agent-runtime-status/)** — the `agent-state-changed` event stream from Phase 3 is the delivery mechanism for blocked/working/done/idle indicators in the console and hardline. +- **[Agent attention prompts](/reference/roadmap/agent-attention-prompts/)** — `blocked` events replace PTY polling from the host; `done` events trigger the "ready for review" notification path. +- **[jackin daemon](/reference/roadmap/jackin-daemon/)** — `jackin-container` is the per-container endpoint the daemon subscribes to. Phase 4 of this item and the daemon's container-watch phase are designed together. +- **[Jackin Desktop Agent Hub](/reference/roadmap/jackin-desktop-agent-hub/)** — Phase 3 of this item is the prerequisite: the desktop app's live session view is driven by `jackin-container` socket events aggregated by the daemon. +- **[jackin-remote](/reference/roadmap/jackin-remote/)** — remote containers will need a `jackin-container` socket accessible over the tunnel; the socket mount and auth model need to account for the remote path. 
From 4b484fe9c739890a5aba116a2ba22a6d92b46907 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 18:08:49 +0000 Subject: [PATCH 04/15] style: apply rustfmt to launch.rs Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/runtime/launch.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index abc255cd1..4edfdfed3 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2036,11 +2036,8 @@ fn load_role_with( #[allow(clippy::match_same_arms)] match inspect_container_state(runner, &container_name) { ContainerState::Running => { - let sessions = inspect_agent_sessions( - runner, - &container_name, - &ContainerState::Running, - ); + let sessions = + inspect_agent_sessions(runner, &container_name, &ContainerState::Running); if matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()) { if !matches!( decision, From 76d8fc19b42715c7f8617e75b0ac218fcbce0255 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 18:16:02 +0000 Subject: [PATCH 05/15] refactor(launch): extract run_clean_exit_teardown helper Three match arms (Running+no-sessions, Stopped/0, NotFound) shared the same write_instance_status(CleanExited)+cleanup.run() pattern with an inline Preserved guard. Extract into a named helper to remove duplication and make the shared semantics explicit. Also name the sessions-empty condition (`no_sessions`) for clarity, and drop the now-unnecessary `#[allow(clippy::match_same_arms)]` annotation (clippy does not flag the Stopped/0 and NotFound arms as same since their patterns are semantically distinct). 
Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/runtime/launch.rs | 68 +++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index 4edfdfed3..276cd0c1a 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2033,49 +2033,37 @@ fn load_role_with( // - Stopped / 0 → user exited cleanly inside Claude Code. Tear down. // - Stopped / ≠0 or OOM-killed → crash. Preserve so `jackin hardline` // can restart the existing container + DinD sidecar. - #[allow(clippy::match_same_arms)] match inspect_container_state(runner, &container_name) { ContainerState::Running => { let sessions = inspect_agent_sessions(runner, &container_name, &ContainerState::Running); - if matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()) { - if !matches!( + let no_sessions = + matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()); + if no_sessions { + run_clean_exit_teardown( + paths, + &container_state, + &mut instance_manifest, decision, - crate::isolation::finalize::FinalizeDecision::Preserved - ) { - write_instance_status( - paths, - &container_state, - &mut instance_manifest, - InstanceStatus::CleanExited, - )?; - } - cleanup.run(runner); + &cleanup, + runner, + )?; } else { cleanup.disarm(); } } - ContainerState::Stopped { - exit_code: 0, - oom_killed: false, - } if matches!( - decision, - crate::isolation::finalize::FinalizeDecision::Preserved - ) => - { - cleanup.run(runner); - } ContainerState::Stopped { exit_code: 0, oom_killed: false, } => { - write_instance_status( + run_clean_exit_teardown( paths, &container_state, &mut instance_manifest, - InstanceStatus::CleanExited, + decision, + &cleanup, + runner, )?; - cleanup.run(runner); } ContainerState::Stopped { .. 
} => { write_instance_status( @@ -2102,13 +2090,14 @@ fn load_role_with( crate::isolation::finalize::FinalizeDecision::Preserved ) => {} ContainerState::NotFound => { - write_instance_status( + run_clean_exit_teardown( paths, &container_state, &mut instance_manifest, - InstanceStatus::CleanExited, + decision, + &cleanup, + runner, )?; - cleanup.run(runner); } } @@ -2501,6 +2490,27 @@ fn matching_instance_manifests( ) } +/// Write `CleanExited` status and run container teardown, unless `decision` +/// is `Preserved` — in that case isolation already handled the worktree and +/// status tracking, so only teardown runs. +fn run_clean_exit_teardown( + paths: &JackinPaths, + state_dir: &std::path::Path, + manifest: &mut InstanceManifest, + decision: crate::isolation::finalize::FinalizeDecision, + cleanup: &LoadCleanup, + runner: &mut impl CommandRunner, +) -> anyhow::Result<()> { + if !matches!( + decision, + crate::isolation::finalize::FinalizeDecision::Preserved + ) { + write_instance_status(paths, state_dir, manifest, InstanceStatus::CleanExited)?; + } + cleanup.run(runner); + Ok(()) +} + fn write_instance_status( paths: &JackinPaths, state_dir: &std::path::Path, From 0518a806b40ad17a21745e9e3d7804da1e2a875d Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 18:21:49 +0000 Subject: [PATCH 06/15] fix(launch): preserve Running+Preserved containers; log Unavailable; improve comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review findings addressed: - Running+no-sessions+Preserved: guard with !is_preserved so container and DinD stay alive for `jackin hardline` reconnect. Previously run_clean_exit_teardown fired unconditionally, removing the container before the operator could address preserved isolation worktrees. - AgentSessionInventory::Unavailable: emit debug_log with the reason string instead of silently treating it as sessions-present. 
- supervisor.sh exit-1: write a diagnostic to stderr naming the missing socket path before exiting so diagnose_premature_exit log capture includes supervisor context. Also fix comment accuracy: "once Phase 2 justifies overhead" → "will be removed in Phase 2"; remove ≤1 s polling-specific bound from match classifier comment; add NotFound/InspectUnavailable arms to the comment; tighten run_clean_exit_teardown doc comment. Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- docker/runtime/supervisor.sh | 5 +++-- src/runtime/launch.rs | 31 ++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/docker/runtime/supervisor.sh b/docker/runtime/supervisor.sh index ee01c8d7c..d1921c046 100644 --- a/docker/runtime/supervisor.sh +++ b/docker/runtime/supervisor.sh @@ -11,8 +11,8 @@ # clients have disconnected. Watching the socket file is reliable and # requires no tmux hooks or configuration. # -# Will be replaced by the `jackin-container` Rust binary once Phase 2 -# (Unix socket status interface) justifies the build/distribution overhead. +# Will be removed in Phase 2 when the `jackin-container` Rust binary takes +# over as PID 1 with inotify-based socket watching. # See reference/roadmap/jackin-container-binary for the full plan. # # No `set -e`: signal-killed `wait` exits non-zero; `set -e` would misread @@ -39,6 +39,7 @@ done # diagnose_premature_exit surfaces the container logs rather than returning # a cryptic "container is not running" error. if [ ! -S "$TMUX_SOCKET" ]; then + echo "supervisor: no tmux socket at ${TMUX_SOCKET} after 60 s; is tmux installed and starting correctly?" >&2 exit 1 fi diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index 276cd0c1a..a5b7e4591 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2027,19 +2027,36 @@ fn load_role_with( // tear the container down or preserve it for `jackin hardline` to // restart: // - Running + active sessions → terminal closed (user detached). 
Keep it. - // - Running + no sessions → agent exited; supervisor will stop the - // container within ≤1 s but inspect raced - // ahead. Treat the same as Stopped/0. + // - Running + no sessions + !Preserved → agent exited; supervisor will + // stop the container within its polling + // interval but inspect raced ahead. Tear + // down same as Stopped/0. + // - Running + no sessions + Preserved → agent exited with dirty + // worktrees. Keep container + DinD alive + // so `jackin hardline` can reconnect. // - Stopped / 0 → user exited cleanly inside Claude Code. Tear down. // - Stopped / ≠0 or OOM-killed → crash. Preserve so `jackin hardline` // can restart the existing container + DinD sidecar. + // - NotFound → container was removed externally. Treat as Stopped/0. + // - InspectUnavailable → Docker unreachable; keep everything alive. + let is_preserved = matches!( + decision, + crate::isolation::finalize::FinalizeDecision::Preserved + ); match inspect_container_state(runner, &container_name) { ContainerState::Running => { let sessions = inspect_agent_sessions(runner, &container_name, &ContainerState::Running); + if let AgentSessionInventory::Unavailable(ref reason) = sessions { + crate::debug_log!( + "instance", + "inspect_agent_sessions unavailable for {container_name}: {reason}; \ + treating as sessions-present (container preserved)", + ); + } let no_sessions = matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()); - if no_sessions { + if no_sessions && !is_preserved { run_clean_exit_teardown( paths, &container_state, @@ -2490,9 +2507,9 @@ fn matching_instance_manifests( ) } -/// Write `CleanExited` status and run container teardown, unless `decision` -/// is `Preserved` — in that case isolation already handled the worktree and -/// status tracking, so only teardown runs. +/// Write `CleanExited` status and run container teardown. 
When `decision` is +/// `Preserved`, the status write is skipped (isolation wrote it earlier) but +/// teardown still runs. fn run_clean_exit_teardown( paths: &JackinPaths, state_dir: &std::path::Path, From db4abdf7f8e4bbcc13059402194cbfea67d44f7b Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 18:24:17 +0000 Subject: [PATCH 07/15] refactor(launch): pass is_preserved bool to run_clean_exit_teardown The helper was taking FinalizeDecision and re-deriving the Preserved check internally, duplicating the matches! expression already computed as `is_preserved` at the call site. Passing the bool removes the redundancy and removes run_clean_exit_teardown's dependency on FinalizeDecision. Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/runtime/launch.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index a5b7e4591..7e99ee605 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2061,7 +2061,7 @@ fn load_role_with( paths, &container_state, &mut instance_manifest, - decision, + is_preserved, &cleanup, runner, )?; @@ -2077,7 +2077,7 @@ fn load_role_with( paths, &container_state, &mut instance_manifest, - decision, + is_preserved, &cleanup, runner, )?; @@ -2111,7 +2111,7 @@ fn load_role_with( paths, &container_state, &mut instance_manifest, - decision, + is_preserved, &cleanup, runner, )?; @@ -2507,21 +2507,18 @@ fn matching_instance_manifests( ) } -/// Write `CleanExited` status and run container teardown. When `decision` is -/// `Preserved`, the status write is skipped (isolation wrote it earlier) but -/// teardown still runs. +/// Write `CleanExited` status and run container teardown. When `preserved` is +/// true, the status write is skipped (isolation wrote it earlier) but teardown +/// still runs. 
fn run_clean_exit_teardown( paths: &JackinPaths, state_dir: &std::path::Path, manifest: &mut InstanceManifest, - decision: crate::isolation::finalize::FinalizeDecision, + preserved: bool, cleanup: &LoadCleanup, runner: &mut impl CommandRunner, ) -> anyhow::Result<()> { - if !matches!( - decision, - crate::isolation::finalize::FinalizeDecision::Preserved - ) { + if !preserved { write_instance_status(paths, state_dir, manifest, InstanceStatus::CleanExited)?; } cleanup.run(runner); From 650035fbdbfd0e6724ddf294cb3e1b22e34b3c39 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Mon, 18 May 2026 18:28:33 +0000 Subject: [PATCH 08/15] fix(launch): log NotFound+Preserved anomaly; fix comment inaccuracy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The NotFound+Preserved guard arm was silently empty — a container removed externally while finalization was still running left a stale preserved-status record on disk with zero diagnostic trace. Add a debug_log so --debug output captures the anomaly. Also fix the match classifier comment: "NotFound → Treat as Stopped/0" was wrong for the Preserved case. Split into two bullets making the distinct behaviors explicit. Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/runtime/launch.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index 7e99ee605..cba9f1c61 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2037,7 +2037,9 @@ fn load_role_with( // - Stopped / 0 → user exited cleanly inside Claude Code. Tear down. // - Stopped / ≠0 or OOM-killed → crash. Preserve so `jackin hardline` // can restart the existing container + DinD sidecar. - // - NotFound → container was removed externally. Treat as Stopped/0. + // - NotFound + !Preserved → removed externally. Treat as Stopped/0. + // - NotFound + Preserved → removed externally while preserved status + // stands on disk. 
Nothing to clean up; skip. // - InspectUnavailable → Docker unreachable; keep everything alive. let is_preserved = matches!( decision, @@ -2105,7 +2107,14 @@ fn load_role_with( if matches!( decision, crate::isolation::finalize::FinalizeDecision::Preserved - ) => {} + ) => + { + crate::debug_log!( + "instance", + "container {container_name} not found after session with Preserved decision; \ + removed externally during finalization — preserved status on disk stands", + ); + } ContainerState::NotFound => { run_clean_exit_teardown( paths, From 952eebdcc34f02cc97062687ea59823ffbd2379f Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 00:51:10 +0700 Subject: [PATCH 09/15] fix(supervisor): detect stale tmux socket via list-sessions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `[ -S socket ]` only checks if the socket file exists. When tmux crashes or is killed without cleanup, it leaves a stale socket on disk — the file-existence check returns true forever and the supervisor never exits. Replace the monitoring loop condition with `tmux list-sessions &>/dev/null` which probes the server itself: if the socket is stale or the server has exited, the command fails and the supervisor exits 0, triggering host-side cleanup. The grace-period check (waiting for the socket file to appear) keeps the plain file test — at that stage the server hasn't started yet so list-sessions would always fail. Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- docker/runtime/supervisor.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docker/runtime/supervisor.sh b/docker/runtime/supervisor.sh index d1921c046..a8ab205be 100644 --- a/docker/runtime/supervisor.sh +++ b/docker/runtime/supervisor.sh @@ -7,9 +7,10 @@ # surface the container logs. 
# # The tmux server creates its socket at /tmp/tmux-<uid>/default when the -# first session starts and removes it when the last session ends and all -# clients have disconnected. Watching the socket file is reliable and -# requires no tmux hooks or configuration. +# first session starts. The grace period watches for the socket file to +# appear; the monitor loop polls via `tmux list-sessions` rather than a +# plain file-existence check so a stale socket (tmux crashed without +# cleanup) doesn't keep the supervisor alive indefinitely. # # Will be removed in Phase 2 when the `jackin-container` Rust binary takes # over as PID 1 with inotify-based socket watching. @@ -43,9 +44,11 @@ if [ ! -S "$TMUX_SOCKET" ]; then exit 1 fi -# Wait for the last session to end. The tmux server removes the socket -# immediately after the last session closes and all clients disconnect. -while [ -S "$TMUX_SOCKET" ]; do +# Wait for the last session to end. Poll via `tmux list-sessions` rather +# than a plain socket-file existence check: if tmux crashes or is killed +# without removing the socket, a stale file would keep the supervisor +# alive indefinitely. +while tmux list-sessions &>/dev/null; do sleep 1 & wait $! || true done From d1adde04798cc02df077f00cce101be2d41b6f75 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 01:21:19 +0700 Subject: [PATCH 10/15] fix(finalize): check sessions before preserving still-running container MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the docker exec returns but the container is still Running, the old code immediately returned Preserved, treating it the same as a detach. This was incorrect for the common case where the agent exited cleanly: the container stays Running for up to 1 s while the supervisor's poll loop detects the tmux server exit and stops the container. 
The symptom: DinD sidecars and networks left running after the agent exits cleanly because cleanup was disarmed (is_preserved=true). Fix: when exit_code=None and not OOM-killed, probe the container for live tmux sessions before deciding: - sessions present → real detach (Ctrl-B D), preserve as before - no sessions → supervisor lag after clean agent exit, fall through to finalize_clean_exit so isolation worktrees are swept and the container/DinD/network are torn down Add has_tmux_sessions() helper that runs the same docker exec command as inspect_agent_sessions but returns a plain bool. Returns false on exec failure (container stopping) so the clean-exit path fires rather than silently preserving a terminating container. Update tests: - still_running_preserves_records → split into two tests: one with a session name in the queue (real detach → Preserved) and one with an empty response (supervisor lag → Cleaned) - load_agent_writes_instance_manifest: expected status changes from restore_available to clean_exited, reflecting the correct post-exit teardown when no sessions remain Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/isolation/finalize.rs | 65 +++++++++++++++++++++++++++++++++++++-- src/runtime/launch.rs | 6 +++- 2 files changed, 68 insertions(+), 3 deletions(-) diff --git a/src/isolation/finalize.rs b/src/isolation/finalize.rs index d6e7f5a2f..6fbe1c808 100644 --- a/src/isolation/finalize.rs +++ b/src/isolation/finalize.rs @@ -121,6 +121,27 @@ pub fn finalize_foreground_session( i = is_interactive, ); if outcome.exit_code.is_none() || outcome.oom_killed || outcome.exit_code != Some(0) { + // Still-running container (exit_code=None, not OOM): the docker exec + // returned but the supervisor's 1-second poll loop may not have caught the + // tmux server exit yet. 
Check sessions to distinguish detach from lag: + // - sessions present → real detach (Ctrl-B D); preserve as before + // - no sessions → supervisor lag after clean agent exit; fall through to + // finalize_clean_exit so isolation worktrees are swept normally + if outcome.exit_code.is_none() && !outcome.oom_killed && !has_tmux_sessions(runner, container_name) { + debug_log!( + "isolation", + "finalize: container={c} still running but no tmux sessions; \ + supervisor lag after clean exit — proceeding to isolation cleanup", + c = container_name, + ); + return finalize_clean_exit( + container_name, + container_state_dir, + is_interactive, + prompt, + runner, + ); + } debug_log!( "isolation", "finalize: container={c} preserved (non-clean exit)", @@ -137,6 +158,26 @@ pub fn finalize_foreground_session( ) } +fn has_tmux_sessions(runner: &mut impl CommandRunner, container_name: &str) -> bool { + // Run via sh to suppress the "no server running" error tmux emits when the + // socket is stale. Both "no server" and "no sessions" collapse to exit 0 with + // empty stdout so the caller only sees a non-empty result when sessions exist. + match runner.capture( + "docker", + &[ + "exec", + container_name, + "sh", + "-c", + "tmux list-sessions -F '#{session_name}' 2>/dev/null || true", + ], + None, + ) { + Ok(output) => !output.trim().is_empty(), + Err(_) => false, + } +} + fn finalize_clean_exit( container_name: &str, container_state_dir: &Path, @@ -547,10 +588,11 @@ mod tests { use crate::runtime::test_support::FakeRunner; #[test] - fn still_running_preserves_records() { + fn still_running_with_sessions_preserves() { + // Session list non-empty → real detach → Preserved. 
let dir = TempDir::new().unwrap(); let mut p = NoPrompt; - let mut r = FakeRunner::default(); + let mut r = fake_with_outputs(&["jackin-claude-abc"]); let dec = finalize_foreground_session( "jackin-x", dir.path(), @@ -563,6 +605,25 @@ mod tests { assert_eq!(dec, FinalizeDecision::Preserved); } + #[test] + fn still_running_no_sessions_proceeds_to_clean_exit() { + // Empty session list → supervisor lag after clean agent exit → + // finalize_clean_exit → Cleaned (no isolation records to preserve). + let dir = TempDir::new().unwrap(); + let mut p = NoPrompt; + let mut r = FakeRunner::default(); + let dec = finalize_foreground_session( + "jackin-x", + dir.path(), + AttachOutcome::still_running(), + false, + &mut p, + &mut r, + ) + .unwrap(); + assert_eq!(dec, FinalizeDecision::Cleaned); + } + #[test] fn stopped_non_zero_preserves_records() { let dir = TempDir::new().unwrap(); diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index cba9f1c61..7f873315a 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -6248,7 +6248,11 @@ plugins = [] assert!(body.contains(r#""role_key": "agent-smith""#)); assert!(body.contains(r#""agent_runtime": "claude""#)); assert!(body.contains(r#""host_workdir_fingerprint": "sha256:"#)); - assert!(body.contains(r#""status": "restore_available""#)); + // With no tmux sessions after docker exec returns, the new + // finalize path treats it as a clean exit (supervisor lag) rather + // than a detach — so the final status is clean_exited, not + // restore_available. 
+ assert!(body.contains(r#""status": "clean_exited""#)); let index_body = std::fs::read_to_string(paths.data_dir.join("instances.json")).unwrap(); assert!(index_body.contains(&format!(r#""container_base": "{container_name}""#))); } From 51530daca5f48b035941b730b98c35d0d9fce5b3 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 01:30:14 +0700 Subject: [PATCH 11/15] fix(launch): always tear down DinD when container exits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously DinD was kept alive in two cases to support jackin hardline: - Stopped with non-zero exit (crash) - Running + no sessions + Preserved (dirty worktrees) Neither case is necessary: isolation worktrees live on the host filesystem and are accessible without DinD. Keeping DinD alive only created orphaned sidecars when the operator exited the agent. New rule: tear down DinD/network whenever the container is no longer running with active sessions. The only case that keeps DinD alive is Running + sessions present (real detach via Ctrl-B D — operator can reconnect via jackin hardline). Changes: - Running + no sessions: drop the `&& !is_preserved` guard so teardown fires regardless of preserved isolation state - Stopped (non-zero / OOM): replace cleanup.disarm() with cleanup.run() - NotFound + Preserved: add cleanup.run() after the anomaly log Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/runtime/launch.rs | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index 7f873315a..b37ae488d 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2023,23 +2023,20 @@ fn load_role_with( )?; } - // Classify how the interactive session ended so we know whether to - // tear the container down or preserve it for `jackin hardline` to - // restart: - // - Running + active sessions → terminal closed (user detached). Keep it. 
- // - Running + no sessions + !Preserved → agent exited; supervisor will - // stop the container within its polling - // interval but inspect raced ahead. Tear - // down same as Stopped/0. - // - Running + no sessions + Preserved → agent exited with dirty - // worktrees. Keep container + DinD alive - // so `jackin hardline` can reconnect. - // - Stopped / 0 → user exited cleanly inside Claude Code. Tear down. - // - Stopped / ≠0 or OOM-killed → crash. Preserve so `jackin hardline` - // can restart the existing container + DinD sidecar. - // - NotFound + !Preserved → removed externally. Treat as Stopped/0. - // - NotFound + Preserved → removed externally while preserved status - // stands on disk. Nothing to clean up; skip. + // Classify how the interactive session ended and tear down DinD/network + // unless the container is still running with active sessions (detach): + // - Running + active sessions → user detached (Ctrl-B D). Keep DinD so + // `jackin hardline` can reconnect. + // - Running + no sessions → agent exited; supervisor lag or stale socket. + // Tear down same as Stopped/0 regardless of + // preserved isolation state — worktrees live on + // the host and are accessible without DinD. + // - Stopped / 0 → user exited cleanly. Tear down. + // - Stopped / ≠0 or OOM-killed → crash. Tear down; DinD is no longer + // needed once the container has exited. + // - NotFound + !Preserved → removed externally. Tear down. + // - NotFound + Preserved → removed externally during finalization. + // Tear down DinD/network; status on disk stands. // - InspectUnavailable → Docker unreachable; keep everything alive. 
let is_preserved = matches!( decision, @@ -2058,7 +2055,7 @@ fn load_role_with( } let no_sessions = matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()); - if no_sessions && !is_preserved { + if no_sessions { run_clean_exit_teardown( paths, &container_state, @@ -2091,7 +2088,7 @@ fn load_role_with( &mut instance_manifest, InstanceStatus::Crashed, )?; - cleanup.disarm(); + cleanup.run(runner); } ContainerState::InspectUnavailable(reason) => { cleanup.disarm(); @@ -2112,8 +2109,10 @@ fn load_role_with( crate::debug_log!( "instance", "container {container_name} not found after session with Preserved decision; \ - removed externally during finalization — preserved status on disk stands", + removed externally during finalization — tearing down DinD/network, \ + preserved status on disk stands", ); + cleanup.run(runner); } ContainerState::NotFound => { run_clean_exit_teardown( From db39c7ea9e0864ff781693aab7f3206b033fd451 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 01:36:10 +0700 Subject: [PATCH 12/15] refactor(launch): skip redundant session re-query on clean exit When finalize_foreground_session returns Cleaned (!is_preserved), it already confirmed no tmux sessions exist via has_tmux_sessions. If inspect_container_state then shows Running (supervisor lag), calling inspect_agent_sessions immediately repeats the identical docker exec against a container where sessions are already known to be absent. Short-circuit: when !is_preserved in the Running branch, run teardown directly. Only re-query when is_preserved (finalize saw sessions at check-time and needs to detect sessions that may have ended in the interval before this inspect fired). Also reword the Unavailable debug message from "treating as sessions-present" to "treating conservatively as sessions-present" to clarify that the outcome is a fallback, not a positive detection. 
Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/runtime/launch.rs | 45 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index b37ae488d..681afbfab 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2044,18 +2044,9 @@ fn load_role_with( ); match inspect_container_state(runner, &container_name) { ContainerState::Running => { - let sessions = - inspect_agent_sessions(runner, &container_name, &ContainerState::Running); - if let AgentSessionInventory::Unavailable(ref reason) = sessions { - crate::debug_log!( - "instance", - "inspect_agent_sessions unavailable for {container_name}: {reason}; \ - treating as sessions-present (container preserved)", - ); - } - let no_sessions = - matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()); - if no_sessions { + if !is_preserved { + // Finalize already confirmed no sessions (supervisor lag after + // clean exit). Skip the redundant re-query and tear down. run_clean_exit_teardown( paths, &container_state, @@ -2065,7 +2056,31 @@ fn load_role_with( runner, )?; } else { - cleanup.disarm(); + // Finalize saw sessions at check-time (detach). Re-check: sessions + // may have ended in the interval between finalize and this inspect. 
+ let sessions = + inspect_agent_sessions(runner, &container_name, &ContainerState::Running); + if let AgentSessionInventory::Unavailable(ref reason) = sessions { + crate::debug_log!( + "instance", + "inspect_agent_sessions unavailable for {container_name}: {reason}; \ + treating conservatively as sessions-present (container preserved)", + ); + } + let no_sessions = + matches!(&sessions, AgentSessionInventory::Sessions(v) if v.is_empty()); + if no_sessions { + run_clean_exit_teardown( + paths, + &container_state, + &mut instance_manifest, + is_preserved, + &cleanup, + runner, + )?; + } else { + cleanup.disarm(); + } } } ContainerState::Stopped { @@ -6247,10 +6262,6 @@ plugins = [] assert!(body.contains(r#""role_key": "agent-smith""#)); assert!(body.contains(r#""agent_runtime": "claude""#)); assert!(body.contains(r#""host_workdir_fingerprint": "sha256:"#)); - // With no tmux sessions after docker exec returns, the new - // finalize path treats it as a clean exit (supervisor lag) rather - // than a detach — so the final status is clean_exited, not - // restore_available. 
assert!(body.contains(r#""status": "clean_exited""#)); let index_body = std::fs::read_to_string(paths.data_dir.join("instances.json")).unwrap(); assert!(index_body.contains(&format!(r#""container_base": "{container_name}""#))); From b6b92e5b40d679eb2d7faf703f4329df5242b314 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 01:50:13 +0700 Subject: [PATCH 13/15] style: apply rustfmt to finalize.rs Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/isolation/finalize.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/isolation/finalize.rs b/src/isolation/finalize.rs index 6fbe1c808..23b146770 100644 --- a/src/isolation/finalize.rs +++ b/src/isolation/finalize.rs @@ -127,7 +127,10 @@ pub fn finalize_foreground_session( // - sessions present → real detach (Ctrl-B D); preserve as before // - no sessions → supervisor lag after clean agent exit; fall through to // finalize_clean_exit so isolation worktrees are swept normally - if outcome.exit_code.is_none() && !outcome.oom_killed && !has_tmux_sessions(runner, container_name) { + if outcome.exit_code.is_none() + && !outcome.oom_killed + && !has_tmux_sessions(runner, container_name) + { debug_log!( "isolation", "finalize: container={c} still running but no tmux sessions; \ From 77c4cd56359bc407ee3a375f43a8df4c6fa55541 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 01:53:32 +0700 Subject: [PATCH 14/15] docs(roadmap): update Phase 1 description to match shipped fixes The Phase 1 "What shipped" section described socket-file-disappear monitoring, but the supervisor now polls tmux list-sessions instead (more robust to stale sockets). 
Also missing were the two companion Rust fixes that make teardown actually fire: - finalize.rs: has_tmux_sessions check to distinguish supervisor lag from a real detach - launch.rs: always tear down DinD on exit; skip redundant session re-query on the clean-exit path Update the Problem section's first bullet to note it is resolved in Phase 1. Update Phase 1 status and heading to reflect the full scope. Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- .../docs/reference/roadmap/jackin-container-binary.mdx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx index 0a5582990..75a01cfb2 100644 --- a/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx +++ b/docs/src/content/docs/reference/roadmap/jackin-container-binary.mdx @@ -3,13 +3,13 @@ title: "jackin-container: In-Container Multiplexer Server" --- import RepoFile from '../../../../components/RepoFile.astro' -**Status**: Partially implemented — Phase 1 cleanup gap closed via improved bash supervisor; Rust binary, multiplexer server, and desktop bridge remain +**Status**: Partially implemented — Phase 1 cleanup gap closed; container and DinD tear down automatically on clean agent exit. Rust binary, multiplexer server, and desktop bridge remain. ## Problem The current container supervisor is a bash wait loop () that keeps the container alive while agent sessions run via `docker exec`. It has two immediate limitations and one deeper architectural gap. -**Last-session cleanup does not fire.** When a tmux session exits, the supervisor keeps the container running. Because the container state is `Running` rather than `Stopped`, the host jackin cleanup path (`finalize_foreground_session`, container teardown, DinD/network/certs removal) never fires automatically. 
The operator must explicitly run `jackin eject` to clean up a container after all sessions end. +**Last-session cleanup does not fire** *(resolved in Phase 1).* When a tmux session exited, the supervisor kept the container running. Because the container state was `Running` rather than `Stopped`, the host jackin cleanup path (`finalize_foreground_session`, container teardown, DinD/network/certs removal) would not fire automatically. The operator had to explicitly run `jackin eject` to clean up a container after all sessions ended. **Session inventory requires shelling out.** Querying which sessions are active requires `docker exec sh -c 'tmux list-sessions ...'` from the host. There is no structured interface for the host CLI or the future jackin daemon to ask "what is running in this container right now?" @@ -146,13 +146,13 @@ The deeper architectural difference: Herdr wraps **bare host processes**. Jackin ## Implementation plan -### Phase 1 — Cleanup gap closed *(shipped — bash supervisor with socket polling)* +### Phase 1 — Cleanup gap closed *(shipped — bash supervisor + host-side teardown)* **Current state:** containers still use tmux as the session layer. This is a temporary arrangement: `docker exec tmux new-session` creates sessions, `docker exec tmux attach-session` reconnects, and `docker exec tmux list-sessions` queries what is running. The bash supervisor wraps tmux from the outside and has no understanding of what is happening inside sessions. This is the minimal viable approach — it works, but it gives Jackin no visibility into agent state, no structured event stream, and no way to manage sessions without shelling into the container. -**What shipped in Phase 1:** now monitors the tmux server socket (`/tmp/tmux-/default`) with a 1-second polling loop. When the socket disappears (last session ended and all clients disconnected), the supervisor exits 0 → container exits 0 → the host-side cleanup path runs automatically.
A 60-second startup grace period prevents the supervisor from exiting before the first `docker exec tmux new-session` creates the socket. +**What shipped in Phase 1:** now polls `tmux list-sessions` every second rather than watching the socket file disappear. Using `tmux list-sessions` is robust to stale socket files left behind when tmux crashes or is killed without cleanup — a plain `[ -S socket ]` file-existence check would loop forever on a dead socket. When `tmux list-sessions` returns non-zero (server gone or no sessions), the supervisor exits 0. A 60-second startup grace period waits for the socket file to appear before entering the monitor loop, preventing a premature exit before the first `docker exec tmux new-session` creates it. - was updated alongside it: when `docker exec` returns and the container is still `Running`, the code now calls `inspect_agent_sessions` to distinguish a clean agent exit (no sessions — run teardown) from a detached terminal (sessions active — keep the container alive). +Two companion fixes in the host-side Rust code were required to make the cleanup path actually fire. (`finalize_foreground_session`) now calls `has_tmux_sessions` when the container is still `Running` after `docker exec` returns: an empty session list means supervisor lag, not a detach, so the code falls through to `finalize_clean_exit` and sweeps isolation worktrees normally instead of returning `Preserved`. now tears down the DinD sidecar and Docker network in every branch where the container has exited — including crashes (non-zero exit) — rather than preserving them for a `jackin hardline` reconnect that cannot work without a live DinD. When `docker exec` returns and the container is still `Running` with an empty session list, teardown fires immediately without a redundant re-query (the session check in `finalize_foreground_session` already confirmed no sessions are present). 
Only a genuinely live detach (container still `Running` AND sessions confirmed present) keeps the DinD alive for reconnect. **Why this is temporary and why a purpose-built multiplexer makes sense:** researching [Herdr](https://github.com/ogulcancelik/herdr) — a Rust terminal multiplexer purpose-built for AI coding agents — confirmed that the missing piece is a session layer that understands AI agents natively. Herdr tracks four agent states (blocked / working / done / idle) with zero configuration, exposes a Unix socket API for session control, and streams status-change events. It is designed for bare host processes (not containers) and is AGPL-3.0, so it cannot be embedded in Jackin. But the core concept is exactly right: a multiplexer that knows about agents, not just terminals. `jackin-container` will become that multiplexer — running inside the container where it can see the agent's PTY output directly, exposed through a Unix socket that the host and desktop app both drive. The tmux layer goes away in Phase 3. From c2634ef619a10def066e74ffd2389cd7f57fb8e5 Mon Sep 17 00:00:00 2001 From: Alexey Zhokhov Date: Wed, 20 May 2026 01:57:22 +0700 Subject: [PATCH 15/15] fix(lint): address clippy warnings in finalize.rs and launch.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - has_tmux_sessions: map_or(false, ...) 
→ is_ok_and (clippy::unnecessary-map-or) - Running branch: if !is_preserved { A } else { B } → if is_preserved { B } else { A } (clippy::if-not-else) Co-authored-by: Claude Signed-off-by: Alexey Zhokhov --- src/isolation/finalize.rs | 27 +++++++++++++-------------- src/runtime/launch.rs | 24 ++++++++++++------------ 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/src/isolation/finalize.rs b/src/isolation/finalize.rs index 23b146770..3bfb3d7d3 100644 --- a/src/isolation/finalize.rs +++ b/src/isolation/finalize.rs @@ -165,20 +165,19 @@ fn has_tmux_sessions(runner: &mut impl CommandRunner, container_name: &str) -> b // Run via sh to suppress the "no server running" error tmux emits when the // socket is stale. Both "no server" and "no sessions" collapse to exit 0 with // empty stdout so the caller only sees a non-empty result when sessions exist. - match runner.capture( - "docker", - &[ - "exec", - container_name, - "sh", - "-c", - "tmux list-sessions -F '#{session_name}' 2>/dev/null || true", - ], - None, - ) { - Ok(output) => !output.trim().is_empty(), - Err(_) => false, - } + runner + .capture( + "docker", + &[ + "exec", + container_name, + "sh", + "-c", + "tmux list-sessions -F '#{session_name}' 2>/dev/null || true", + ], + None, + ) + .is_ok_and(|output| !output.trim().is_empty()) } fn finalize_clean_exit( diff --git a/src/runtime/launch.rs b/src/runtime/launch.rs index 681afbfab..9dab0e859 100644 --- a/src/runtime/launch.rs +++ b/src/runtime/launch.rs @@ -2044,18 +2044,7 @@ fn load_role_with( ); match inspect_container_state(runner, &container_name) { ContainerState::Running => { - if !is_preserved { - // Finalize already confirmed no sessions (supervisor lag after - // clean exit). Skip the redundant re-query and tear down. - run_clean_exit_teardown( - paths, - &container_state, - &mut instance_manifest, - is_preserved, - &cleanup, - runner, - )?; - } else { + if is_preserved { // Finalize saw sessions at check-time (detach). 
Re-check: sessions // may have ended in the interval between finalize and this inspect. let sessions = @@ -2081,6 +2070,17 @@ fn load_role_with( } else { cleanup.disarm(); } + } else { + // Finalize already confirmed no sessions (supervisor lag after + // clean exit). Skip the redundant re-query and tear down. + run_clean_exit_teardown( + paths, + &container_state, + &mut instance_manifest, + is_preserved, + &cleanup, + runner, + )?; } } ContainerState::Stopped {