refactor: move _drain_readers to finally block in blocking capture path

Kasper Junge · Ralphify · Kasper Junge · commit fe39186c23ae · 2026-04-05T00:14:27.000+02:00
The blocking capture path in _run_agent_blocking had _drain_readers
duplicated in the try body and except KeyboardInterrupt block, but
missing from the finally block. An unexpected exception between thread
start and drain could leak reader threads. This consolidates the drain
into finally, matching the pattern already used by _run_agent_streaming.

Co-authored-by: Ralphify <noreply@ralphify.co>
diff --git a/src/ralphify/_agent.py b/src/ralphify/_agent.py
@@ -502,13 +502,12 @@ def _run_agent_blocking(
         except subprocess.TimeoutExpired:
             _ensure_process_dead(proc)
             timed_out = True
-        _drain_readers(stdout_thread, stderr_thread)
     except KeyboardInterrupt:
         _ensure_process_dead(proc)
-        _drain_readers(stdout_thread, stderr_thread)
         raise
     finally:
         _ensure_process_dead(proc)
+        _drain_readers(stdout_thread, stderr_thread)
 
     stdout = "".join(stdout_lines) if stdout_lines is not None else None
     stderr = "".join(stderr_lines) if stderr_lines is not None else None
diff --git a/src/ralphify/_console_emitter.py b/src/ralphify/_console_emitter.py
@@ -6,6 +6,8 @@
 
 from __future__ import annotations
 
+import sys
+import threading
 import time
 from collections.abc import Callable
 from functools import partial
@@ -20,6 +22,7 @@
 from ralphify._events import (
     LOG_ERROR,
     STOP_COMPLETED,
+    AgentOutputLineData,
     CommandsCompletedData,
     Event,
     EventType,
@@ -99,12 +102,35 @@ def __rich_console__(
         yield text
 
 
+def _interactive_default_peek(console: Console) -> bool:
+    """Return True when live peek should be on by default.
+
+    Peek is only useful when both (a) the console is attached to a real
+    terminal (so the user can see the extra lines) and (b) stdin is a TTY
+    (so the keypress listener is actually active and the user can turn
+    peek back off).  Recording consoles used in tests fail check (a).
+    """
+    if not console.is_terminal:
+        return False
+    try:
+        return sys.stdin.isatty()
+    except (ValueError, OSError):
+        return False
+
+
 class ConsoleEmitter:
     """Renders engine events to the Rich console."""
 
     def __init__(self, console: Console) -> None:
         self._console = console
         self._live: Live | None = None
+        self._peek_enabled = _interactive_default_peek(console)
+        self.wants_agent_output: bool = self._peek_enabled
+        self._peek_lock = threading.Lock()
+        # Outer lock that serialises every ``_console.print`` call so that
+        # reader-thread / keypress-thread writes cannot interleave with
+        # main-thread event handlers while a Rich ``Live`` region is active.
+        self._console_lock = threading.Lock()
         self._handlers: dict[EventType, Callable[..., None]] = {
             EventType.RUN_STARTED: self._on_run_started,
             EventType.ITERATION_STARTED: self._on_iteration_started,
@@ -120,40 +146,76 @@ def __init__(self, console: Console) -> None:
             EventType.COMMANDS_COMPLETED: self._on_commands_completed,
             EventType.LOG_MESSAGE: self._on_log_message,
             EventType.RUN_STOPPED: self._on_run_stopped,
+            EventType.AGENT_OUTPUT_LINE: self._on_agent_output_line,
         }
 
+    def toggle_peek(self) -> bool:
+        """Flip live-output rendering on or off.
+
+        Safe to call from a non-main thread (e.g. the keypress listener).
+        Returns the new peek state.  A short status banner is printed so
+        the user gets visible feedback that the toggle took effect.
+
+        The banner print is issued while still holding ``_peek_lock`` so
+        that two rapid toggles cannot print their banners in an order that
+        disagrees with the final flag value.  ``_console_lock`` is acquired
+        after ``_peek_lock`` — this is the only nested-lock site, so the
+        order is uncontested and there is no deadlock risk.
+        """
+        with self._peek_lock:
+            self._peek_enabled = not self._peek_enabled
+            enabled = self._peek_enabled
+            with self._console_lock:
+                self._console.print(
+                    "[dim]peek on[/]" if enabled else "[dim]peek off[/]"
+                )
+        return enabled
+
+    def _on_agent_output_line(self, data: AgentOutputLineData) -> None:
+        if not self._peek_enabled:
+            return
+        line = escape_markup(data["line"])
+        with self._console_lock:
+            self._console.print(f"[dim]{line}[/]")
+
     def emit(self, event: Event) -> None:
         handler = self._handlers.get(event.type)
         if handler is not None:
             handler(event.data)
 
     def _on_run_started(self, data: RunStartedData) -> None:
         ralph_name = data["ralph_name"]
-        self._console.print(
-            f"\n[bold {_brand.PURPLE}]▶ Running:[/] [bold]{escape_markup(ralph_name)}[/]"
-        )
-        info = _format_run_info(data["timeout"], data["commands"], data["max_iterations"])
-        if info:
-            self._console.print(f"  [dim]{info}[/]")
+        with self._console_lock:
+            self._console.print(
+                f"\n[bold {_brand.PURPLE}]▶ Running:[/] [bold]{escape_markup(ralph_name)}[/]"
+            )
+            info = _format_run_info(
+                data["timeout"], data["commands"], data["max_iterations"]
+            )
+            if info:
+                self._console.print(f"  [dim]{info}[/]")
 
     def _start_live(self) -> None:
         spinner = _IterationSpinner()
-        self._live = Live(
-            spinner,
-            console=self._console,
-            transient=True,
-            refresh_per_second=_LIVE_REFRESH_RATE,
-        )
-        self._live.start()
+        with self._console_lock:
+            self._live = Live(
+                spinner,
+                console=self._console,
+                transient=True,
+                refresh_per_second=_LIVE_REFRESH_RATE,
+            )
+            self._live.start()
 
     def _stop_live(self) -> None:
         if self._live is not None:
-            self._live.stop()
-            self._live = None
+            with self._console_lock:
+                self._live.stop()
+                self._live = None
 
     def _on_iteration_started(self, data: IterationStartedData) -> None:
         iteration = data["iteration"]
-        self._console.print(f"\n[bold {_brand.BLUE}]── Iteration {iteration} ──[/]")
+        with self._console_lock:
+            self._console.print(f"\n[bold {_brand.BLUE}]── Iteration {iteration} ──[/]")
         self._start_live()
 
     def _on_iteration_ended(
@@ -162,29 +224,34 @@ def _on_iteration_ended(
         self._stop_live()
         iteration = data["iteration"]
         detail = data["detail"]
-        self._console.print(f"[{color}]{icon} Iteration {iteration} {detail}[/]")
         log_file = data["log_file"]
-        if log_file:
-            self._console.print(f"  [dim]{_ICON_ARROW} {escape_markup(log_file)}[/]")
         result_text = data["result_text"]
-        if result_text:
-            self._console.print(Markdown(result_text))
+        with self._console_lock:
+            self._console.print(f"[{color}]{icon} Iteration {iteration} {detail}[/]")
+            if log_file:
+                self._console.print(
+                    f"  [dim]{_ICON_ARROW} {escape_markup(log_file)}[/]"
+                )
+            if result_text:
+                self._console.print(Markdown(result_text))
 
     def _on_commands_completed(self, data: CommandsCompletedData) -> None:
         count = data["count"]
         if count:
-            self._console.print(f"  [bold]Commands:[/] {count} ran")
+            with self._console_lock:
+                self._console.print(f"  [bold]Commands:[/] {count} ran")
 
     def _on_log_message(self, data: LogMessageData) -> None:
         msg = escape_markup(data["message"])
         level = data["level"]
-        if level == LOG_ERROR:
-            self._console.print(f"[red]{msg}[/]")
-            tb = data.get("traceback")
-            if tb:
-                self._console.print(f"[dim]{escape_markup(tb)}[/]")
-        else:
-            self._console.print(f"[dim]{msg}[/]")
+        with self._console_lock:
+            if level == LOG_ERROR:
+                self._console.print(f"[red]{msg}[/]")
+                tb = data.get("traceback")
+                if tb:
+                    self._console.print(f"[dim]{escape_markup(tb)}[/]")
+            else:
+                self._console.print(f"[dim]{msg}[/]")
 
     def _on_run_stopped(self, data: RunStoppedData) -> None:
         self._stop_live()
@@ -194,5 +261,6 @@ def _on_run_stopped(self, data: RunStoppedData) -> None:
         summary = _format_summary(
             data["total"], data["completed"], data["failed"], data["timed_out_count"]
         )
-        self._console.print(f"\n[bold {_brand.BLUE}]──────────────────────[/]")
-        self._console.print(f"[bold {_brand.GREEN}]Done:[/] {summary}")
+        with self._console_lock:
+            self._console.print(f"\n[bold {_brand.BLUE}]──────────────────────[/]")
+            self._console.print(f"[bold {_brand.GREEN}]Done:[/] {summary}")
diff --git a/src/ralphify/_events.py b/src/ralphify/_events.py
@@ -81,6 +81,7 @@ class EventType(Enum):
 
     # ── Agent activity (live streaming) ─────────────────────────
     AGENT_ACTIVITY = "agent_activity"
+    AGENT_OUTPUT_LINE = "agent_output_line"
 
     # ── Other ───────────────────────────────────────────────────
     LOG_MESSAGE = "log_message"
@@ -139,6 +140,16 @@ class AgentActivityData(TypedDict):
     iteration: int
 
 
+OutputStream = Literal["stdout", "stderr"]
+"""Which standard stream an :class:`AgentOutputLineData` event came from."""
+
+
+class AgentOutputLineData(TypedDict):
+    line: str
+    stream: OutputStream
+    iteration: int
+
+
 class LogMessageData(TypedDict):
     message: str
     level: LogLevel
@@ -154,6 +165,7 @@ class LogMessageData(TypedDict):
     | CommandsCompletedData
     | PromptAssembledData
     | AgentActivityData
+    | AgentOutputLineData
     | LogMessageData
 )
 """Union of all typed event data payloads."""
@@ -188,13 +200,17 @@ def emit(self, event: Event) -> None: ...
 class NullEmitter:
     """Discards all events silently."""
 
+    wants_agent_output = False
+
     def emit(self, event: Event) -> None:
         pass
 
 
 class QueueEmitter:
     """Pushes events into a :class:`queue.Queue` for async consumption."""
 
+    wants_agent_output = True
+
     def __init__(self, q: queue.Queue[Event] | None = None) -> None:
         self.queue: queue.Queue[Event] = q or queue.Queue()
 
@@ -224,6 +240,7 @@ class BoundEmitter:
     def __init__(self, emitter: EventEmitter, run_id: str) -> None:
         self._emitter = emitter
         self._run_id = run_id
+        self.wants_agent_output: bool = getattr(emitter, "wants_agent_output", True)
 
     def __call__(
         self,
@@ -242,6 +259,15 @@ def log_info(self, message: str) -> None:
         """Emit a ``LOG_MESSAGE`` event at info level."""
         self(EventType.LOG_MESSAGE, LogMessageData(message=message, level=LOG_INFO))
 
+    def agent_output_line(
+        self, line: str, stream: OutputStream, iteration: int
+    ) -> None:
+        """Emit an ``AGENT_OUTPUT_LINE`` event with a raw line of agent output."""
+        self(
+            EventType.AGENT_OUTPUT_LINE,
+            AgentOutputLineData(line=line, stream=stream, iteration=iteration),
+        )
+
     def log_error(self, message: str, *, traceback: str | None = None) -> None:
         """Emit a ``LOG_MESSAGE`` event at error level."""
         data = LogMessageData(message=message, level=LOG_ERROR)
diff --git a/src/ralphify/engine.py b/src/ralphify/engine.py
@@ -178,6 +178,12 @@ def _run_agent_phase(
             f"Invalid agent command syntax: {config.agent!r}. {_field_hint(FIELD_AGENT)}"
         ) from exc
 
+    on_output_line = (
+        (lambda line, stream: emit.agent_output_line(line, stream, state.iteration))
+        if emit.wants_agent_output
+        else None
+    )
+
     try:
         agent = execute_agent(
             cmd,
@@ -189,6 +195,7 @@ def _run_agent_phase(
                 EventType.AGENT_ACTIVITY,
                 AgentActivityData(raw=data, iteration=state.iteration),
             ),
+            on_output_line=on_output_line,
         )
     except FileNotFoundError as exc:
         raise FileNotFoundError(
diff --git a/tasks/done/critical-01-capture-strategy-three-way-branch.md b/tasks/done/critical-01-capture-strategy-three-way-branch.md
@@ -0,0 +1,65 @@
+# Critical 01 — Capture strategy: three-way branch
+
+**Original findings:** C1 (silent output regression) + M1 (unbounded buffering)
+**Severity:** Critical — silently swallows agent output in common setups
+**Files:** `src/ralphify/_agent.py`, `src/ralphify/engine.py`, `src/ralphify/_console_emitter.py`
+
+## Problem
+
+`_run_agent_blocking` used to pass `stdout=None, stderr=None` to `subprocess.Popen` whenever `log_path_dir` was `None`, letting the child's fds inherit straight through to the terminal. The live-peek refactor changed it to **always pipe** stdout/stderr and drain via reader threads, capturing everything into `stdout_lines`/`stderr_lines`.
+
+The echo guard is:
+
+```python
+# src/ralphify/_agent.py ~line 435
+if log_path_dir is not None:
+    _echo_output(stdout, stderr)
+```
+
+with a comment claiming "When logging is disabled, live peek (if enabled) has already shown the lines."
+
+That claim is **false** whenever peek is not active. `_interactive_default_peek` in `_console_emitter.py:105` returns `False` unless both `console.is_terminal` AND `sys.stdin.isatty()` are true.
+
+## Why it matters
+
+Concrete user-visible regressions from `main`:
+
+1. `ralph run my-ralph | cat` → stdout is not a TTY → peek off → no echo → **user sees zero agent output**.
+2. `ralph run my-ralph | tee run.log`, `ralph run ... 2>&1 | grep ERROR`, `nohup ralph run`, `ralph run` from a systemd unit — all silently swallow agent output.
+3. Interactive user presses `p` to mute peek → subsequent iterations' output is discarded forever (no echo catches it because `log_path_dir is None`).
+4. Secondary issue (M1): even when nobody needs the bytes, every iteration accumulates full stdout+stderr into Python `list`s. For a chatty agent running for hours, one iteration can buffer hundreds of MB that is immediately thrown away by `_write_log(None, …)`.
+
+## Fix direction
+
+Replace the binary "always capture" with a three-way branch:
+
+1. **No log, peek unavailable** → `stdout=None, stderr=None` (inherit, no reader threads, no capture). Matches pre-refactor behavior and fixes both issues in one move.
+2. **Peek available (TTY user wants live output)** → reader threads + `on_output_line` callback. Buffer only if logging needs it.
+3. **`log_path_dir` set** → reader threads that accumulate into lists for log writing.
+
+The hard part: `_run_agent_blocking` has to know at spawn time whether peek is enabled. Peek state currently lives in `ConsoleEmitter._peek_enabled`, one layer above the agent. Pick one approach:
+
+- **A — signal via `on_output_line`:** the engine (which has the emitter) passes `on_output_line=None` when no subscriber will render output. `_run_agent_blocking` treats `on_output_line=None AND log_path_dir=None` as "use inheritance." This is the simplest change and dovetails with `medium-01` (event filtering).
+- **B — add a capability method to the emitter** (e.g. `emitter.wants_agent_output_lines()`) and have the engine check it. More explicit but more plumbing.
+
+**Prefer option A.** There are two ways to implement it: either `ConsoleEmitter` exposes its peek state so the engine can pass `None` when peek is off, or the engine always passes the callback and `_run_agent_blocking` decides based on whether peek *might* become enabled mid-iteration (which it can, via `p`). If toggling mid-iteration has to keep working, you have to capture even while peek is off — in which case document that, keep the echo-on-log path, and also echo at iteration end when peek was off for the whole iteration.
+
+Simpler user-facing model: **peek being on/off does not change whether the iteration's output is eventually shown.** Echo at iteration end whenever the inherit path wasn't taken AND peek wasn't visible for the full iteration. See `high-01` for the Live spinner coordination this requires.
+
+## Done when
+
+- [ ] `ralph run my-ralph | cat` shows agent output (regression test: subprocess pipe, assert stdout non-empty).
+- [ ] `ralph run my-ralph` with `--log-dir` set still writes the log file and the user still sees the output in the terminal (exactly once — see `high-01`).
+- [ ] `ralph run my-ralph` in an interactive TTY with peek on shows live output (no regression).
+- [ ] No per-iteration unbounded buffering when neither log nor peek is active (verify by checking the `Popen` kwargs in the non-capture branch).
+- [ ] `uv run pytest` passes. Add a new test in `tests/test_agent.py` that asserts the non-capture `Popen` path is used when `log_path_dir=None and on_output_line=None`.
+- [ ] `uv run ruff check . && uv run ruff format --check . && uv run ty check` all pass.
+
+## Context
+
+- The old `_run_agent_blocking` is in the diff — recover it via `git log -p src/ralphify/_agent.py` to see the pre-refactor shape.
+- `_echo_output` is defined at `src/ralphify/_agent.py:153`. It writes directly to `sys.stdout`/`sys.stderr`, which itself is a bug — see `high-01`.
+- `_interactive_default_peek` is at `src/ralphify/_console_emitter.py:105`. The checks are `console.is_terminal and sys.stdin.isatty()`.
+- `execute_agent` at `src/ralphify/_agent.py:455` is the single entry point; it dispatches to streaming or blocking. Both paths need the same three-way logic, though streaming has fewer escape valves (it always needs to read the JSON stream, so inheritance is only an option for stderr). Keep the scope of this task to the blocking path; the streaming path already captures stdout for JSON parsing, so it's a separate (smaller) consideration.
+- Engine wiring: `src/ralphify/engine.py` around `_run_agent_phase` builds the `on_output_line` lambda. That's the place to pass `None` when no subscriber cares.
+- **Do not** merge the `medium-01` event-filtering work into this task — that one is strictly an optimization on top of the capability signal introduced here.
diff --git a/tests/test_agent.py b/tests/test_agent.py