diff --git a/src/ralphify/_agent.py b/src/ralphify/_agent.py index 29604c33..9191d575 100644 --- a/src/ralphify/_agent.py +++ b/src/ralphify/_agent.py @@ -16,6 +16,8 @@ from __future__ import annotations import json +import os +import signal import subprocess import sys import time @@ -43,6 +45,43 @@ _LOG_TIMESTAMP_FORMAT = "%Y%m%d-%H%M%S" _LOG_ITERATION_PAD_WIDTH = 3 +# Subprocess kwargs that isolate agent processes in their own session/group. +# On POSIX this uses start_new_session so the agent and all its children +# form a separate process group that can be killed together. +_SESSION_KWARGS: dict[str, Any] = ( + {} if sys.platform == "win32" + else {"start_new_session": True} +) + + +def _kill_process_group(proc: subprocess.Popen[Any]) -> None: + """Kill the agent process and its entire process group. + + On POSIX, sends SIGTERM then SIGKILL to the process group — but only when + the process is actually a session leader (its pgid equals its pid). This + guard prevents accidentally killing the *caller's* process group when the + child was not started with ``start_new_session=True`` (e.g. in tests). + Falls back to ``proc.kill()`` on Windows or if the group kill fails. + """ + if sys.platform != "win32" and proc.poll() is None: + try: + pgid = os.getpgid(proc.pid) + except (OSError, ProcessLookupError): + pgid = None + + if pgid == proc.pid: + try: + os.killpg(pgid, signal.SIGTERM) + try: + proc.wait(timeout=3) + return + except subprocess.TimeoutExpired: + os.killpg(pgid, signal.SIGKILL) + return + except (OSError, ProcessLookupError): + pass + proc.kill() + @dataclass class AgentResult(ProcessResult): @@ -176,6 +215,7 @@ def _run_agent_streaming( stdout=subprocess.PIPE, stderr=subprocess.PIPE, **SUBPROCESS_TEXT_KWARGS, + **_SESSION_KWARGS, ) try: # Popen with PIPE guarantees non-None streams; guard explicitly @@ -189,13 +229,13 @@ def _run_agent_streaming( stream = _read_agent_stream(proc.stdout, deadline, on_activity) if stream.timed_out: - proc.kill() + _kill_process_group(proc) proc.wait() stderr_data = proc.stderr.read() finally: if proc.poll() is None: - proc.kill() + _kill_process_group(proc) proc.wait() log_file = _write_log(log_path_dir, iteration, "".join(stream.stdout_lines), stderr_data) @@ -222,6 +262,10 @@ def _run_agent_blocking( then echoed to stdout/stderr so the user still sees it live. When unset, output streams directly to the terminal (no capture overhead). + The subprocess is started in its own process group so that on + ``KeyboardInterrupt`` or timeout the entire child tree can be killed + via :func:`_kill_process_group`. + Returns ``returncode=None`` when the process times out. Raises ``FileNotFoundError`` if the command binary does not exist. """ @@ -230,20 +274,27 @@ def _run_agent_blocking( timed_out = False stdout: str | bytes | None = None stderr: str | bytes | None = None + capture = log_path_dir is not None + proc = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE if capture else None, + stderr=subprocess.PIPE if capture else None, + **SUBPROCESS_TEXT_KWARGS, + **_SESSION_KWARGS, + ) try: - result = subprocess.run( - cmd, - input=prompt, - **SUBPROCESS_TEXT_KWARGS, - timeout=timeout, - capture_output=log_path_dir is not None, - ) - returncode = result.returncode - stdout, stderr = result.stdout, result.stderr - except subprocess.TimeoutExpired as exc: + stdout, stderr = proc.communicate(input=prompt, timeout=timeout) + returncode = proc.returncode + except subprocess.TimeoutExpired: + _kill_process_group(proc) + stdout, stderr = proc.communicate() timed_out = True - stdout, stderr = exc.stdout, exc.stderr + except KeyboardInterrupt: + _kill_process_group(proc) + proc.wait() + raise log_file = _write_log(log_path_dir, iteration, stdout, stderr) if log_path_dir: diff --git a/tests/helpers.py b/tests/helpers.py index d5e98ed4..4598357e 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -20,8 +20,8 @@ # ── Patch targets ───────────────────────────────────────────────────── -MOCK_SUBPROCESS = "ralphify._agent.subprocess.run" -"""Patch target for subprocess.run inside the agent module.""" +MOCK_SUBPROCESS = "ralphify._agent.subprocess.Popen" +"""Patch target for subprocess.Popen inside the agent module (blocking path).""" MOCK_POPEN = "ralphify._agent.subprocess.Popen" """Patch target for subprocess.Popen inside the agent module (streaming path).""" @@ -111,6 +111,7 @@ def ok_result( Works as a direct factory — ``ok_result(stdout="out\\n")`` — and as a ``side_effect`` callable where mock call args are silently absorbed. + Used by runner tests that mock ``subprocess.run``. """ return _make_completed_process(returncode=0, stdout=stdout, stderr=stderr) @@ -126,6 +127,40 @@ def fail_result( return _make_completed_process(returncode=1, stdout=stdout, stderr=stderr) +def _make_mock_proc(returncode: int = 0, stdout: str = "", stderr: str = "") -> MagicMock: + """Build a MagicMock that mimics Popen for the agent blocking path.""" + proc = MagicMock() + proc.returncode = returncode + proc.communicate.return_value = (stdout, stderr) + proc.wait.return_value = returncode + proc.poll.return_value = returncode + proc.pid = 12345 + return proc + + +def ok_proc(*_args: Any, stdout: str = "", stderr: str = "", **_kwargs: Any) -> MagicMock: + """Popen mock with exit code 0. Works as a factory and ``side_effect``.""" + return _make_mock_proc(returncode=0, stdout=stdout, stderr=stderr) + + +def fail_proc(*_args: Any, stdout: str = "", stderr: str = "", **_kwargs: Any) -> MagicMock: + """Popen mock with exit code 1.""" + return _make_mock_proc(returncode=1, stdout=stdout, stderr=stderr) + + +def timeout_proc( + *_args: Any, timeout: float = 5, stdout: str = "", stderr: str = "", **_kwargs: Any, +) -> MagicMock: + """Popen mock whose communicate() raises TimeoutExpired.""" + proc = _make_mock_proc(returncode=0) + proc.communicate.side_effect = [ + subprocess.TimeoutExpired(cmd="agent", timeout=timeout), + (stdout, stderr), + ] + proc.poll.return_value = None + return proc + + def ok_run_result( output: str = "", ) -> RunResult: diff --git a/tests/test_agent.py b/tests/test_agent.py index a68f9a73..c303bbf5 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -1,15 +1,18 @@ """Tests for the _agent module — subprocess execution, log writing, and stream parsing.""" import io +import signal import subprocess +import sys from pathlib import Path -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest -from helpers import MOCK_POPEN, MOCK_SUBPROCESS, fail_result, make_mock_popen, ok_result +from helpers import MOCK_POPEN, MOCK_SUBPROCESS, fail_proc, make_mock_popen, ok_proc, timeout_proc from ralphify._agent import ( AgentResult, + _kill_process_group, _read_agent_stream, _run_agent_streaming, _supports_stream_json, @@ -169,9 +172,8 @@ def test_last_result_wins(self): class TestExecuteAgentBlocking: - @patch(MOCK_SUBPROCESS) - def test_success(self, mock_run): - mock_run.return_value = ok_result() + @patch(MOCK_POPEN, side_effect=ok_proc) + def test_success(self, mock_popen): result = execute_agent(["echo"], "prompt", timeout=None, log_path_dir=None, iteration=1) assert result.returncode == 0 @@ -179,25 +181,23 @@ def test_success(self, mock_run): assert result.log_file is None assert result.elapsed >= 0 - @patch(MOCK_SUBPROCESS) - def test_failure(self, mock_run): - mock_run.return_value = fail_result() + @patch(MOCK_POPEN, side_effect=fail_proc) + def test_failure(self, mock_popen): result = execute_agent(["echo"], "prompt", timeout=None, log_path_dir=None, iteration=1) assert result.returncode == 1 assert result.timed_out is False - @patch(MOCK_SUBPROCESS) - def test_timeout(self, mock_run): - mock_run.side_effect = subprocess.TimeoutExpired(cmd="echo", timeout=5) + @patch(MOCK_POPEN, side_effect=timeout_proc) + def test_timeout(self, mock_popen): result = execute_agent(["echo"], "prompt", timeout=5, log_path_dir=None, iteration=1) assert result.returncode is None assert result.timed_out is True - @patch(MOCK_SUBPROCESS) - def test_writes_log_on_success(self, mock_run, tmp_path): - mock_run.return_value = ok_result(stdout="agent output\n") + @patch(MOCK_POPEN) + def test_writes_log_on_success(self, mock_popen, tmp_path): + mock_popen.return_value = ok_proc(stdout="agent output\n") result = execute_agent( ["echo"], "prompt", timeout=None, log_path_dir=tmp_path, iteration=3, ) @@ -207,12 +207,9 @@ def test_writes_log_on_success(self, mock_run, tmp_path): assert result.log_file.name.startswith("003_") assert "agent output" in result.log_file.read_text() - @patch(MOCK_SUBPROCESS) - def test_writes_log_on_timeout(self, mock_run, tmp_path): - exc = subprocess.TimeoutExpired(cmd="echo", timeout=5) - exc.stdout = "partial" - exc.stderr = "err" - mock_run.side_effect = exc + @patch(MOCK_POPEN) + def test_writes_log_on_timeout(self, mock_popen, tmp_path): + mock_popen.return_value = timeout_proc(stdout="partial", stderr="err") result = execute_agent( ["echo"], "prompt", timeout=5, log_path_dir=tmp_path, iteration=1, ) @@ -220,14 +217,11 @@ def test_writes_log_on_timeout(self, mock_run, tmp_path): assert result.log_file is not None assert result.log_file.exists() - @patch(MOCK_SUBPROCESS) - def test_timeout_echoes_captured_output(self, mock_run, tmp_path, capsys): + @patch(MOCK_POPEN) + def test_timeout_echoes_captured_output(self, mock_popen, tmp_path, capsys): """When logging is enabled and the agent times out, partial output should be echoed to the terminal — same as on normal completion.""" - exc = subprocess.TimeoutExpired(cmd="echo", timeout=5) - exc.stdout = "partial stdout" - exc.stderr = "partial stderr" - mock_run.side_effect = exc + mock_popen.return_value = timeout_proc(stdout="partial stdout", stderr="partial stderr") execute_agent( ["echo"], "prompt", timeout=5, log_path_dir=tmp_path, iteration=1, ) @@ -236,16 +230,15 @@ def test_timeout_echoes_captured_output(self, mock_run, tmp_path, capsys): assert "partial stdout" in captured.out assert "partial stderr" in captured.err - @patch(MOCK_SUBPROCESS) - def test_no_log_when_dir_not_set(self, mock_run): - mock_run.return_value = ok_result() + @patch(MOCK_POPEN, side_effect=ok_proc) + def test_no_log_when_dir_not_set(self, mock_popen): result = execute_agent(["echo"], "prompt", timeout=None, log_path_dir=None, iteration=1) assert result.log_file is None - @patch(MOCK_SUBPROCESS) - def test_file_not_found_propagates(self, mock_run): - mock_run.side_effect = FileNotFoundError("not found") + @patch(MOCK_POPEN) + def test_file_not_found_propagates(self, mock_popen): + mock_popen.side_effect = FileNotFoundError("not found") with pytest.raises(FileNotFoundError): execute_agent( @@ -423,3 +416,63 @@ def test_timeout_kills_process(self, mock_popen, mock_time): assert result.timed_out is True assert result.returncode is None proc.kill.assert_called() + + +class TestProcessGroupCleanup: + """Process group cleanup, isolation, and _kill_process_group tests.""" + + pytestmark = pytest.mark.skipif(sys.platform == "win32", reason="POSIX-only behavior") + + @patch("ralphify._agent.os.killpg") + @patch("ralphify._agent.os.getpgid") + def test_session_leader_gets_sigterm(self, mock_getpgid, mock_killpg): + proc = MagicMock(pid=42, poll=MagicMock(return_value=None)) + mock_getpgid.return_value = 42 + + _kill_process_group(proc) + + mock_killpg.assert_any_call(42, signal.SIGTERM) + proc.wait.assert_called_once_with(timeout=3) + + @patch("ralphify._agent.os.killpg") + @patch("ralphify._agent.os.getpgid") + def test_escalates_to_sigkill_on_timeout(self, mock_getpgid, mock_killpg): + proc = MagicMock(pid=42, poll=MagicMock(return_value=None)) + proc.wait.side_effect = subprocess.TimeoutExpired(cmd="agent", timeout=3) + mock_getpgid.return_value = 42 + + _kill_process_group(proc) + + mock_killpg.assert_any_call(42, signal.SIGTERM) + mock_killpg.assert_any_call(42, signal.SIGKILL) + + @patch("ralphify._agent.os.killpg") + @patch("ralphify._agent.os.getpgid") + def test_not_session_leader_falls_back_to_kill(self, mock_getpgid, mock_killpg): + proc = MagicMock(pid=42, poll=MagicMock(return_value=None)) + mock_getpgid.return_value = 1 + + _kill_process_group(proc) + + mock_killpg.assert_not_called() + proc.kill.assert_called_once() + + def test_already_exited_falls_back_to_kill(self): + proc = MagicMock(pid=42, poll=MagicMock(return_value=0)) + + _kill_process_group(proc) + + proc.kill.assert_called_once() + + @patch(MOCK_POPEN, side_effect=ok_proc) + def test_blocking_uses_start_new_session(self, mock_popen): + execute_agent(["echo"], "prompt", timeout=None, log_path_dir=None, iteration=1) + assert mock_popen.call_args[1].get("start_new_session") is True + + @patch(MOCK_POPEN) + def test_streaming_uses_start_new_session(self, mock_popen): + mock_popen.return_value = make_mock_popen(returncode=0) + _run_agent_streaming( + ["claude", "-p"], "prompt", timeout=None, log_path_dir=None, iteration=1, + ) + assert mock_popen.call_args[1].get("start_new_session") is True diff --git a/tests/test_cli.py b/tests/test_cli.py index d4050e01..b4e7bade 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,7 +7,7 @@ import typer from typer.testing import CliRunner -from helpers import MOCK_ENGINE_SLEEP, MOCK_SKILLS_WHICH, MOCK_SUBPROCESS, MOCK_WHICH, ok_result, fail_result, make_ralph +from helpers import MOCK_ENGINE_SLEEP, MOCK_SKILLS_WHICH, MOCK_SUBPROCESS, MOCK_WHICH, ok_proc, fail_proc, make_ralph, timeout_proc from ralphify import __version__ from ralphify._frontmatter import RALPH_MARKER from ralphify.cli import app, _parse_command_items, _parse_user_args @@ -112,7 +112,7 @@ def test_errors_with_invalid_commands(self, mock_which, tmp_path, monkeypatch, @pytest.mark.parametrize("yaml_value", ["commands:", "commands: null"], ids=["empty-value", "explicit-null"]) - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_null_commands_treated_as_empty(self, mock_run, mock_which, tmp_path, monkeypatch, yaml_value): """YAML `commands:` (no value) and `commands: null` should be treated as no commands.""" monkeypatch.chdir(tmp_path) @@ -217,7 +217,7 @@ def test_errors_with_nan_delay(self, mock_which, tmp_path, monkeypatch): assert result.exit_code == 1 assert "non-negative" in result.output.lower() - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_runs_when_valid(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -225,15 +225,23 @@ def test_runs_when_valid(self, mock_run, mock_which, tmp_path, monkeypatch): assert result.exit_code == 0 assert mock_run.call_count == 1 - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_runs_n_iterations(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path, prompt="test prompt") + procs = [] + + def capture_proc(*args, **kwargs): + proc = ok_proc() + procs.append(proc) + return proc + + mock_run.side_effect = capture_proc result = runner.invoke(app, ["run", str(ralph_dir), "-n", "3"]) assert result.exit_code == 0 assert mock_run.call_count == 3 - for call in mock_run.call_args_list: - assert call.kwargs["input"].startswith("test prompt") + for proc in procs: + assert proc.communicate.call_args.kwargs["input"].startswith("test prompt") @patch(MOCK_SUBPROCESS) def test_reads_prompt_each_iteration(self, mock_run, mock_which, tmp_path, monkeypatch): @@ -242,6 +250,7 @@ def test_reads_prompt_each_iteration(self, mock_run, mock_which, tmp_path, monke ralph_file = ralph_dir / RALPH_MARKER call_count = 0 + procs = [] def update_prompt(*args, **kwargs): nonlocal call_count @@ -251,16 +260,18 @@ def update_prompt(*args, **kwargs): ralph_file.write_text( "---\nagent: claude -p --dangerously-skip-permissions\n---\nv2" ) - return ok_result(*args, **kwargs) + proc = ok_proc(*args, **kwargs) + procs.append(proc) + return proc mock_run.side_effect = update_prompt result = runner.invoke(app, ["run", str(ralph_dir), "-n", "2"]) assert result.exit_code == 0 - assert mock_run.call_args_list[0].kwargs["input"].startswith("v1") - assert mock_run.call_args_list[1].kwargs["input"].startswith("v2") + assert procs[0].communicate.call_args.kwargs["input"].startswith("v1") + assert procs[1].communicate.call_args.kwargs["input"].startswith("v2") - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_shows_success_per_iteration(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -270,7 +281,7 @@ def test_shows_success_per_iteration(self, mock_run, mock_which, tmp_path, monke assert "Iteration 2 completed" in result.output assert "2 succeeded" in result.output - @patch(MOCK_SUBPROCESS, side_effect=fail_result) + @patch(MOCK_SUBPROCESS, side_effect=fail_proc) def test_continues_on_error_by_default(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -279,7 +290,7 @@ def test_continues_on_error_by_default(self, mock_run, mock_which, tmp_path, mon assert mock_run.call_count == 3 assert "3 failed" in result.output - @patch(MOCK_SUBPROCESS, side_effect=fail_result) + @patch(MOCK_SUBPROCESS, side_effect=fail_proc) def test_stop_on_error(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -293,14 +304,14 @@ def test_stop_on_error(self, mock_run, mock_which, tmp_path, monkeypatch): def test_mixed_success_and_failure(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) - mock_run.side_effect = [ok_result(), fail_result(), ok_result()] + mock_run.side_effect = [ok_proc(), fail_proc(), ok_proc()] result = runner.invoke(app, ["run", str(ralph_dir), "-n", "3"]) assert result.exit_code == 0 assert "2 succeeded" in result.output assert "1 failed" in result.output @patch(MOCK_ENGINE_SLEEP) - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_delay_between_iterations(self, mock_run, mock_sleep, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -313,7 +324,7 @@ def test_delay_between_iterations(self, mock_run, mock_sleep, mock_which, tmp_pa assert abs(total_sleep - 10.0) < 0.01 @patch(MOCK_ENGINE_SLEEP) - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_no_delay_with_single_iteration(self, mock_run, mock_sleep, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -321,7 +332,7 @@ def test_no_delay_with_single_iteration(self, mock_run, mock_sleep, mock_which, assert result.exit_code == 0 mock_sleep.assert_not_called() - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_accepts_ralph_md_file_path(self, mock_run, mock_which, tmp_path, monkeypatch): """Can pass path to RALPH.md file directly.""" monkeypatch.chdir(tmp_path) @@ -337,7 +348,7 @@ def test_creates_log_files(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) log_dir = tmp_path / "logs" - mock_run.return_value = ok_result(stdout="agent output\n") + mock_run.return_value = ok_proc(stdout="agent output\n") result = runner.invoke(app, ["run", str(ralph_dir), "-n", "2", "--log-dir", str(log_dir)]) assert result.exit_code == 0 log_files = sorted(log_dir.iterdir()) @@ -350,7 +361,7 @@ def test_log_file_contains_output(self, mock_run, mock_which, tmp_path, monkeypa monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) log_dir = tmp_path / "logs" - mock_run.return_value = ok_result(stdout="hello from agent\n", stderr="warning\n") + mock_run.return_value = ok_proc(stdout="hello from agent\n", stderr="warning\n") result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1", "--log-dir", str(log_dir)]) assert result.exit_code == 0 log_files = list(log_dir.iterdir()) @@ -358,7 +369,7 @@ def test_log_file_contains_output(self, mock_run, mock_which, tmp_path, monkeypa assert "hello from agent" in content assert "warning" in content - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_no_log_files_without_flag(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -369,33 +380,34 @@ def test_no_log_files_without_flag(self, mock_run, mock_which, tmp_path, monkeyp @patch(MOCK_WHICH, return_value="/usr/bin/claude") class TestRunTimeout: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_timeout_passed_to_subprocess(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1", "--timeout", "30"]) assert result.exit_code == 0 - assert mock_run.call_args.kwargs["timeout"] == 30 + assert mock_run.return_value.communicate.call_args.kwargs["timeout"] == 30 - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_no_timeout_by_default(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 - assert mock_run.call_args.kwargs["timeout"] is None + assert mock_run.return_value.communicate.call_args.kwargs["timeout"] is None - @patch(MOCK_SUBPROCESS) + @patch(MOCK_SUBPROCESS, side_effect=timeout_proc) def test_timeout_counts_as_failure(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) - mock_run.side_effect = subprocess.TimeoutExpired(cmd="claude", timeout=10) result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1", "--timeout", "10"]) assert result.exit_code == 0 assert "timed out" in result.output assert "1 timed out" in result.output - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_timeout_shows_in_header(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path) @@ -597,15 +609,16 @@ def test_duplicate_declared_arg_names_rejected(self, mock_which, tmp_path, monke @patch(MOCK_WHICH, return_value="/usr/bin/claude") class TestRunWithUserArgs: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_named_args_resolved_in_prompt(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path, prompt="Research {{ args.dir }}") + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1", "--dir", "./my-project"]) assert result.exit_code == 0 - assert mock_run.call_args.kwargs["input"].startswith("Research ./my-project") + assert mock_run.return_value.communicate.call_args.kwargs["input"].startswith("Research ./my-project") - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_positional_args_with_declared_names(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph( @@ -613,17 +626,19 @@ def test_positional_args_with_declared_names(self, mock_run, mock_which, tmp_pat prompt="Research {{ args.dir }} with focus on {{ args.focus }}", args=["dir", "focus"], ) + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1", "./my-project", "performance"]) assert result.exit_code == 0 - assert mock_run.call_args.kwargs["input"].startswith("Research ./my-project with focus on performance") + assert mock_run.return_value.communicate.call_args.kwargs["input"].startswith("Research ./my-project with focus on performance") - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_unused_arg_placeholders_cleared(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path, prompt="Before {{ args.opt }} after") + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 - assert mock_run.call_args.kwargs["input"].startswith("Before after") + assert mock_run.return_value.communicate.call_args.kwargs["input"].startswith("Before after") class TestParseCommands: @@ -742,15 +757,16 @@ def test_name_with_invalid_chars_errors(self, name): @patch(MOCK_WHICH, return_value="/usr/bin/claude") class TestCreditFrontmatter: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_credit_true_by_default(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = make_ralph(tmp_path, prompt="go") + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 - assert "Co-authored-by: Ralphify" in mock_run.call_args.kwargs["input"] + assert "Co-authored-by: Ralphify" in mock_run.return_value.communicate.call_args.kwargs["input"] - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_credit_false_omits_trailer(self, mock_run, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) ralph_dir = tmp_path / "my-ralph" @@ -758,9 +774,10 @@ def test_credit_false_omits_trailer(self, mock_run, mock_which, tmp_path, monkey (ralph_dir / RALPH_MARKER).write_text( "---\nagent: claude -p --dangerously-skip-permissions\ncredit: false\n---\ngo" ) + mock_run.return_value = ok_proc() result = runner.invoke(app, ["run", str(ralph_dir), "-n", "1"]) assert result.exit_code == 0 - assert "Co-authored-by" not in mock_run.call_args.kwargs["input"] + assert "Co-authored-by" not in mock_run.return_value.communicate.call_args.kwargs["input"] def test_credit_invalid_value_errors(self, mock_which, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) diff --git a/tests/test_engine.py b/tests/test_engine.py index 29e7902a..e77271db 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -6,7 +6,7 @@ from unittest.mock import patch import pytest -from helpers import MOCK_RUN_COMMAND, MOCK_SUBPROCESS, drain_events, event_types, events_of_type, fail_result, make_config, make_state, ok_result, ok_run_result +from helpers import MOCK_RUN_COMMAND, MOCK_SUBPROCESS, drain_events, event_types, events_of_type, fail_proc, make_config, make_state, ok_proc, ok_run_result, timeout_proc from ralphify._events import BoundEmitter, EventType, NullEmitter, QueueEmitter from ralphify._run_types import Command, RunStatus @@ -20,7 +20,7 @@ class TestRunLoop: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_single_iteration(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=1) state = make_state() @@ -33,7 +33,7 @@ def test_single_iteration(self, mock_run, tmp_path): assert state.status == RunStatus.COMPLETED assert mock_run.call_count == 1 - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_multiple_iterations(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=3) state = make_state() @@ -43,7 +43,7 @@ def test_multiple_iterations(self, mock_run, tmp_path): assert state.completed == 3 assert mock_run.call_count == 3 - @patch(MOCK_SUBPROCESS, side_effect=fail_result) + @patch(MOCK_SUBPROCESS, side_effect=fail_proc) def test_failed_iterations_counted(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=2) state = make_state() @@ -53,7 +53,7 @@ def test_failed_iterations_counted(self, mock_run, tmp_path): assert state.completed == 0 assert state.failed == 2 - @patch(MOCK_SUBPROCESS, side_effect=fail_result) + @patch(MOCK_SUBPROCESS, side_effect=fail_proc) def test_stop_on_error(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=5, stop_on_error=True) state = make_state() @@ -63,7 +63,7 @@ def test_stop_on_error(self, mock_run, tmp_path): assert mock_run.call_count == 1 assert state.failed == 1 - @patch(MOCK_SUBPROCESS, side_effect=fail_result) + @patch(MOCK_SUBPROCESS, side_effect=fail_proc) def test_stop_on_error_sets_failed_status(self, mock_run, tmp_path): """When stop_on_error triggers, status should be FAILED, not COMPLETED.""" config = make_config(tmp_path, max_iterations=5, stop_on_error=True) @@ -77,9 +77,8 @@ def test_stop_on_error_sets_failed_status(self, mock_run, tmp_path): stop_event = events_of_type(events, EventType.RUN_STOPPED)[0] assert stop_event.data["reason"] == "error" - @patch(MOCK_SUBPROCESS) + @patch(MOCK_SUBPROCESS, side_effect=timeout_proc) def test_timeout_counted(self, mock_run, tmp_path): - mock_run.side_effect = subprocess.TimeoutExpired(cmd="echo", timeout=5) config = make_config(tmp_path, max_iterations=1, timeout=5) state = make_state() @@ -88,18 +87,19 @@ def test_timeout_counted(self, mock_run, tmp_path): assert state.timed_out == 1 assert state.failed == 1 - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_prompt_read_from_ralph_file(self, mock_run, tmp_path): + mock_run.return_value = ok_proc() config = make_config(tmp_path, "my prompt text", max_iterations=1, credit=False) state = make_state() run_loop(config, state, NullEmitter()) - assert mock_run.call_args.kwargs["input"] == "my prompt text" + assert mock_run.return_value.communicate.call_args.kwargs["input"] == "my prompt text" @patch(MOCK_SUBPROCESS) def test_log_dir_creates_files(self, mock_run, tmp_path): - mock_run.return_value = ok_result(stdout="output\n") + mock_run.return_value = ok_proc(stdout="output\n") log_dir = tmp_path / "logs" config = make_config(tmp_path, max_iterations=2, log_dir=log_dir) state = make_state() @@ -113,7 +113,7 @@ def test_log_dir_creates_files(self, mock_run, tmp_path): class TestRunLoopDefaults: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_runs_without_emitter(self, mock_run, tmp_path): """run_loop works when called without an explicit emitter (uses NullEmitter).""" config = make_config(tmp_path, max_iterations=1) @@ -126,7 +126,7 @@ def test_runs_without_emitter(self, mock_run, tmp_path): class TestRunLoopEvents: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_events_emitted_in_order(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=1) state = make_state() @@ -143,7 +143,7 @@ def test_events_emitted_in_order(self, mock_run, tmp_path): assert types.index(EventType.PROMPT_ASSEMBLED) < types.index(EventType.ITERATION_COMPLETED) assert types.index(EventType.ITERATION_COMPLETED) < types.index(EventType.RUN_STOPPED) - @patch(MOCK_SUBPROCESS, side_effect=fail_result) + @patch(MOCK_SUBPROCESS, side_effect=fail_proc) def test_failure_event_emitted(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=1) state = make_state() @@ -154,9 +154,8 @@ def test_failure_event_emitted(self, mock_run, tmp_path): events = drain_events(q) assert EventType.ITERATION_FAILED in event_types(events) - @patch(MOCK_SUBPROCESS) + @patch(MOCK_SUBPROCESS, side_effect=timeout_proc) def test_timeout_event_emitted(self, mock_run, tmp_path): - mock_run.side_effect = subprocess.TimeoutExpired(cmd="echo", timeout=5) config = make_config(tmp_path, max_iterations=1, timeout=5) state = make_state() q = QueueEmitter() @@ -166,7 +165,7 @@ def test_timeout_event_emitted(self, mock_run, tmp_path): events = drain_events(q) assert EventType.ITERATION_TIMED_OUT in event_types(events) - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_all_events_have_run_id(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=1) state = make_state() @@ -179,14 +178,14 @@ def test_all_events_have_run_id(self, mock_run, tmp_path): class TestRunStateControls: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_stop_request(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=100) state = make_state() def stop_after_first(*args, **kwargs): state.request_stop() - return ok_result(*args, **kwargs) + return ok_proc(*args, **kwargs) mock_run.side_effect = stop_after_first @@ -209,7 +208,7 @@ def test_keyboard_interrupt_sets_stopped(self, mock_run, tmp_path): stop_event = events_of_type(events, EventType.RUN_STOPPED)[0] assert stop_event.data["reason"] == "user_requested" - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_pause_and_resume(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=3) state = make_state() @@ -227,7 +226,7 @@ def resume_later(): state.request_resume() threading.Thread(target=resume_later, daemon=True).start() - return ok_result(*args, **kwargs) + return ok_proc(*args, **kwargs) mock_run.side_effect = track_calls @@ -236,7 +235,7 @@ def resume_later(): assert state.completed == 3 assert state.status == RunStatus.COMPLETED - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_stop_while_paused(self, mock_run, tmp_path): config = make_config(tmp_path, max_iterations=100) state = make_state() @@ -249,7 +248,7 @@ def stop_later(): state.request_stop() threading.Thread(target=stop_later, daemon=True).start() - return ok_result(*args, **kwargs) + return ok_proc(*args, **kwargs) mock_run.side_effect = pause_then_stop @@ -260,8 +259,9 @@ def stop_later(): class TestRalphArgs: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_args_resolved_in_prompt(self, mock_run, tmp_path): + mock_run.return_value = ok_proc() config = make_config( tmp_path, "---\nargs:\n - dir\n - focus\n---\nResearch {{ args.dir }} focus: {{ args.focus }}", @@ -272,24 +272,26 @@ def test_args_resolved_in_prompt(self, mock_run, tmp_path): state = make_state() run_loop(config, state, NullEmitter()) - call_input = mock_run.call_args.kwargs["input"] + call_input = mock_run.return_value.communicate.call_args.kwargs["input"] assert call_input == "Research ./src focus: perf" - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_empty_args_clears_placeholders(self, mock_run, tmp_path): + mock_run.return_value = ok_proc() config = make_config(tmp_path, "Before {{ args.opt }} after", max_iterations=1, args={}, credit=False) state = make_state() run_loop(config, state, NullEmitter()) - call_input = mock_run.call_args.kwargs["input"] + call_input = mock_run.return_value.communicate.call_args.kwargs["input"] assert call_input == "Before after" class TestCommandExecution: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) @patch(MOCK_RUN_COMMAND) def test_commands_output_injected(self, mock_run_cmd, mock_agent, tmp_path): mock_run_cmd.return_value = ok_run_result(output="test output\n") + mock_agent.return_value = ok_proc() config = make_config( tmp_path, @@ -301,11 +303,11 @@ def test_commands_output_injected(self, mock_run_cmd, mock_agent, tmp_path): state = make_state() run_loop(config, state, NullEmitter()) - call_input = mock_agent.call_args.kwargs["input"] + call_input = mock_agent.return_value.communicate.call_args.kwargs["input"] assert "test output" in call_input assert "{{ commands.tests }}" not in call_input - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) @patch(MOCK_RUN_COMMAND) def test_multiple_commands_all_executed(self, mock_run_cmd, mock_agent, tmp_path): """All commands in the list are executed and their outputs collected.""" @@ -336,7 +338,7 @@ def per_command(**kwargs): assert mock_run_cmd.call_count == 2 - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) @patch(MOCK_RUN_COMMAND) def test_dotslash_command_uses_ralph_dir_as_cwd(self, mock_run_cmd, mock_agent, tmp_path): """Commands starting with ./ run relative to the ralph directory.""" @@ -355,7 +357,7 @@ def test_dotslash_command_uses_ralph_dir_as_cwd(self, mock_run_cmd, mock_agent, passed_cwd = mock_run_cmd.call_args.kwargs["cwd"] assert passed_cwd == config.ralph_dir - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) @patch(MOCK_RUN_COMMAND) def test_regular_command_uses_project_root_as_cwd(self, mock_run_cmd, mock_agent, tmp_path): """Commands without ./ prefix run from the project root.""" @@ -374,7 +376,7 @@ def test_regular_command_uses_project_root_as_cwd(self, mock_run_cmd, mock_agent passed_cwd = mock_run_cmd.call_args.kwargs["cwd"] assert passed_cwd == config.project_root - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) @patch(MOCK_RUN_COMMAND) def test_command_timeout_passed_through(self, mock_run_cmd, mock_agent, tmp_path): """Command timeout from frontmatter is forwarded to run_command.""" @@ -473,7 +475,7 @@ def test_unexpected_exception_only_runs_one_iteration(self, mock_run, tmp_path): assert state.iteration == 1 @patch("ralphify.engine.parse_frontmatter") - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_crash_in_prompt_assembly_handled(self, mock_run, mock_parse, tmp_path): mock_parse.side_effect = ValueError("corrupt YAML") config = make_config(tmp_path, max_iterations=1) @@ -877,20 +879,22 @@ def test_arg_values_not_resolved_as_command_placeholders(self, tmp_path): class TestCreditInLoop: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_credit_instruction_in_agent_input(self, mock_run, tmp_path): + mock_run.return_value = ok_proc() config = make_config(tmp_path, "do work", max_iterations=1) state = make_state() run_loop(config, state, NullEmitter()) - call_input = mock_run.call_args.kwargs["input"] + call_input = mock_run.return_value.communicate.call_args.kwargs["input"] assert "Co-authored-by: Ralphify " in call_input - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS) def test_credit_false_no_trailer_in_agent_input(self, mock_run, tmp_path): + mock_run.return_value = ok_proc() config = make_config(tmp_path, "do work", max_iterations=1, credit=False) state = make_state() run_loop(config, state, NullEmitter()) - call_input = mock_run.call_args.kwargs["input"] + call_input = mock_run.return_value.communicate.call_args.kwargs["input"] assert "Co-authored-by" not in call_input diff --git a/tests/test_manager.py b/tests/test_manager.py index dd575184..31362636 100644 --- a/tests/test_manager.py +++ b/tests/test_manager.py @@ -6,7 +6,7 @@ import pytest -from helpers import MOCK_SUBPROCESS, drain_events, event_types, make_config, ok_result +from helpers import MOCK_SUBPROCESS, drain_events, event_types, make_config, ok_proc from ralphify._events import EventType, FanoutEmitter, QueueEmitter from ralphify._run_types import RUN_ID_LENGTH, RunStatus @@ -44,7 +44,7 @@ def test_create_run_id_is_12_hex_chars(self, tmp_path): class TestRunManagerStartRun: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_start_run_starts_thread(self, mock_run, tmp_path): manager = RunManager() config = make_config(tmp_path, max_iterations=1) @@ -57,7 +57,7 @@ def test_start_run_starts_thread(self, mock_run, tmp_path): managed.thread.join(timeout=5) assert managed.state.status == RunStatus.COMPLETED - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_start_run_thread_is_daemon(self, mock_run, tmp_path): manager = RunManager() config = make_config(tmp_path, max_iterations=1) @@ -70,7 +70,7 @@ def test_start_run_thread_is_daemon(self, mock_run, tmp_path): assert managed.thread.daemon is True managed.thread.join(timeout=5) - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_start_run_emits_events_to_queue(self, mock_run, tmp_path): manager = RunManager() config = make_config(tmp_path, max_iterations=1) @@ -88,7 +88,7 @@ def test_start_run_emits_events_to_queue(self, mock_run, tmp_path): class TestRunManagerStartRunGuards: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_start_run_raises_on_double_start(self, mock_run, tmp_path): manager = RunManager() config = make_config(tmp_path, max_iterations=1) @@ -126,7 +126,7 @@ def test_resume_run_raises_key_error_for_unknown_id(self): class TestRunManagerStopRun: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_stop_run_stops_running_run(self, mock_run, tmp_path): manager = RunManager() config = make_config(tmp_path, max_iterations=100, delay=0.1) @@ -156,7 +156,7 @@ def counting_ok(*args, **kwargs): if call_count == 1: pause_done.set() resume_allowed.wait(timeout=5) - return ok_result(*args, **kwargs) + return ok_proc(*args, **kwargs) mock_run.side_effect = counting_ok @@ -237,7 +237,7 @@ def test_build_emitter_returns_fanout_with_extras(self, tmp_path): class TestRunManagerExtraListeners: - @patch(MOCK_SUBPROCESS, side_effect=ok_result) + @patch(MOCK_SUBPROCESS, side_effect=ok_proc) def test_extra_listeners_receive_events(self, mock_run, tmp_path): manager = RunManager() config = make_config(tmp_path, max_iterations=1)