Skip to content

Commit 62ca5b6

Browse files
committed
test: add unit tests for ParallelCandidateEvaluator with mocked worktrees
Tests cover: code replacement failure → EvalFailure, behavioral mismatch carrying diffs, successful candidate routing, and concurrent multi-candidate evaluation.
1 parent 7fca8b2 commit 62ca5b6

1 file changed

Lines changed: 171 additions & 8 deletions

File tree

tests/test_parallel_evaluator.py

Lines changed: 171 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
import subprocess
66
import sys
77
from pathlib import Path
8+
from unittest.mock import MagicMock, patch
89

910
import anyio
1011
import pytest
1112

13+
from codeflash.either import Failure, Success, is_successful
14+
from codeflash.languages.function_optimizer import CandidateNode
15+
from codeflash.optimization.parallel_evaluator import EvalFailure, ParallelCandidateEvaluator
16+
1217

1318
class TestWorktreePoolLifecycle:
1419
def test_creates_n_worktrees_and_cleans_up(self, tmp_path: Path) -> None:
@@ -115,10 +120,7 @@ def test_runs_simple_command(self) -> None:
115120

116121
async def _run() -> subprocess.CompletedProcess[str]:
117122
return await async_execute_test_subprocess(
118-
cmd_list=[sys.executable, "-c", "print('hello world')"],
119-
cwd=cwd,
120-
env=None,
121-
timeout=30,
123+
cmd_list=[sys.executable, "-c", "print('hello world')"], cwd=cwd, env=None, timeout=30
122124
)
123125

124126
result = anyio.run(_run)
@@ -148,11 +150,172 @@ def test_timeout_raises(self) -> None:
148150

149151
async def _run() -> subprocess.CompletedProcess[str]:
150152
return await async_execute_test_subprocess(
151-
cmd_list=[sys.executable, "-c", "import time; time.sleep(60)"],
152-
cwd=cwd,
153-
env=None,
154-
timeout=1,
153+
cmd_list=[sys.executable, "-c", "import time; time.sleep(60)"], cwd=cwd, env=None, timeout=1
155154
)
156155

157156
with pytest.raises(subprocess.TimeoutExpired):
158157
anyio.run(_run)
158+
159+
160+
class TestParallelCandidateEvaluator:
    """Unit tests for the evaluator with mocked worktree operations.

    Every test drives ``ParallelCandidateEvaluator.evaluate_candidates`` through
    ``anyio.run`` while ``codeflash.code_utils.worktree_pool.git_root_dir`` is
    patched to return this repository's root. Most tests additionally replace
    ``ParallelCandidateEvaluator._run_in_worktree`` so that no real test
    subprocess is needed to produce a result.
    """

    @staticmethod
    def _repo_root() -> Path:
        """Return the parent of the directory that contains this test file."""
        return Path(__file__).resolve().parents[1]

    @staticmethod
    def _write_source_module(tmp_path: Path) -> None:
        """Create ``<tmp_path>/src/module.py`` holding a trivial function.

        This is the same path that ``_make_optimizer_mock`` reports as
        ``function_to_optimize.file_path``.
        """
        src_dir = tmp_path / "src"
        src_dir.mkdir(parents=True)
        (src_dir / "module.py").write_text("def f(): pass", encoding="utf-8")

    def _evaluate(
        self, evaluator: ParallelCandidateEvaluator, nodes: list, *extra_patches: object
    ) -> list:  # type: ignore[type-arg]
        """Run ``evaluator.evaluate_candidates`` for ``nodes`` and return its results.

        Args:
            evaluator: The evaluator under test.
            nodes: Candidate nodes; each is submitted as ``(node, index, None)``.
            extra_patches: Additional ``unittest.mock`` patchers to activate
                (after the ``git_root_dir`` patch) for the duration of the call.

        Returns:
            Whatever ``evaluate_candidates`` returns (the tests index it as a
            list of ``(_, result)`` pairs).
        """
        from contextlib import ExitStack

        candidates = [(node, index, None) for index, node in enumerate(nodes)]
        repo_root = self._repo_root()

        async def _run() -> list:  # type: ignore[type-arg]
            # Patches are entered inside the coroutine so they are active exactly
            # while evaluate_candidates is awaited; ExitStack unwinds them in
            # reverse order even if the call raises.
            with ExitStack() as stack:
                stack.enter_context(
                    patch("codeflash.code_utils.worktree_pool.git_root_dir", return_value=repo_root)
                )
                for patcher in extra_patches:
                    stack.enter_context(patcher)  # type: ignore[arg-type]
                return await evaluator.evaluate_candidates(
                    candidates=candidates,
                    code_context=MagicMock(),
                    original_code_baseline=MagicMock(),
                    original_helper_code={},
                    file_path_to_helper_classes={},
                )

        return anyio.run(_run)

    def _make_candidate_node(self, opt_id: str = "cand_1") -> CandidateNode:
        """Build a ``CandidateNode`` wrapping a minimal optimized candidate.

        The candidate carries a single code string (``def f(): pass`` at
        ``test.py``) and uses ``opt_id`` as its optimization id.
        """
        from codeflash.models.models import CodeString, CodeStringsMarkdown, OptimizedCandidate
        from codeflash.models.shared_types import OptimizedCandidateSource

        source_code = CodeStringsMarkdown(code_strings=[CodeString(code="def f(): pass", file_path=Path("test.py"))])
        candidate = OptimizedCandidate(
            source_code=source_code,
            explanation="test optimization",
            optimization_id=opt_id,
            source=OptimizedCandidateSource.OPTIMIZE,
        )
        return CandidateNode(candidate)

    def _make_optimizer_mock(self, tmp_path: Path) -> MagicMock:
        """Return a ``MagicMock`` standing in for the function optimizer.

        The mock points at ``<tmp_path>/src/module.py`` as the file to optimize,
        uses ``tmp_path`` as project root, has no test files, and reports code
        replacement as successful (``True``) unless a test overrides it.
        """
        opt = MagicMock()
        opt.function_to_optimize.file_path = str(tmp_path / "src" / "module.py")
        opt.function_to_optimize_source_code = "def f(): pass"
        opt.test_files.test_files = []
        opt.args.project_root = str(tmp_path)
        opt.test_cfg = MagicMock()
        opt.get_test_env.return_value = {"PATH": "/usr/bin"}
        opt.language_support.build_pytest_cmd.return_value = [sys.executable, "-m", "pytest"]
        opt.replace_function_and_helpers_with_optimized_code.return_value = True
        opt.write_code_and_helpers = MagicMock()
        return opt

    def test_code_replacement_failure_returns_eval_failure(self, tmp_path: Path) -> None:
        """A falsy code-replacement result surfaces as an ``EvalFailure`` with no diffs."""
        opt = self._make_optimizer_mock(tmp_path)
        opt.replace_function_and_helpers_with_optimized_code.return_value = False

        node = self._make_candidate_node()
        evaluator = ParallelCandidateEvaluator(opt, pool_size=1)

        # _run_in_worktree is deliberately NOT patched here: the failure must
        # come from the evaluator's own handling of the replacement result.
        results = self._evaluate(evaluator, [node])

        assert len(results) == 1
        _, result = results[0]
        assert result is not None
        assert not is_successful(result)
        failure = result.failure()
        assert isinstance(failure, EvalFailure)
        assert "Code replacement failed" in failure.message
        assert failure.diffs == []

    def test_behavioral_mismatch_carries_diffs(self, tmp_path: Path) -> None:
        """A ``Failure`` from ``_run_in_worktree`` reaches the caller with its diffs intact."""
        from codeflash.models.models import TestDiff, TestDiffScope

        opt = self._make_optimizer_mock(tmp_path)
        self._write_source_module(tmp_path)

        node = self._make_candidate_node()
        evaluator = ParallelCandidateEvaluator(opt, pool_size=1)

        mock_diffs = [TestDiff(scope=TestDiffScope.DID_PASS, original_pass=True, candidate_pass=False)]
        mismatch = Failure(EvalFailure(message="Behavioral mismatch: 1 diffs", diffs=mock_diffs))  # type: ignore[arg-type]

        # NOTE(review): presumably _run_in_worktree is a coroutine function, in
        # which case patch substitutes an AsyncMock and return_value is what the
        # await yields - confirm against the evaluator.
        results = self._evaluate(
            evaluator,
            [node],
            patch.object(ParallelCandidateEvaluator, "_run_in_worktree", return_value=mismatch),
        )

        assert len(results) == 1
        _, result = results[0]
        assert not is_successful(result)
        failure = result.failure()
        assert isinstance(failure, EvalFailure)
        assert len(failure.diffs) == 1
        assert failure.diffs[0].scope == TestDiffScope.DID_PASS

    def test_successful_candidate_returns_result(self, tmp_path: Path) -> None:
        """A ``Success`` from ``_run_in_worktree`` is returned and unwraps to the same payload."""
        opt = self._make_optimizer_mock(tmp_path)
        self._write_source_module(tmp_path)

        node = self._make_candidate_node()
        evaluator = ParallelCandidateEvaluator(opt, pool_size=1)

        mock_result = MagicMock()
        mock_result.best_test_runtime = 5000

        results = self._evaluate(
            evaluator,
            [node],
            patch.object(ParallelCandidateEvaluator, "_run_in_worktree", return_value=Success(mock_result)),
        )

        assert len(results) == 1
        _, result = results[0]
        assert is_successful(result)
        assert result.unwrap().best_test_runtime == 5000

    def test_multiple_candidates_evaluated_concurrently(self, tmp_path: Path) -> None:
        """Three candidates with ``pool_size=3`` each reach ``_run_in_worktree`` and succeed.

        NOTE(review): this checks the number of calls and that every result is
        successful; it does not measure whether the calls overlap in time.
        """
        opt = self._make_optimizer_mock(tmp_path)
        self._write_source_module(tmp_path)

        nodes = [self._make_candidate_node(f"cand_{i}") for i in range(3)]
        evaluator = ParallelCandidateEvaluator(opt, pool_size=3)

        mock_result = MagicMock()
        mock_result.best_test_runtime = 1000

        call_count = 0

        async def mock_run_in_worktree(self_eval: object, *args: object, **kwargs: object) -> Success:  # type: ignore[type-arg]
            # Installed as a plain function on the class, so it is bound like a
            # method and receives the evaluator instance as its first argument.
            nonlocal call_count
            call_count += 1
            return Success(mock_result)

        results = self._evaluate(
            evaluator,
            nodes,
            patch.object(ParallelCandidateEvaluator, "_run_in_worktree", mock_run_in_worktree),
        )

        assert len(results) == 3
        assert call_count == 3
        for _, result in results:
            assert is_successful(result)

0 commit comments

Comments
 (0)