55import subprocess
66import sys
77from pathlib import Path
8+ from typing import Any
89from unittest .mock import MagicMock , patch
910
1011import anyio
@@ -41,6 +42,40 @@ async def _run() -> None:
4142
4243 anyio .run (_run )
4344
def test_partial_pool_initialization(self, tmp_path: Path) -> None:
    """Pool operates at reduced capacity if some slots fail to create.

    ``WorktreePool._create_slot`` is patched so that exactly one slot index
    raises ``RuntimeError`` while every other index is delegated to the real
    implementation. The test then checks that the pool holds one slot fewer
    than requested and that the slot handed out by ``acquire`` is not the one
    whose creation failed.
    """
    from unittest.mock import patch

    from codeflash.code_utils.worktree_pool import WorktreePool

    pool_size = 3
    failing_index = 1
    base_dir = tmp_path.resolve() / "worktrees"
    repo_root = Path(__file__).resolve().parents[1]

    # Keep a handle on the real implementation so healthy slots are still
    # created for real once the class attribute is patched.
    original_create_slot = WorktreePool._create_slot

    async def failing_create_slot(self: Any, index: int) -> Any:
        # Simulate a worktree creation failure for a single slot only.
        if index == failing_index:
            raise RuntimeError("Simulated git worktree failure")
        return await original_create_slot(self, index)

    async def _run() -> None:
        with (
            patch("codeflash.code_utils.worktree_pool.git_root_dir", return_value=repo_root),
            patch.object(WorktreePool, "_create_slot", failing_create_slot),
        ):
            async with WorktreePool(pool_size=pool_size, base_dir=base_dir) as pool:
                # One of the requested slots failed, so one fewer is available.
                assert len(pool._slots) == pool_size - 1
                slot = await pool.acquire()
                assert slot.index != failing_index
                await pool.release(slot)

    anyio.run(_run)
78+
4479 def test_acquire_release_round_trip (self , tmp_path : Path ) -> None :
4580 from unittest .mock import patch
4681
@@ -275,6 +310,9 @@ async def mock_behavioral(self_eval: object, *args: object, **kwargs: object) ->
275310 test_env = {},
276311 pytest_cmd_list = [],
277312 behavior_test_results = mock_behavior_results ,
313+ fto_code = "def f(): pass" ,
314+ helper_codes = {},
315+ fto_file_path = Path ("/tmp/module.py" ),
278316 )
279317 )
280318
@@ -324,6 +362,9 @@ async def mock_behavioral(self_eval: object, *args: object, **kwargs: object) ->
324362 test_env = {},
325363 pytest_cmd_list = [],
326364 behavior_test_results = mock_behavior_results ,
365+ fto_code = "def f(): pass" ,
366+ helper_codes = {},
367+ fto_file_path = Path ("/tmp/module.py" ),
327368 )
328369 )
329370
@@ -355,6 +396,108 @@ async def _run() -> list: # type: ignore[type-arg]
355396 for _ , result in results :
356397 assert is_successful (result )
357398
def test_benchmark_phase_restages_candidate_code(self, tmp_path: Path) -> None:
    """Phase 2 must write fto_code and helper_codes to the slot before running benchmarks.

    The behavioral phase is replaced by a stub that returns a successful
    ``_BehavioralPass`` carrying known candidate code, and
    ``WorktreeSlot.write_candidate`` is replaced by a recorder. After the
    evaluation finishes, the recorded writes must contain the candidate
    function code and the helper code under their respective paths.
    """
    from codeflash.optimization.parallel_evaluator import _BehavioralPass

    opt = self._make_optimizer_mock(tmp_path)
    src_dir = tmp_path / "src"
    src_dir.mkdir(parents=True)
    (src_dir / "module.py").write_text("def f(): pass", encoding="utf-8")

    node = self._make_candidate_node()
    evaluator = ParallelCandidateEvaluator(opt, pool_size=1)

    repo_root = Path(__file__).resolve().parents[1]
    fto_code = "def f(): return 42 # optimized"
    helper_path = src_dir / "helpers.py"
    helper_codes = {helper_path: "HELPER_CODE = True"}

    # Every (path, code) pair passed to the patched write_candidate lands here.
    recorded_writes: list[tuple[Path, str]] = []

    async def tracking_write_candidate(self_slot: object, file_path: Path, code: str) -> None:
        recorded_writes.append((file_path, code))

    async def mock_behavioral(self_eval: object, *args: object, **kwargs: object) -> Success:  # type: ignore[type-arg]
        behavioral_pass = _BehavioralPass(
            candidate_index=0,
            perf_test_files=[],
            test_env={"PATH": "/usr/bin"},
            pytest_cmd_list=[sys.executable, "-m", "pytest"],
            behavior_test_results=MagicMock(),
            fto_code=fto_code,
            helper_codes=helper_codes,
            fto_file_path=Path(opt.function_to_optimize.file_path),
        )
        return Success(behavioral_pass)

    # Canned return values for the patched subprocess runner and XML parser.
    subprocess_outcome = MagicMock(returncode=0, stdout="", stderr="")
    parsed_xml = MagicMock(
        test_results=[MagicMock()],
        effective_loop_count=lambda: 10,
        total_passed_runtime=lambda: 5000,
    )

    async def _run() -> list:  # type: ignore[type-arg]
        with (
            patch("codeflash.code_utils.worktree_pool.git_root_dir", return_value=repo_root),
            patch.object(ParallelCandidateEvaluator, "_run_behavioral", mock_behavioral),
            patch("codeflash.code_utils.worktree_pool.WorktreeSlot.write_candidate", tracking_write_candidate),
            patch(
                "codeflash.languages.python.test_runner.async_execute_test_subprocess",
                return_value=subprocess_outcome,
            ),
            patch("codeflash.verification.parse_test_output.parse_test_xml", return_value=parsed_xml),
        ):
            return await evaluator.evaluate_candidates(
                candidates=[(node, 0, None)],
                code_context=MagicMock(),
                original_code_baseline=MagicMock(),
                original_helper_code={},
                file_path_to_helper_classes={},
            )

    anyio.run(_run)

    # Later writes to the same path overwrite earlier ones, as in the recorder order.
    code_by_path = dict(recorded_writes)
    expected_fto_path = Path(opt.function_to_optimize.file_path)
    assert expected_fto_path in code_by_path
    assert code_by_path[expected_fto_path] == fto_code
    assert helper_path in code_by_path
    assert code_by_path[helper_path] == "HELPER_CODE = True"
465+
def test_empty_candidates_returns_empty(self, tmp_path: Path) -> None:
    """Evaluating an empty candidate list must return an empty list."""
    evaluator = ParallelCandidateEvaluator(self._make_optimizer_mock(tmp_path), pool_size=1)
    repo_root = Path(__file__).resolve().parents[1]
    git_root_patch = patch("codeflash.code_utils.worktree_pool.git_root_dir", return_value=repo_root)

    async def _evaluate_nothing() -> list:  # type: ignore[type-arg]
        # The patch is only active for the duration of the evaluation call.
        with git_root_patch:
            return await evaluator.evaluate_candidates(
                candidates=[],
                code_context=MagicMock(),
                original_code_baseline=MagicMock(),
                original_helper_code={},
                file_path_to_helper_classes={},
            )

    assert anyio.run(_evaluate_nothing) == []
483+
def test_replace_and_capture_restores_on_failure(self, tmp_path: Path) -> None:
    """_replace_and_capture must restore original code even when replacement raises.

    The optimizer mock's replacement step is configured to raise
    ``ValueError``. The call is expected to return ``None`` and to invoke
    ``write_code_and_helpers`` exactly once with the optimizer's original
    source, an empty helper mapping, and the target file path.
    """
    opt = self._make_optimizer_mock(tmp_path)
    module_dir = tmp_path / "src"
    module_dir.mkdir(parents=True)
    (module_dir / "module.py").write_text("def f(): pass", encoding="utf-8")

    # Force the replacement step to fail so the restore path is exercised.
    opt.replace_function_and_helpers_with_optimized_code.side_effect = ValueError("bad code")

    outcome = ParallelCandidateEvaluator._replace_and_capture(opt, MagicMock(), MagicMock(), {})

    assert outcome is None
    opt.write_code_and_helpers.assert_called_once_with(
        opt.function_to_optimize_source_code,
        {},
        opt.function_to_optimize.file_path,
    )
500+
358501 def test_more_candidates_than_slots_no_deadlock (self , tmp_path : Path ) -> None :
359502 """Regression test: more passing candidates than pool slots must not deadlock."""
360503 from codeflash .optimization .parallel_evaluator import _BehavioralPass
@@ -379,6 +522,9 @@ async def mock_behavioral(self_eval: object, *args: object, **kwargs: object) ->
379522 test_env = {},
380523 pytest_cmd_list = [],
381524 behavior_test_results = mock_behavior_results ,
525+ fto_code = "def f(): pass" ,
526+ helper_codes = {},
527+ fto_file_path = Path ("/tmp/module.py" ),
382528 )
383529 )
384530
0 commit comments