Skip to content

Commit fad2839

Browse files
authored
Merge pull request #996 from codeflash-ai/fix-futurehouse-failing-in-the-backend
Fix FutureHouse end-to-end test expectation
2 parents cee7b50 + 59c796e commit fad2839

4 files changed

Lines changed: 47 additions & 10 deletions

File tree

tests/scripts/end_to_end_test_futurehouse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
def run_test(expected_improvement_pct: int) -> bool:
88
config = TestConfig(
99
file_path="src/aviary/common_tags.py",
10-
expected_unit_tests=0, # todo: fix bug https://linear.app/codeflash-ai/issue/CF-921/test-discovery-does-not-work-properly-for-e2e-futurehouse-example for context
10+
expected_unit_tests_count=2,
1111
min_improvement_x=0.05,
1212
coverage_expectations=[
1313
CoverageExpectation(

tests/scripts/end_to_end_test_topological_sort_worktree.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def run_test(expected_improvement_pct: int) -> bool:
1717
expected_lines=[25, 26, 27, 28, 29, 30, 31],
1818
)
1919
],
20-
expected_unit_tests=1,
20+
expected_unit_test_files=1, # Per-function count
2121
)
2222
cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve()
2323
return_var = run_codeflash_command(cwd, config, expected_improvement_pct)

tests/scripts/end_to_end_test_tracer_replay.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_test(expected_improvement_pct: int) -> bool:
88
config = TestConfig(
99
trace_mode=True,
1010
min_improvement_x=0.1,
11-
expected_unit_tests=0,
11+
expected_unit_tests_count=None, # Tracer creates replay tests dynamically, skip validation
1212
coverage_expectations=[
1313
CoverageExpectation(function_name="funcA", expected_coverage=100.0, expected_lines=[6, 7, 8, 9, 11, 14])
1414
],

tests/scripts/end_to_end_test_utilities.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import contextlib
12
import logging
23
import os
34
import pathlib
@@ -8,6 +9,11 @@
89
from dataclasses import dataclass, field
910
from typing import Optional
1011

12+
try:
13+
import tomllib
14+
except ImportError:
15+
import tomli as tomllib
16+
1117

1218
@dataclass
1319
class CoverageExpectation:
@@ -21,7 +27,10 @@ class TestConfig:
2127
# Make file_path optional when trace_mode is True
2228
file_path: Optional[pathlib.Path] = None
2329
function_name: Optional[str] = None
24-
expected_unit_tests: Optional[int] = None
30+
# Global count: "Discovered X existing unit tests and Y replay tests in Z.Zs at /path"
31+
expected_unit_tests_count: Optional[int] = None
32+
# Per-function count: "Discovered X existing unit test files, Y replay test files, and Z concolic..."
33+
expected_unit_test_files: Optional[int] = None
2534
min_improvement_x: float = 0.1
2635
trace_mode: bool = False
2736
coverage_expectations: list[CoverageExpectation] = field(default_factory=list)
@@ -129,7 +138,20 @@ def build_command(
129138

130139
if config.function_name:
131140
base_command.extend(["--function", config.function_name])
132-
base_command.extend(["--tests-root", str(test_root), "--module-root", str(cwd)])
141+
142+
# Check if pyproject.toml exists with codeflash config - if so, don't override it
143+
pyproject_path = cwd / "pyproject.toml"
144+
has_codeflash_config = False
145+
if pyproject_path.exists():
146+
with contextlib.suppress(Exception):
147+
with open(pyproject_path, "rb") as f:
148+
pyproject_data = tomllib.load(f)
149+
has_codeflash_config = "tool" in pyproject_data and "codeflash" in pyproject_data["tool"]
150+
151+
# Only pass --tests-root and --module-root if they're not configured in pyproject.toml
152+
if not has_codeflash_config:
153+
base_command.extend(["--tests-root", str(test_root), "--module-root", str(cwd)])
154+
133155
if benchmarks_root:
134156
base_command.extend(["--benchmark", "--benchmarks-root", str(benchmarks_root)])
135157
if config.use_worktree:
@@ -163,15 +185,30 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int
163185
logging.error(f"Performance improvement rate {improvement_x}x not above {config.min_improvement_x}x")
164186
return False
165187

166-
if config.expected_unit_tests is not None:
167-
unit_test_match = re.search(r"Discovered (\d+) existing unit test file", stdout)
188+
if config.expected_unit_tests_count is not None:
189+
# Match the global test discovery message from optimizer.py which counts test invocations
190+
# Format: "Discovered X existing unit tests and Y replay tests in Z.Zs at /path/to/tests"
191+
unit_test_match = re.search(r"Discovered (\d+) existing unit tests? and \d+ replay tests? in [\d.]+s at", stdout)
168192
if not unit_test_match:
169-
logging.error("Could not find unit test count")
193+
logging.error("Could not find global unit test count")
170194
return False
171195

172196
num_tests = int(unit_test_match.group(1))
173-
if num_tests != config.expected_unit_tests:
174-
logging.error(f"Expected {config.expected_unit_tests} unit tests, found {num_tests}")
197+
if num_tests != config.expected_unit_tests_count:
198+
logging.error(f"Expected {config.expected_unit_tests_count} global unit tests, found {num_tests}")
199+
return False
200+
201+
if config.expected_unit_test_files is not None:
202+
# Match the per-function test discovery message from function_optimizer.py
203+
# Format: "Discovered X existing unit test files, Y replay test files, and Z concolic..."
204+
unit_test_files_match = re.search(r"Discovered (\d+) existing unit test files?", stdout)
205+
if not unit_test_files_match:
206+
logging.error("Could not find per-function unit test file count")
207+
return False
208+
209+
num_test_files = int(unit_test_files_match.group(1))
210+
if num_test_files != config.expected_unit_test_files:
211+
logging.error(f"Expected {config.expected_unit_test_files} unit test files, found {num_test_files}")
175212
return False
176213

177214
if config.coverage_expectations:

0 commit comments

Comments
 (0)