Skip to content

Commit cafcd7f

Browse files
authored
Merge pull request #2144 from codeflash-ai/fix/test-files-silent-dedup
fix: silently skip duplicate TestFiles.add() instead of raising
2 parents 4a520d9 + 9c9f0de commit cafcd7f

5 files changed

Lines changed: 84 additions & 47 deletions

File tree

codeflash/languages/python/parse_xml.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import os
1111
import re
12-
from typing import TYPE_CHECKING
12+
from typing import TYPE_CHECKING, Any
1313

1414
from junitparser.xunit2 import JUnitXml
1515

@@ -48,7 +48,7 @@
4848
)
4949

5050

51-
def _parse_func(file_path: Path):
51+
def _parse_func(file_path: Path) -> Any:
5252
from lxml.etree import XMLParser, parse
5353

5454
xml_parser = XMLParser(huge_tree=True)
@@ -59,13 +59,22 @@ def parse_python_test_xml(
5959
test_xml_file_path: Path,
6060
test_files: TestFiles,
6161
test_config: TestConfig,
62-
run_result: subprocess.CompletedProcess | None = None,
62+
run_result: subprocess.CompletedProcess[str] | None = None,
6363
) -> TestResults:
6464
from codeflash.verification.parse_test_output import resolve_test_file_from_class_path
6565

6666
test_results = TestResults()
6767
if not test_xml_file_path.exists():
68-
logger.warning(f"No test results for {test_xml_file_path} found.")
68+
if run_result is not None and run_result.returncode != 0:
69+
stderr_snippet = (run_result.stderr or "")[:500]
70+
stdout_snippet = (run_result.stdout or "")[:500]
71+
logger.warning(
72+
f"No test results for {test_xml_file_path} found. "
73+
f"Subprocess exited with code {run_result.returncode}.\n"
74+
f"stdout: {stdout_snippet}\nstderr: {stderr_snippet}"
75+
)
76+
else:
77+
logger.warning(f"No test results for {test_xml_file_path} found.")
6978
console.rule()
7079
return test_results
7180
try:
@@ -87,12 +96,7 @@ def parse_python_test_xml(
8796
):
8897
logger.info("Test failed to load, skipping it.")
8998
if run_result is not None:
90-
if isinstance(run_result.stdout, str) and isinstance(run_result.stderr, str):
91-
logger.info(f"Test log - STDOUT : {run_result.stdout} \n STDERR : {run_result.stderr}")
92-
else:
93-
logger.info(
94-
f"Test log - STDOUT : {run_result.stdout.decode()} \n STDERR : {run_result.stderr.decode()}"
95-
)
99+
logger.info(f"Test log - STDOUT : {run_result.stdout} \n STDERR : {run_result.stderr}")
96100
return test_results
97101

98102
test_class_path = testcase.classname
@@ -159,7 +163,7 @@ def parse_python_test_xml(
159163
sys_stdout = testcase.system_out or ""
160164

161165
begin_matches = list(matches_re_start.finditer(sys_stdout))
162-
end_matches: dict[tuple, re.Match] = {}
166+
end_matches: dict[tuple[str, ...], re.Match[str]] = {}
163167
for match in matches_re_end.finditer(sys_stdout):
164168
groups = match.groups()
165169
if len(groups[5].split(":")) > 1:
@@ -234,11 +238,5 @@ def parse_python_test_xml(
234238
f"Tests '{[test_file.original_file_path for test_file in test_files.test_files]}' failed to run, skipping"
235239
)
236240
if run_result is not None:
237-
stdout, stderr = "", ""
238-
try:
239-
stdout = run_result.stdout.decode()
240-
stderr = run_result.stderr.decode()
241-
except AttributeError:
242-
stdout = run_result.stderr
243-
logger.debug(f"Test log - STDOUT : {stdout} \n STDERR : {stderr}")
241+
logger.debug(f"Test log - STDOUT : {run_result.stdout} \n STDERR : {run_result.stderr}")
244242
return test_results

codeflash/languages/python/support.py

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
CodeContext,
1616
FunctionFilterCriteria,
1717
HelperFunction,
18-
Language,
1918
ReferenceInfo,
2019
TestInfo,
2120
TestResult,
2221
)
22+
from codeflash.languages.language_enum import Language
2323
from codeflash.languages.registry import register_language
2424
from codeflash.models.function_types import FunctionParent
2525

@@ -48,8 +48,8 @@ def function_sources_to_helpers(sources: list[FunctionSource]) -> list[HelperFun
4848
qualified_name=fs.qualified_name,
4949
file_path=fs.file_path,
5050
source_code=fs.source_code,
51-
start_line=fs.jedi_definition.line if fs.jedi_definition else 1,
52-
end_line=fs.jedi_definition.line if fs.jedi_definition else 1,
51+
start_line=getattr(getattr(fs, "jedi_definition", None), "line", 1),
52+
end_line=getattr(getattr(fs, "jedi_definition", None), "line", 1),
5353
)
5454
for fs in sources
5555
]
@@ -119,7 +119,7 @@ def visit_FunctionDef(self, node: cst.FunctionDef) -> None:
119119
)
120120

121121

122-
@register_language
122+
@register_language # type: ignore[arg-type] # PythonSupport satisfies LanguageSupport protocol structurally
123123
class PythonSupport:
124124
"""Python language support implementation.
125125
@@ -214,6 +214,7 @@ def load_coverage(
214214
) -> Any:
215215
from codeflash.verification.coverage_utils import CoverageUtils
216216

217+
assert coverage_config_file is not None
217218
return CoverageUtils.load_from_sqlite_database(
218219
database_path=coverage_database_file,
219220
config_path=coverage_config_file,
@@ -854,7 +855,7 @@ def compare_test_results(
854855
candidate_results_path: Path,
855856
project_root: Path | None = None,
856857
project_classpath: str | None = None,
857-
) -> tuple[bool, list]:
858+
) -> tuple[bool, list[Any]]:
858859
"""Compare test results between original and candidate code.
859860
860861
Args:
@@ -1017,7 +1018,7 @@ def instrument_source_for_line_profiler(
10171018
# This is handled through the existing infrastructure
10181019
return True
10191020

1020-
def parse_line_profile_results(self, line_profiler_output_file: Path) -> dict:
1021+
def parse_line_profile_results(self, line_profiler_output_file: Path) -> dict[str, Any]:
10211022
"""Parse line profiler output for Python.
10221023
10231024
Args:
@@ -1078,7 +1079,7 @@ def run_behavioral_tests(
10781079
from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE
10791080
from codeflash.languages.python.static_analysis.coverage_utils import prepare_coverage_files
10801081
from codeflash.languages.python.test_runner import execute_test_subprocess
1081-
from codeflash.models.models import TestType
1082+
from codeflash.models.test_type import TestType
10821083

10831084
blocklisted_plugins = ["benchmark", "codspeed", "xdist", "sugar"]
10841085

@@ -1110,7 +1111,7 @@ def run_behavioral_tests(
11101111
common_pytest_args.append(f"--timeout={timeout}")
11111112

11121113
result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
1113-
result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
1114+
result_args = [f"--junitxml={result_file_path}", "-o", "junit_logging=all"]
11141115

11151116
pytest_test_env = test_env.copy()
11161117
pytest_test_env["PYTEST_PLUGINS"] = "codeflash.verification.pytest_plugin"
@@ -1137,14 +1138,7 @@ def run_behavioral_tests(
11371138
shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30
11381139
)
11391140
logger.debug(cov_erase)
1140-
coverage_cmd = [
1141-
SAFE_SYS_EXECUTABLE,
1142-
"-m",
1143-
"coverage",
1144-
"run",
1145-
f"--rcfile={coverage_config_file.as_posix()}",
1146-
"-m",
1147-
]
1141+
coverage_cmd = [SAFE_SYS_EXECUTABLE, "-m", "coverage", "run", f"--rcfile={coverage_config_file}", "-m"]
11481142
coverage_cmd.extend(self.pytest_cmd_tokens(IS_POSIX))
11491143

11501144
blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins if plugin != "cov"]
@@ -1201,7 +1195,7 @@ def run_benchmarking_tests(
12011195
pytest_args.append(f"--timeout={timeout}")
12021196

12031197
result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
1204-
result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
1198+
result_args = [f"--junitxml={result_file_path}", "-o", "junit_logging=all"]
12051199
pytest_test_env = test_env.copy()
12061200
pytest_test_env["PYTEST_PLUGINS"] = "codeflash.verification.pytest_plugin"
12071201
blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]
@@ -1243,7 +1237,7 @@ def run_line_profile_tests(
12431237
if timeout is not None:
12441238
pytest_args.append(f"--timeout={timeout}")
12451239
result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
1246-
result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
1240+
result_args = [f"--junitxml={result_file_path}", "-o", "junit_logging=all"]
12471241
pytest_test_env = test_env.copy()
12481242
pytest_test_env["PYTEST_PLUGINS"] = "codeflash.verification.pytest_plugin"
12491243
blocklist_args = [f"-p no:{plugin}" for plugin in blocklisted_plugins]
@@ -1258,7 +1252,7 @@ def run_line_profile_tests(
12581252

12591253
def generate_concolic_tests(
12601254
self, test_cfg: Any, project_root: Path, function_to_optimize: FunctionToOptimize, function_to_optimize_ast: Any
1261-
) -> tuple[dict, str]:
1255+
) -> tuple[dict[str, Any], str]:
12621256
import ast
12631257
import importlib.util
12641258
import subprocess
@@ -1281,7 +1275,7 @@ def generate_concolic_tests(
12811275
crosshair_available = importlib.util.find_spec("crosshair") is not None
12821276

12831277
start_time = time.perf_counter()
1284-
function_to_concolic_tests: dict = {}
1278+
function_to_concolic_tests: dict[str, Any] = {}
12851279
concolic_test_suite_code = ""
12861280

12871281
if not crosshair_available:

codeflash/languages/python/test_runner.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
from codeflash.cli_cmds.console import logger
1212
from codeflash.code_utils.code_utils import custom_addopts
13-
from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
1413
from codeflash.languages.registry import get_language_support
1514

1615
# Pattern to extract timing from stdout markers: !######...:<duration_ns>######!
@@ -92,11 +91,10 @@ def _ensure_runtime_files(project_root: Path, language: str = "javascript") -> N
9291

9392
def execute_test_subprocess(
9493
cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
95-
) -> subprocess.CompletedProcess:
94+
) -> subprocess.CompletedProcess[str]:
9695
"""Execute a subprocess with the given command list, working directory, environment variables, and timeout."""
9796
logger.debug(f"executing test run with command: {' '.join(cmd_list)}")
9897
with custom_addopts():
99-
run_args = get_cross_platform_subprocess_run_args(
100-
cwd=cwd, env=env, timeout=timeout, check=False, text=True, capture_output=True
98+
return subprocess.run(
99+
cmd_list, cwd=cwd, env=env, timeout=timeout, check=False, text=True, capture_output=True, close_fds=False
101100
)
102-
return subprocess.run(cmd_list, **run_args) # noqa: PLW1510

codeflash/models/models.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,13 +429,16 @@ class TestFile(BaseModel):
429429

430430
class TestFiles(BaseModel):
431431
test_files: list[TestFile]
432+
_seen_paths: set[Path] = PrivateAttr(default_factory=set)
433+
434+
def model_post_init(self, __context: Any, /) -> None:
435+
self._seen_paths = {tf.instrumented_behavior_file_path for tf in self.test_files}
432436

433437
def add(self, test_file: TestFile) -> None:
434-
if test_file not in self.test_files:
438+
key = test_file.instrumented_behavior_file_path
439+
if key not in self._seen_paths:
440+
self._seen_paths.add(key)
435441
self.test_files.append(test_file)
436-
else:
437-
msg = "Test file already exists in the list"
438-
raise ValueError(msg)
439442

440443
def get_by_original_file_path(self, file_path: Path) -> TestFile | None:
441444
normalized = self._normalize_path_for_comparison(file_path)

tests/test_test_files_add.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from pathlib import Path
2+
3+
from codeflash.models.models import TestFile, TestFiles
4+
from codeflash.models.test_type import TestType
5+
6+
7+
class TestTestFilesAdd:
8+
def test_add_unique_test_file(self) -> None:
9+
tf = TestFiles(test_files=[])
10+
test_file = TestFile(
11+
instrumented_behavior_file_path=Path("/tmp/test_behavior.py"),
12+
benchmarking_file_path=Path("/tmp/test_perf.py"),
13+
test_type=TestType.GENERATED_REGRESSION,
14+
)
15+
tf.add(test_file)
16+
assert len(tf.test_files) == 1
17+
assert tf.test_files[0] is test_file
18+
19+
def test_add_duplicate_is_noop(self) -> None:
20+
tf = TestFiles(test_files=[])
21+
test_file = TestFile(
22+
instrumented_behavior_file_path=Path("/tmp/test_behavior.py"),
23+
benchmarking_file_path=Path("/tmp/test_perf.py"),
24+
test_type=TestType.GENERATED_REGRESSION,
25+
)
26+
tf.add(test_file)
27+
tf.add(test_file) # silent skip — first write wins
28+
assert len(tf.test_files) == 1
29+
30+
def test_add_many_files_performance(self) -> None:
31+
tf = TestFiles(test_files=[])
32+
for i in range(100):
33+
test_file = TestFile(
34+
instrumented_behavior_file_path=Path(f"/tmp/test_behavior_{i}.py"),
35+
benchmarking_file_path=Path(f"/tmp/test_perf_{i}.py"),
36+
test_type=TestType.GENERATED_REGRESSION,
37+
)
38+
tf.add(test_file)
39+
40+
assert len(tf.test_files) == 100
41+
assert len(tf._seen_paths) == 100
42+
# Verify all paths are unique in the set
43+
expected_paths = {Path(f"/tmp/test_behavior_{i}.py") for i in range(100)}
44+
assert tf._seen_paths == expected_paths

0 commit comments

Comments
 (0)