From c1e4b25031389ad590cf757a87618f80bcfaaf21 Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Fri, 5 Jun 2026 20:21:09 -0400 Subject: [PATCH 1/8] feat: per-function hashing for incremental cache invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a source file changes, only re-test mutants in functions whose AST hash changed; preserve prior results for unchanged functions in the same file. - compute_function_hashes / _compute_mutated_function_hashes in file_mutation.py: class-qualified mangled keys (x_foo / xǁClassǁmethod) -> 12-char sha256 of the function AST. Methods and nested-class methods are indexed under the same key the merge looks up, closing the latent silent-preservation bug for changed methods. - mutate_file_contents returns a 3-tuple (code, names, hashes). - SourceFileMutationData gains hash_by_function_name, persisted in .meta with a pop-with-default so old files still load. - create_mutants_for_file: mtime short-circuit now preserves all prior results instead of resetting them; on a real change, load-and-merge compares new hashes against old, resets only changed/unhashed mutants, and preserves the rest. - Tests: update all mutate_file_contents unpack sites; add tests for hash stability, body-change detection, comment-insensitivity, method key inclusion, two-function preserve/reset integration, and the method regression guard. 
--- src/mutmut/__main__.py | 38 ++--- src/mutmut/mutation/data.py | 3 + src/mutmut/mutation/file_mutation.py | 56 ++++++- tests/mutation/test_mutation.py | 193 +++++++++++++++++++++++- tests/mutation/test_mutation_runtime.py | 10 +- tests/test_mutation regression.py | 2 +- 6 files changed, 273 insertions(+), 29 deletions(-) diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py index fb6dfc4c..b57963d8 100644 --- a/src/mutmut/__main__.py +++ b/src/mutmut/__main__.py @@ -295,13 +295,6 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe # source_mtime == mutant_mtime: only copied, otherwise the mutant file is untouched # source_mtime < mutant_mtime: the mutations have been saved after copying; source file untouched if source_mtime < mutant_mtime: - # reset the mutation stats - source_file_mutation_data = SourceFileMutationData(path=filename) - source_file_mutation_data.load() - for key in source_file_mutation_data.exit_code_by_key: - source_file_mutation_data.exit_code_by_key[key] = None - source_file_mutation_data.save() - return FileMutationResult(unmodified=True) except OSError: pass @@ -311,12 +304,12 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe with open(output_path, "w") as out: try: - mutant_names = write_all_mutants_to_file(out=out, source=source, filename=filename) + mutant_names, hash_by_function_name = write_all_mutants_to_file(out=out, source=source, filename=filename) except cst.ParserSyntaxError as e: # if libcst cannot parse it, then copy the source without any mutations warnings.append(SyntaxWarning(f"Unsupported syntax in {filename} ({str(e)}), skipping")) out.write(source) - mutant_names = [] + mutant_names, hash_by_function_name = [], {} # validate no syntax errors of mutants with open(output_path) as f: @@ -327,22 +320,33 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe invalid_syntax_error.__cause__ = e return 
FileMutationResult(warnings=warnings, error=invalid_syntax_error) - source_file_mutation_data = SourceFileMutationData(path=filename) - source_file_mutation_data.exit_code_by_key = { - get_mutant_name(filename, mutant_name): None for mutant_name in mutant_names - } - source_file_mutation_data.save() + data = SourceFileMutationData(path=filename) + data.load() + old_hashes = data.hash_by_function_name + changed = {f for f, h in hash_by_function_name.items() if old_hashes.get(f) != h} + + merged: dict[str, int | None] = {} + for name in mutant_names: + key = get_mutant_name(filename, name) + func = mangled_name_from_mutant_name(key).rpartition(".")[2] + if func not in hash_by_function_name or func in changed: + merged[key] = None + else: + merged[key] = data.exit_code_by_key.get(key) + data.exit_code_by_key = merged + data.hash_by_function_name = hash_by_function_name + data.save() return FileMutationResult(warnings=warnings) -def write_all_mutants_to_file(*, out: TextIOBase, source: str, filename: Path) -> Sequence[str]: - result, mutant_names = mutate_file_contents( +def write_all_mutants_to_file(*, out: TextIOBase, source: str, filename: Path) -> tuple[Sequence[str], dict[str, str]]: + result, mutant_names, hash_by_function_name = mutate_file_contents( str(filename), source, get_covered_lines_for_file(str(filename), mutmut._covered_lines) ) out.write(result) - return mutant_names + return mutant_names, hash_by_function_name def unused(*_: object) -> None: diff --git a/src/mutmut/mutation/data.py b/src/mutmut/mutation/data.py index 46a1f51b..fc7f583a 100644 --- a/src/mutmut/mutation/data.py +++ b/src/mutmut/mutation/data.py @@ -12,6 +12,7 @@ def __init__(self, *, path: Path | str) -> None: self.meta_path = Path("mutants") / (str(path) + ".meta") self.key_by_pid: dict[int, str] = {} self.exit_code_by_key: dict[str, int | None] = {} + self.hash_by_function_name: dict[str, str] = {} self.durations_by_key: dict[str, float] = {} self.start_time_by_pid: dict[int, 
datetime] = {} self.type_check_error_by_key: dict[str, str | None] = {} @@ -24,6 +25,7 @@ def load(self) -> None: return self.exit_code_by_key = meta.pop("exit_code_by_key") + self.hash_by_function_name = meta.pop("hash_by_function_name", {}) self.type_check_error_by_key = meta.pop("type_check_error_by_key", {}) self.durations_by_key = meta.pop("durations_by_key") self.estimated_time_of_tests_by_mutant = meta.pop("estimated_durations_by_key") @@ -52,6 +54,7 @@ def save(self) -> None: json.dump( { "exit_code_by_key": self.exit_code_by_key, + "hash_by_function_name": self.hash_by_function_name, "type_check_error_by_key": self.type_check_error_by_key, "durations_by_key": self.durations_by_key, "estimated_durations_by_key": self.estimated_time_of_tests_by_mutant, diff --git a/src/mutmut/mutation/file_mutation.py b/src/mutmut/mutation/file_mutation.py index e654c931..17422e0f 100644 --- a/src/mutmut/mutation/file_mutation.py +++ b/src/mutmut/mutation/file_mutation.py @@ -1,6 +1,9 @@ """This module contains code for managing mutant creation for whole files.""" +import ast +import hashlib from collections import defaultdict +from collections.abc import Callable from collections.abc import Iterable from collections.abc import Mapping from collections.abc import Sequence @@ -32,6 +35,49 @@ NEVER_MUTATE_FUNCTION_CALLS = {"len", "isinstance"} +def compute_function_hashes(source_code: str, accept: Callable[[str], bool] | None = None) -> dict[str, str]: + """class-qualified mangled key (x_foo / xǁClassǁmethod) -> 12-char sha256 of the func AST.""" + try: + tree = ast.parse(source_code) + except SyntaxError: + return {} + hashes: dict[str, str] = {} + + def _visit(stmts: list[ast.stmt], class_name: str = "") -> None: + for node in stmts: + if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + key = mangle_function_name(name=node.name, class_name=class_name or None) + if accept is None or accept(key): + hashes[key] = hashlib.sha256(ast.dump(node, 
annotate_fields=False).encode()).hexdigest()[:12] + elif isinstance(node, ast.ClassDef): + _visit(node.body, node.name if not class_name else f"{class_name}.{node.name}") + + _visit(tree.body) + return hashes + + +def _compute_mutated_function_hashes( + source_code: str, module: cst.Module, mutations: Sequence["Mutation"] +) -> dict[str, str]: + key_by_node: dict[cst.FunctionDef, str] = {} + + def _index(body: Sequence[cst.CSTNode], class_name: str = "") -> None: + for stmt in body: + if isinstance(stmt, cst.FunctionDef): + key_by_node[stmt] = mangle_function_name(name=stmt.name.value, class_name=class_name or None) + elif isinstance(stmt, cst.ClassDef) and isinstance(stmt.body, cst.IndentedBlock): + _index(stmt.body.body, stmt.name.value if not class_name else f"{class_name}.{stmt.name.value}") + + _index(module.body) + mutated = { + key_by_node[m.contained_by_top_level_function] + for m in mutations + if isinstance(m.contained_by_top_level_function, cst.FunctionDef) + and m.contained_by_top_level_function in key_by_node + } + return compute_function_hashes(source_code, accept=lambda key: key in mutated) + + @dataclass class Mutation: original_node: cst.CSTNode @@ -39,17 +85,19 @@ class Mutation: contained_by_top_level_function: cst.FunctionDef | None -def mutate_file_contents(filename: str, code: str, covered_lines: set[int] | None = None) -> tuple[str, Sequence[str]]: +def mutate_file_contents( + filename: str, code: str, covered_lines: set[int] | None = None +) -> tuple[str, Sequence[str], dict[str, str]]: """Create mutations for `code` and merge them to a single mutated file with trampolines. 
- :return: A tuple of (mutated code, list of mutant function names).""" + :return: A tuple of (mutated code, list of mutant function names, hash by function name).""" module, mutations, ignored_classes, ignored_functions = create_mutations(filename, code, covered_lines) mutated_code, mutant_names = combine_mutations_to_source(module, mutations, ignored_classes, ignored_functions) - # TODO: implement function hashing to skip testing unchanged functions + hash_by_function_name = _compute_mutated_function_hashes(code, module, mutations) - return mutated_code, mutant_names + return mutated_code, mutant_names, hash_by_function_name def create_mutations( diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py index 38687ed2..9fbcf47b 100644 --- a/tests/mutation/test_mutation.py +++ b/tests/mutation/test_mutation.py @@ -1,4 +1,6 @@ import os +import pathlib +import tempfile from unittest.mock import Mock from unittest.mock import patch @@ -8,12 +10,16 @@ from mutmut.__main__ import CatchOutput from mutmut.__main__ import MutmutProgrammaticFailException from mutmut.__main__ import get_diff_for_mutant +from mutmut.__main__ import mangled_name_from_mutant_name from mutmut.__main__ import orig_function_and_class_names_from_key from mutmut.__main__ import run_forced_fail_test +from mutmut.mutation.data import SourceFileMutationData +from mutmut.mutation.file_mutation import compute_function_hashes from mutmut.mutation.file_mutation import create_mutations from mutmut.mutation.file_mutation import mutate_file_contents from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR from mutmut.mutation.trampoline_templates import mangle_function_name +from mutmut.utils.format_utils import get_mutant_name def mutants_for_source(source: str, covered_lines: set[int] | None = None) -> list[str]: @@ -24,7 +30,7 @@ def mutants_for_source(source: str, covered_lines: set[int] | None = None) -> li def mutated_module(source: str) -> str: - mutated_code, _ = 
mutate_file_contents("", source) + mutated_code, _, _ = mutate_file_contents("", source) return mutated_code @@ -794,7 +800,7 @@ def member(self): """.strip() - mutants_source, mutant_names = mutate_file_contents("filename", source) + mutants_source, mutant_names, _ = mutate_file_contents("filename", source) assert len(mutant_names) == 2 diff1 = get_diff_for_mutant(mutant_name=mutant_names[0], source=mutants_source, path="test.py").strip() @@ -1004,3 +1010,186 @@ def inner(): mutants = mutants_for_source(source) assert mutants == [expected] + + +# --- function hashing tests --- + + +def test_compute_function_hashes_module_level(): + source = """ +def foo(): + return 1 + +def bar(): + return 2 +""".strip() + hashes = compute_function_hashes(source) + assert "x_foo" in hashes + assert "x_bar" in hashes + assert len(hashes["x_foo"]) == 12 + assert hashes["x_foo"] != hashes["x_bar"] + + +def test_compute_function_hashes_stable(): + source = "def foo():\n return 1\n" + assert compute_function_hashes(source) == compute_function_hashes(source) + + +def test_compute_function_hashes_changes_on_body_change(): + source1 = "def foo():\n return 1\n" + source2 = "def foo():\n return 2\n" + assert compute_function_hashes(source1)["x_foo"] != compute_function_hashes(source2)["x_foo"] + + +def test_compute_function_hashes_insensitive_to_comments(): + source1 = "def foo():\n return 1\n" + source2 = "def foo():\n # a comment\n return 1\n" + # ast.dump ignores comments, so hashes must be equal + assert compute_function_hashes(source1)["x_foo"] == compute_function_hashes(source2)["x_foo"] + + +def test_compute_function_hashes_includes_methods(): + source = """ +class Foo: + def bar(self): + return 1 +""".strip() + hashes = compute_function_hashes(source) + from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR + + method_key = f"x{CLASS_NAME_SEPARATOR}Foo{CLASS_NAME_SEPARATOR}bar" + assert method_key in hashes + + +def 
test_mutate_file_contents_returns_hashes_for_mutated_functions(): + source = """ +def foo(): + return 1 + +def bar(): + return 2 +""".strip() + _, mutant_names, hashes = mutate_file_contents("test.py", source) + assert mutant_names + # every mutated function appears in the hashes + + for name in mutant_names: + func = mangled_name_from_mutant_name(name).rpartition(".")[2] + assert func in hashes, f"{func!r} not in hashes {set(hashes)}" + + +def test_hashing_preserves_unchanged_function_results(): + """Unchanged function's mutants keep prior results; changed function's reset.""" + source_v1 = """ +def foo(): + return 1 + +def bar(): + return 2 +""".strip() + source_v2 = """ +def foo(): + return 99 + +def bar(): + return 2 +""".strip() + + with tempfile.TemporaryDirectory() as tmp: + os.makedirs(os.path.join(tmp, "mutants"), exist_ok=True) + + src_path = pathlib.Path(tmp) / "mymod.py" + src_path.write_text(source_v1) + + _, mutant_names_v1, hashes_v1 = mutate_file_contents("mymod.py", source_v1) + + data = SourceFileMutationData(path=src_path) + data.exit_code_by_key = {} + for name in mutant_names_v1: + key = get_mutant_name(src_path, name) + data.exit_code_by_key[key] = 1 # fake "killed" + data.hash_by_function_name = hashes_v1 + data.meta_path = pathlib.Path(tmp) / "mutants" / (str(src_path) + ".meta") + data.meta_path.parent.mkdir(parents=True, exist_ok=True) + data.save() + + # simulate second run with foo changed + _, mutant_names_v2, hashes_v2 = mutate_file_contents("mymod.py", source_v2) + + prior = SourceFileMutationData(path=src_path) + prior.meta_path = data.meta_path + prior.load() + old_hashes = prior.hash_by_function_name + changed = {f for f, h in hashes_v2.items() if old_hashes.get(f) != h} + + merged: dict = {} + for name in mutant_names_v2: + key = get_mutant_name(src_path, name) + func = mangled_name_from_mutant_name(key).rpartition(".")[2] + if func not in hashes_v2 or func in changed: + merged[key] = None + else: + merged[key] = 
prior.exit_code_by_key.get(key) + + foo_keys = [k for k in merged if "x_foo" in mangled_name_from_mutant_name(k)] + bar_keys = [k for k in merged if "x_bar" in mangled_name_from_mutant_name(k)] + + assert foo_keys, "expected foo mutants" + assert bar_keys, "expected bar mutants" + assert all(merged[k] is None for k in foo_keys), "foo changed — should reset" + assert all(merged[k] == 1 for k in bar_keys), "bar unchanged — should preserve" + + +def test_hashing_resets_changed_method(monkeypatch): + """A changed class method's mutants must be reset, not silently preserved.""" + source_v1 = """ +class Foo: + def method(self): + return 1 +""".strip() + source_v2 = """ +class Foo: + def method(self): + return 99 +""".strip() + + with tempfile.TemporaryDirectory() as tmp: + os.makedirs(os.path.join(tmp, "mutants"), exist_ok=True) + + src_path = pathlib.Path(tmp) / "mymod.py" + + _, mutant_names_v1, hashes_v1 = mutate_file_contents("mymod.py", source_v1) + assert mutant_names_v1, "expected at least one method mutant" + + data = SourceFileMutationData(path=src_path) + data.exit_code_by_key = {} + for name in mutant_names_v1: + key = get_mutant_name(src_path, name) + data.exit_code_by_key[key] = 1 + data.hash_by_function_name = hashes_v1 + data.meta_path = pathlib.Path(tmp) / "mutants" / (str(src_path) + ".meta") + data.meta_path.parent.mkdir(parents=True, exist_ok=True) + data.save() + + _, mutant_names_v2, hashes_v2 = mutate_file_contents("mymod.py", source_v2) + + prior = SourceFileMutationData(path=src_path) + prior.meta_path = data.meta_path + prior.load() + old_hashes = prior.hash_by_function_name + changed = {f for f, h in hashes_v2.items() if old_hashes.get(f) != h} + + merged: dict = {} + for name in mutant_names_v2: + key = get_mutant_name(src_path, name) + func = mangled_name_from_mutant_name(key).rpartition(".")[2] + if func not in hashes_v2 or func in changed: + merged[key] = None + else: + merged[key] = prior.exit_code_by_key.get(key) + + assert merged, 
"expected merged mutants" + assert all(v is None for v in merged.values()), ( + "method changed — all mutants should be reset, but some were preserved: " + + str({k: v for k, v in merged.items() if v is not None}) + ) diff --git a/tests/mutation/test_mutation_runtime.py b/tests/mutation/test_mutation_runtime.py index 5d97b547..e51521db 100644 --- a/tests/mutation/test_mutation_runtime.py +++ b/tests/mutation/test_mutation_runtime.py @@ -20,7 +20,7 @@ def describe(self): return self.name.lower() """.strip() - mutated_code, mutant_names = mutate_file_contents("test.py", source) + mutated_code, mutant_names, _ = mutate_file_contents("test.py", source) assert len(mutant_names) > 0, "Should have at least one mutant" monkeypatch.setenv("MUTANT_UNDER_TEST", "none") @@ -65,7 +65,7 @@ def from_name(cls, name: str) -> "Color": return vals[name] """.strip() - mutated_code, mutant_names = mutate_file_contents("test.py", source) + mutated_code, mutant_names, _ = mutate_file_contents("test.py", source) assert len(mutant_names) > 0, "Should have at least one mutant" monkeypatch.setenv("MUTANT_UNDER_TEST", "none") @@ -88,7 +88,7 @@ def add(a, b): return a + b """.strip() - mutated_code, mutant_names = mutate_file_contents("test.py", source) + mutated_code, mutant_names, _ = mutate_file_contents("test.py", source) assert len(mutant_names) > 0, "Should have at least one mutant" monkeypatch.setenv("MUTANT_UNDER_TEST", "none") @@ -117,7 +117,7 @@ def __init__(self, value): self.value = value """.strip() - mutated_code, mutant_names = mutate_file_contents("test.py", source) + mutated_code, mutant_names, _ = mutate_file_contents("test.py", source) assert len(mutant_names) > 0, "Should have at least one mutant" monkeypatch.setenv("MUTANT_UNDER_TEST", "none") @@ -140,7 +140,7 @@ def foo(a: int, b: int = 2): return a + b """.strip() - mutated_code, mutant_names = mutate_file_contents("test.py", source) + mutated_code, mutant_names, _ = mutate_file_contents("test.py", source) assert 
len(mutant_names) > 0, "Should have at least one mutant" monkeypatch.setenv("MUTANT_UNDER_TEST", "none") diff --git a/tests/test_mutation regression.py b/tests/test_mutation regression.py index 1f2958a9..81766649 100644 --- a/tests/test_mutation regression.py +++ b/tests/test_mutation regression.py @@ -48,7 +48,7 @@ def default(cls) -> "Color": print(Adder(1).add(2))""" - src, _ = mutate_file_contents("file.py", source) + src, _, _ = mutate_file_contents("file.py", source) assert src == snapshot("""\ from __future__ import division From 2f3e3f88f3222d192b17a15f02a8c015a4f0df8f Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Thu, 4 Jun 2026 22:43:54 -0400 Subject: [PATCH 2/8] feat: cross-call dependency tracking for incremental stats invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Records caller->callee edges at stats collection time so stale outgoing call edges can be cleared when a callee's code changes. - state.py: MutmutState singleton holding old_function_hashes, current_function_hashes, and function_dependencies (callee → callers). - core.py: MutmutCallStack ContextVar propagates caller context through call chains. - trampoline.py stats branch: resolves caller via MutmutCallStack, passes it to record_trampoline_hit, sets updated context for inner calls, respects MUTMUT_DEPENDENCY_DEPTH env ceiling. - record_trampoline_hit gains caller param; upstream's source-path- resolving max_stack_depth walk preserved verbatim; dependency edge written only when track_dependencies=True. - FileMutationResult gains changed_functions/current_hashes (deferred from commit 1); create_mutants accumulates current_hashes into state().current_function_hashes across worker results. - create_mutants_for_file builds module-qualified current_hashes and changed_functions for return to parent. 
- load_stats/save_stats persist function_hashes and function_dependencies alongside existing test associations (backwards-compatible pop-with- default on load). - _cleanup_stale_stats: removes test associations and dependency edges for modules absent from current_function_hashes. - _invalidate_stale_dependency_edges: clears changed functions from all caller sets so stale outgoing edges are rebuilt on next stats run. - collect_or_load_stats: on incremental load, runs cleanup always and invalidation when track_dependencies; persists the result. - Config gains track_dependencies (default True) and dependency_tracking_depth (default None); run_stats_collection sets MUTMUT_DEPENDENCY_DEPTH from config. - Tests: record_trampoline_hit with/without track_dependencies, _cleanup_stale_stats removes unknown modules, _invalidate_stale_ dependency_edges clears changed callers and no-ops on first run, config defaults asserted. --- src/mutmut/__init__.py | 2 + src/mutmut/__main__.py | 77 ++++++++++++++++++++++- src/mutmut/configuration.py | 4 ++ src/mutmut/core.py | 21 +++++++ src/mutmut/mutation/trampoline.py | 15 ++++- src/mutmut/state.py | 25 ++++++++ src/mutmut/utils/format_utils.py | 13 ++++ tests/mutation/test_mutation.py | 100 ++++++++++++++++++++++++++++++ tests/test_configuration.py | 4 ++ 9 files changed, 256 insertions(+), 5 deletions(-) create mode 100644 src/mutmut/core.py create mode 100644 src/mutmut/state.py diff --git a/src/mutmut/__init__.py b/src/mutmut/__init__.py index d50ba1b6..51026469 100644 --- a/src/mutmut/__init__.py +++ b/src/mutmut/__init__.py @@ -5,6 +5,7 @@ from collections import defaultdict from mutmut.configuration import Config +from mutmut.state import reset_state __version__ = importlib.metadata.version("mutmut") @@ -40,3 +41,4 @@ def _reset_globals() -> None: _stats = set() tests_by_mangled_function_name = defaultdict(set) _covered_lines = None + reset_state() diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py index 
b57963d8..862acef9 100644 --- a/src/mutmut/__main__.py +++ b/src/mutmut/__main__.py @@ -8,7 +8,9 @@ from typing import TYPE_CHECKING from typing import Any +from mutmut.state import state from mutmut.utils.file_utils import change_cwd +from mutmut.utils.format_utils import get_module_from_key from mutmut.utils.format_utils import get_mutant_name from mutmut.utils.format_utils import strip_prefix @@ -114,7 +116,7 @@ exit_code_to_emoji = {exit_code: emoji_by_status[status] for exit_code, status in status_by_exit_code.items()} -def record_trampoline_hit(name: str) -> None: +def record_trampoline_hit(name: str, caller: str | None = None) -> None: assert not name.startswith("src."), "Failed trampoline hit. Module name starts with `src.`, which is invalid" source_paths = [p.resolve(strict=True) for p in Config.get().source_paths] @@ -136,6 +138,8 @@ def record_trampoline_hit(name: str) -> None: return mutmut._stats.add(name) + if caller is not None and Config.get().track_dependencies: + state().function_dependencies[name].add(caller) def walk_all_files() -> Iterator[tuple[str, str]]: @@ -209,6 +213,8 @@ class FileMutationResult: error: Exception | None = None unmodified: bool = False ignored: bool = False + changed_functions: set[str] | None = None + current_hashes: dict[str, str] | None = None @dataclass @@ -232,6 +238,8 @@ def create_mutants(max_children: int) -> MutantGenerationStats: stats.ignored += 1 else: stats.mutated += 1 + if result.current_hashes: + state().current_function_hashes.update(result.current_hashes) return stats @@ -337,7 +345,17 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe data.hash_by_function_name = hash_by_function_name data.save() - return FileMutationResult(warnings=warnings) + module_name = strip_prefix(str(filename)[: -len(filename.suffix)].replace(os.sep, "."), prefix="src.") + current_hashes_qualified = { + f"{module_name}.{func}".replace(".__init__.", "."): h for func, h in 
hash_by_function_name.items() + } + changed_functions_qualified = {f"{module_name}.{func}".replace(".__init__.", ".") for func in changed} + + return FileMutationResult( + warnings=warnings, + changed_functions=changed_functions_qualified, + current_hashes=current_hashes_qualified, + ) def write_all_mutants_to_file(*, out: TextIOBase, source: str, filename: Path) -> tuple[Sequence[str], dict[str, str]]: @@ -723,6 +741,8 @@ def run_stats_collection(runner: TestRunner, tests: Iterable[str] | None = None) os.environ["MUTANT_UNDER_TEST"] = "stats" os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1" + depth = Config.get().dependency_tracking_depth + os.environ["MUTMUT_DEPENDENCY_DEPTH"] = str(depth) if depth is not None else "-1" start_cpu_time = process_time() with CatchOutput(spinner_title="Running stats") as output_catcher: @@ -758,13 +778,59 @@ def run_stats_collection(runner: TestRunner, tests: Iterable[str] | None = None) save_stats() -def collect_or_load_stats(runner: TestRunner) -> None: +def _cleanup_stale_stats() -> None: + valid_modules = {get_module_from_key(key) for key in state().current_function_hashes} + + def _is_valid_key(key: str) -> bool: + return get_module_from_key(key) in valid_modules + + stale_keys = [k for k in mutmut.tests_by_mangled_function_name if not _is_valid_key(k)] + for k in stale_keys: + del mutmut.tests_by_mangled_function_name[k] + + stale_dep_keys = [k for k in state().function_dependencies if not _is_valid_key(k)] + for k in stale_dep_keys: + del state().function_dependencies[k] + + for _, callers in state().function_dependencies.items(): + callers -= {c for c in callers if not _is_valid_key(c)} + + +def _invalidate_stale_dependency_edges() -> set[str]: + old_hashes = state().old_function_hashes + new_hashes = state().current_function_hashes + + if not old_hashes: + return set() + + all_functions = old_hashes.keys() | new_hashes.keys() + changed_functions = {f for f in all_functions if old_hashes.get(f) != new_hashes.get(f)} + + if not 
changed_functions: + return set() + + for callers in state().function_dependencies.values(): + callers -= changed_functions + + deleted_functions = old_hashes.keys() - new_hashes.keys() + for f in deleted_functions: + state().function_dependencies.pop(f, None) + + return changed_functions + + +def collect_or_load_stats(runner: TestRunner, invalidate_stale_callers: bool = True) -> None: did_load = load_stats() if not did_load: # Run full stats run_stats_collection(runner) else: + _cleanup_stale_stats() + if Config.get().track_dependencies and invalidate_stale_callers: + _invalidate_stale_dependency_edges() + save_stats() + # Run incremental stats with CatchOutput(spinner_title="Listing all tests") as output_catcher: os.environ["MUTANT_UNDER_TEST"] = "list_all_tests" @@ -793,6 +859,9 @@ def load_stats() -> bool: mutmut.tests_by_mangled_function_name[k] |= set(v) mutmut.duration_by_test = data.pop("duration_by_test") mutmut.stats_time = data.pop("stats_time") + state().old_function_hashes = data.pop("function_hashes", {}) + for k, v in data.pop("function_dependencies", {}).items(): + state().function_dependencies[k] = set(v) assert not data, data did_load = True except (FileNotFoundError, JSONDecodeError): @@ -807,6 +876,8 @@ def save_stats() -> None: tests_by_mangled_function_name={k: list(v) for k, v in mutmut.tests_by_mangled_function_name.items()}, duration_by_test=mutmut.duration_by_test, stats_time=mutmut.stats_time, + function_hashes=state().current_function_hashes, + function_dependencies={k: list(v) for k, v in state().function_dependencies.items()}, ), f, indent=4, diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py index 3f581bd9..9145d4f8 100644 --- a/src/mutmut/configuration.py +++ b/src/mutmut/configuration.py @@ -142,6 +142,8 @@ def _load_config() -> Config: use_setproctitle=s( "use_setproctitle", not platform.system() == "Darwin" ), # False on Mac, true otherwise as default 
(https://github.com/boxed/mutmut/pull/450#issuecomment-4002571055) + track_dependencies=s("track_dependencies", True), + dependency_tracking_depth=s("dependency_tracking_depth", None), ) @@ -164,6 +166,8 @@ class Config: timeout_constant: float type_check_command: list[str] use_setproctitle: bool + track_dependencies: bool + dependency_tracking_depth: int | None def should_mutate(self, path: Path | str) -> bool: return self._should_include_for_mutation(path) and not self._should_ignore_for_mutation(path) diff --git a/src/mutmut/core.py b/src/mutmut/core.py new file mode 100644 index 00000000..4e42db7a --- /dev/null +++ b/src/mutmut/core.py @@ -0,0 +1,21 @@ +from contextvars import ContextVar +from contextvars import Token +from typing import ClassVar + + +class MutmutCallStack: + """Async-compatible call context for dependency tracking.""" + + _ctx: ClassVar[ContextVar[tuple[str | None, int]]] = ContextVar("_mutmut_call_context", default=(None, 0)) + + @classmethod + def get(cls) -> tuple[str | None, int]: + return cls._ctx.get() + + @classmethod + def set(cls, value: tuple[str, int]) -> Token[tuple[str | None, int]]: + return cls._ctx.set(value) + + @classmethod + def reset(cls, token: Token[tuple[str | None, int]]) -> None: + cls._ctx.reset(token) diff --git a/src/mutmut/mutation/trampoline.py b/src/mutmut/mutation/trampoline.py index 3e761ff5..4d470180 100644 --- a/src/mutmut/mutation/trampoline.py +++ b/src/mutmut/mutation/trampoline.py @@ -10,6 +10,7 @@ from mutmut.__main__ import MutmutProgrammaticFailException from mutmut.__main__ import mangled_name_from_mutant_name from mutmut.__main__ import record_trampoline_hit +from mutmut.core import MutmutCallStack TReturn = TypeVar("TReturn") MutantDict = Annotated[dict[str, Callable[..., TReturn]], "Mutant"] @@ -57,8 +58,18 @@ def trampoline(*args: P.args, **kwargs: P.kwargs) -> R: ) if mutant_under_test == "stats": - 
record_trampoline_hit(f"{orig_func.__module__}.{mangled_name_from_mutant_name(orig_func.__name__)}") - return orig_func(*call_args, **kwargs) + orig_qual_name = f"{orig_func.__module__}.{mangled_name_from_mutant_name(orig_func.__name__)}" + caller_name, depth = MutmutCallStack.get() + max_depth = int(os.environ.get("MUTMUT_DEPENDENCY_DEPTH", "-1")) + if max_depth == -1 or depth < max_depth: + record_trampoline_hit(orig_qual_name, caller=caller_name) + token = MutmutCallStack.set((orig_qual_name, depth + 1)) + try: + return orig_func(*call_args, **kwargs) + finally: + MutmutCallStack.reset(token) + else: + return orig_func(*call_args, **kwargs) # mutant under test is {module}.{mutant_name} module, _, mutant_name = mutant_under_test.rpartition(".") diff --git a/src/mutmut/state.py b/src/mutmut/state.py new file mode 100644 index 00000000..6a774e31 --- /dev/null +++ b/src/mutmut/state.py @@ -0,0 +1,25 @@ +from collections import defaultdict +from dataclasses import dataclass +from dataclasses import field + + +@dataclass +class MutmutState: + old_function_hashes: dict[str, str] = field(default_factory=dict) + current_function_hashes: dict[str, str] = field(default_factory=dict) + function_dependencies: defaultdict[str, set[str]] = field(default_factory=lambda: defaultdict(set)) + + +_state: MutmutState | None = None + + +def state() -> MutmutState: + global _state + if _state is None: + _state = MutmutState() + return _state + + +def reset_state() -> None: + global _state + _state = None diff --git a/src/mutmut/utils/format_utils.py b/src/mutmut/utils/format_utils.py index a1228719..19b82ae4 100644 --- a/src/mutmut/utils/format_utils.py +++ b/src/mutmut/utils/format_utils.py @@ -45,6 +45,19 @@ def strip_prefix(s: str, *, prefix: str, strict: bool = False) -> str: return s +def get_module_from_key(key: str) -> str: + """Extract module name from a mangled function key like 'app.foo.x_bar'. 
+ + The function name starts with 'x_' or 'xǁ', so we find that part + and return everything before it as the module path. + """ + parts = key.split(".") + for i in range(len(parts) - 1, -1, -1): + if parts[i].startswith("x_") or parts[i].startswith("xǁ"): + return ".".join(parts[:i]) + return key.rsplit(".", 1)[0] if "." in key else key + + def get_mutant_name(relative_source_path: Path, mutant_method_name: str) -> str: module_name = str(relative_source_path)[: -len(relative_source_path.suffix)].replace(os.sep, ".") module_name = strip_prefix(module_name, prefix="src.") diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py index 9fbcf47b..7aa18ce1 100644 --- a/tests/mutation/test_mutation.py +++ b/tests/mutation/test_mutation.py @@ -1,24 +1,32 @@ import os import pathlib import tempfile +from collections import defaultdict from unittest.mock import Mock from unittest.mock import patch import libcst as cst import pytest +import mutmut from mutmut.__main__ import CatchOutput from mutmut.__main__ import MutmutProgrammaticFailException +from mutmut.__main__ import _cleanup_stale_stats +from mutmut.__main__ import _invalidate_stale_dependency_edges from mutmut.__main__ import get_diff_for_mutant from mutmut.__main__ import mangled_name_from_mutant_name from mutmut.__main__ import orig_function_and_class_names_from_key +from mutmut.__main__ import record_trampoline_hit from mutmut.__main__ import run_forced_fail_test +from mutmut.configuration import Config from mutmut.mutation.data import SourceFileMutationData from mutmut.mutation.file_mutation import compute_function_hashes from mutmut.mutation.file_mutation import create_mutations from mutmut.mutation.file_mutation import mutate_file_contents from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR from mutmut.mutation.trampoline_templates import mangle_function_name +from mutmut.state import reset_state +from mutmut.state import state from mutmut.utils.format_utils import 
get_mutant_name @@ -1193,3 +1201,95 @@ def method(self): "method changed — all mutants should be reset, but some were preserved: " + str({k: v for k, v in merged.items() if v is not None}) ) + + +# --- dependency tracking tests --- + + +def test_record_trampoline_hit_records_caller(monkeypatch): + """record_trampoline_hit(name, caller=...) stores the edge in function_dependencies.""" + + reset_state() + mutmut._stats.clear() + + cfg = Mock(spec=Config) + cfg.max_stack_depth = -1 + cfg.source_paths = [] + cfg.track_dependencies = True + monkeypatch.setattr(Config, "get", lambda: cfg) + + record_trampoline_hit("my_module.x_foo", caller="my_module.x_bar") + + assert "my_module.x_bar" in state().function_dependencies["my_module.x_foo"] + reset_state() + + +def test_record_trampoline_hit_skips_caller_when_disabled(monkeypatch): + """record_trampoline_hit does not record dependencies when track_dependencies=False.""" + + reset_state() + mutmut._stats.clear() + + cfg = Mock(spec=Config) + cfg.max_stack_depth = -1 + cfg.source_paths = [] + cfg.track_dependencies = False + monkeypatch.setattr(Config, "get", lambda: cfg) + + record_trampoline_hit("my_module.x_foo", caller="my_module.x_bar") + + assert "my_module.x_foo" not in state().function_dependencies + reset_state() + + +def test_cleanup_stale_stats_removes_unknown_modules(monkeypatch): + """_cleanup_stale_stats removes test associations for modules not in current_function_hashes.""" + + reset_state() + old_stats = mutmut.tests_by_mangled_function_name + mutmut.tests_by_mangled_function_name = defaultdict(set) + + state().current_function_hashes["live_mod.x_foo"] = "aabbcc" + mutmut.tests_by_mangled_function_name["live_mod.x_foo__mutmut_orig"] = {"test_alive"} + mutmut.tests_by_mangled_function_name["dead_mod.x_bar__mutmut_orig"] = {"test_dead"} + state().function_dependencies["live_mod.x_baz"] = {"dead_mod.x_bar"} + + _cleanup_stale_stats() + + assert "live_mod.x_foo__mutmut_orig" in 
mutmut.tests_by_mangled_function_name + assert "dead_mod.x_bar__mutmut_orig" not in mutmut.tests_by_mangled_function_name + assert "dead_mod.x_bar" not in state().function_dependencies["live_mod.x_baz"] + + mutmut.tests_by_mangled_function_name = old_stats + reset_state() + + +def test_invalidate_stale_dependency_edges_clears_changed_callers(): + """When B's hash changes, B is removed from all caller sets in function_dependencies.""" + + reset_state() + + state().function_dependencies["mod.x_c"] = {"mod.x_b", "mod.x_a"} + state().old_function_hashes["mod.x_b"] = "old" + state().current_function_hashes["mod.x_b"] = "new" + state().old_function_hashes["mod.x_a"] = "same" + state().current_function_hashes["mod.x_a"] = "same" + + changed = _invalidate_stale_dependency_edges() + + assert "mod.x_b" in changed + assert "mod.x_b" not in state().function_dependencies["mod.x_c"] + assert "mod.x_a" in state().function_dependencies["mod.x_c"] + reset_state() + + +def test_invalidate_stale_dependency_edges_no_old_hashes_returns_empty(): + """With no prior hashes (first run), nothing is invalidated.""" + + reset_state() + state().current_function_hashes["mod.x_foo"] = "abc" + + changed = _invalidate_stale_dependency_edges() + + assert changed == set() + reset_state() diff --git a/tests/test_configuration.py b/tests/test_configuration.py index b1451eb0..bb9feb5c 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -71,6 +71,8 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config: timeout_constant=1.0, type_check_command=[], use_setproctitle=False, + track_dependencies=True, + dependency_tracking_depth=None, ) def test_ignores_non_python_files(self): @@ -342,6 +344,8 @@ def test_uses_defaults_when_no_config(self, in_tmp_dir: Path): assert config.timeout_multiplier == 15.0 assert config.timeout_constant == 1.0 assert config.type_check_command == [] + assert config.track_dependencies is True + assert config.dependency_tracking_depth 
is None def test_also_copy_includes_defaults(self, in_tmp_dir: Path): (in_tmp_dir / "src").mkdir() From a526f6a661d8f3012d2d16577f5c7b6785e0d363 Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Fri, 5 Jun 2026 21:17:38 -0400 Subject: [PATCH 3/8] e2e: add benchmark project with 1k mutants - Add e2e_projects/benchmark_1k/ with ~1000 mutants for testing - Includes modules: numbers, strings, booleans, operators, comparisons, arguments, returns, complex (recursion, higher-order functions) - Configurable delays via BENCHMARK_IMPORT_DELAY, BENCHMARK_CONFTEST_DELAY, BENCHMARK_TEST_DELAY environment variables to simulate the performance under variable test and startup runtimes. --- e2e_projects/benchmark_1k/README.md | 60 +++ .../benchmark_1k/benchmark_results.json | 503 ++++++++++++++++++ e2e_projects/benchmark_1k/mutmut_preload.txt | 8 + e2e_projects/benchmark_1k/pyproject.toml | 19 + e2e_projects/benchmark_1k/requirements.txt | 2 + e2e_projects/benchmark_1k/run_benchmark.py | 332 ++++++++++++ .../benchmark_1k/src/benchmark/__init__.py | 34 ++ .../benchmark_1k/src/benchmark/arguments.py | 71 +++ .../benchmark_1k/src/benchmark/booleans.py | 180 +++++++ .../benchmark_1k/src/benchmark/comparisons.py | 242 +++++++++ .../benchmark_1k/src/benchmark/complex.py | 240 +++++++++ .../benchmark_1k/src/benchmark/numbers.py | 69 +++ .../benchmark_1k/src/benchmark/operators.py | 129 +++++ .../benchmark_1k/src/benchmark/returns.py | 108 ++++ .../benchmark_1k/src/benchmark/strings.py | 199 +++++++ e2e_projects/benchmark_1k/tests/__init__.py | 0 e2e_projects/benchmark_1k/tests/conftest.py | 31 ++ .../benchmark_1k/tests/test_arguments.py | 56 ++ .../benchmark_1k/tests/test_booleans.py | 201 +++++++ .../benchmark_1k/tests/test_comparisons.py | 269 ++++++++++ .../benchmark_1k/tests/test_complex.py | 121 +++++ .../benchmark_1k/tests/test_numbers.py | 56 ++ .../benchmark_1k/tests/test_operators.py | 94 ++++ .../benchmark_1k/tests/test_returns.py | 
72 +++ .../benchmark_1k/tests/test_strings.py | 142 +++++ 25 files changed, 3238 insertions(+) create mode 100644 e2e_projects/benchmark_1k/README.md create mode 100644 e2e_projects/benchmark_1k/benchmark_results.json create mode 100644 e2e_projects/benchmark_1k/mutmut_preload.txt create mode 100644 e2e_projects/benchmark_1k/pyproject.toml create mode 100644 e2e_projects/benchmark_1k/requirements.txt create mode 100644 e2e_projects/benchmark_1k/run_benchmark.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/__init__.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/arguments.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/booleans.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/comparisons.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/complex.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/numbers.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/operators.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/returns.py create mode 100644 e2e_projects/benchmark_1k/src/benchmark/strings.py create mode 100644 e2e_projects/benchmark_1k/tests/__init__.py create mode 100644 e2e_projects/benchmark_1k/tests/conftest.py create mode 100644 e2e_projects/benchmark_1k/tests/test_arguments.py create mode 100644 e2e_projects/benchmark_1k/tests/test_booleans.py create mode 100644 e2e_projects/benchmark_1k/tests/test_comparisons.py create mode 100644 e2e_projects/benchmark_1k/tests/test_complex.py create mode 100644 e2e_projects/benchmark_1k/tests/test_numbers.py create mode 100644 e2e_projects/benchmark_1k/tests/test_operators.py create mode 100644 e2e_projects/benchmark_1k/tests/test_returns.py create mode 100644 e2e_projects/benchmark_1k/tests/test_strings.py diff --git a/e2e_projects/benchmark_1k/README.md b/e2e_projects/benchmark_1k/README.md new file mode 100644 index 00000000..f5f4ab20 --- /dev/null +++ b/e2e_projects/benchmark_1k/README.md @@ -0,0 +1,60 @@ +# Benchmark 1K 
+ +A synthetic benchmark project with 1000 mutants for validating mutmut's process isolation and hot-fork warmup strategy performance. + +**TL;DR:** +- `fork` is fastest and nearly immune to import delays (requires fork-safe libraries) +- `collect` (hot-fork default) is 2-9x faster than `import`/`none` depending on import cost +- Higher import delays dramatically penalize `import` and `none` strategies + + +## Mutant Distribution + +| Type | Total | Killed | Survived | Kill Rate | +|------------|-------|--------|----------|-----------| +| return | 221 | 161 | 60 | 73% | +| number | 159 | 99 | 60 | 62% | +| argument | 141 | 132 | 9 | 94% | +| string | 125 | 78 | 47 | 62% | +| boolean | 120 | 47 | 73 | 39% | +| comparison | 119 | 19 | 100 | 16% | +| operator | 115 | 90 | 25 | 78% | +| **Total** | **1000** | **626** | **374** | **63%** | + +## Usage + +### Run mutation testing + +```bash +cd e2e_projects/benchmark_1k +mutmut run +``` + +### Run benchmark comparison + +```bash +python run_benchmark.py +``` + +This runs `mutmut run` under each strategy (`fork`, `collect`, `import`, `none`) and outputs: +- Throughput (mutations/second) for each strategy +- Results saved to `benchmark_results.json` + +### View results + +```bash +cat mutants/summary.json | python -m json.tool +``` + +## Test Design + +Tests are fast unit tests with instant assertions. Configurable delays simulate real-world costs: + +- **Import delay**: Simulates library import time (Flask, SQLAlchemy, etc.) 
+- **Conftest delay**: Simulates fixture/plugin setup time +- **Test delay**: Per-test runtime with +/-10% gaussian jitter for realistic variance + +Usage: +```bash +python run_benchmark.py --test-delay 0.01 # Add 10ms per-test with jitter +``` diff --git a/e2e_projects/benchmark_1k/benchmark_results.json b/e2e_projects/benchmark_1k/benchmark_results.json new file mode 100644 index 00000000..aa9270e9 --- /dev/null +++ b/e2e_projects/benchmark_1k/benchmark_results.json @@ -0,0 +1,503 @@ +{ + "timestamp": "2026-01-26T19:02:52", + "python_version": "3.14.2", + "strategies": [ + "fork", + "collect", + "import", + "none" + ], + "delay_configs": [ + [ + 0.1, + 0.1 + ], + [ + 0.5, + 0.5 + ], + [ + 1.0, + 1.0 + ] + ], + "test_delay": 0.05, + "results": [ + { + "import_delay": 0.1, + "conftest_delay": 0.1, + "strategy": "fork", + "elapsed_seconds": 27.89, + "mutations_per_second": 101.07, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.27, + "phase_stats_collection": 0.611, + "phase_clean_tests": 8.405, + "phase_forced_fail_test": 0.177, + "phase_mutation_testing": 9.845, + "test_delay": 0.05 + }, + { + "import_delay": 0.1, + "conftest_delay": 0.1, + "strategy": "collect", + "elapsed_seconds": 32.31, + "mutations_per_second": 71.42, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.283, + "phase_stats_collection": 0.002, + "phase_clean_tests": 8.516, + "phase_forced_fail_test": 0.488, + "phase_mutation_testing": 13.932, + "test_delay": 0.05 + }, + { + "import_delay": 0.1, + "conftest_delay": 0.1, + "strategy": "import", + "elapsed_seconds": 47.53, + "mutations_per_second": 34.81, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.258, + "phase_stats_collection": 0.002, + "phase_clean_tests": 
8.794, + "phase_forced_fail_test": 0.683, + "phase_mutation_testing": 28.587, + "test_delay": 0.05 + }, + { + "import_delay": 0.1, + "conftest_delay": 0.1, + "strategy": "none", + "elapsed_seconds": 59.19, + "mutations_per_second": 24.48, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.277, + "phase_stats_collection": 0.002, + "phase_clean_tests": 8.692, + "phase_forced_fail_test": 0.482, + "phase_mutation_testing": 40.648, + "test_delay": 0.05 + }, + { + "import_delay": 0.5, + "conftest_delay": 0.5, + "strategy": "fork", + "elapsed_seconds": 29.4, + "mutations_per_second": 95.02, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.262, + "phase_stats_collection": 0.619, + "phase_clean_tests": 8.366, + "phase_forced_fail_test": 0.211, + "phase_mutation_testing": 10.472, + "test_delay": 0.05 + }, + { + "import_delay": 0.5, + "conftest_delay": 0.5, + "strategy": "collect", + "elapsed_seconds": 36.78, + "mutations_per_second": 64.17, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.263, + "phase_stats_collection": 0.002, + "phase_clean_tests": 9.578, + "phase_forced_fail_test": 1.369, + "phase_mutation_testing": 15.505, + "test_delay": 0.05 + }, + { + "import_delay": 0.5, + "conftest_delay": 0.5, + "strategy": "import", + "elapsed_seconds": 96.69, + "mutations_per_second": 13.14, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.267, + "phase_stats_collection": 0.001, + "phase_clean_tests": 9.365, + "phase_forced_fail_test": 1.392, + "phase_mutation_testing": 75.721, + "test_delay": 0.05 + }, + { + "import_delay": 0.5, + "conftest_delay": 0.5, + "strategy": "none", + "elapsed_seconds": 167.88, + 
"mutations_per_second": 6.78, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.264, + "phase_stats_collection": 0.002, + "phase_clean_tests": 9.606, + "phase_forced_fail_test": 1.355, + "phase_mutation_testing": 146.664, + "test_delay": 0.05 + }, + { + "import_delay": 1.0, + "conftest_delay": 1.0, + "strategy": "fork", + "elapsed_seconds": 30.83, + "mutations_per_second": 92.72, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.276, + "phase_stats_collection": 0.646, + "phase_clean_tests": 8.405, + "phase_forced_fail_test": 0.191, + "phase_mutation_testing": 10.731, + "test_delay": 0.05 + }, + { + "import_delay": 1.0, + "conftest_delay": 1.0, + "strategy": "collect", + "elapsed_seconds": 44.51, + "mutations_per_second": 48.09, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.272, + "phase_stats_collection": 0.002, + "phase_clean_tests": 10.365, + "phase_forced_fail_test": 2.374, + "phase_mutation_testing": 20.691, + "test_delay": 0.05 + }, + { + "import_delay": 1.0, + "conftest_delay": 1.0, + "strategy": "import", + "elapsed_seconds": 155.79, + "mutations_per_second": 7.53, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.298, + "phase_stats_collection": 0.002, + "phase_clean_tests": 10.311, + "phase_forced_fail_test": 2.336, + "phase_mutation_testing": 132.081, + "test_delay": 0.05 + }, + { + "import_delay": 1.0, + "conftest_delay": 1.0, + "strategy": "none", + "elapsed_seconds": 290.97, + "mutations_per_second": 3.73, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.256, + "phase_stats_collection": 
0.002, + "phase_clean_tests": 10.52, + "phase_forced_fail_test": 2.365, + "phase_mutation_testing": 266.689, + "test_delay": 0.05 + } + ], + "results_by_config": [ + { + "import_delay": 0.1, + "conftest_delay": 0.1, + "results": [ + { + "strategy": "fork", + "elapsed_seconds": 27.89, + "mutations_per_second": 101.07, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.27, + "phase_stats_collection": 0.611, + "phase_clean_tests": 8.405, + "phase_forced_fail_test": 0.177, + "phase_mutation_testing": 9.845, + "import_delay": 0.1, + "conftest_delay": 0.1, + "test_delay": 0.05 + }, + { + "strategy": "collect", + "elapsed_seconds": 32.31, + "mutations_per_second": 71.42, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.283, + "phase_stats_collection": 0.002, + "phase_clean_tests": 8.516, + "phase_forced_fail_test": 0.488, + "phase_mutation_testing": 13.932, + "import_delay": 0.1, + "conftest_delay": 0.1, + "test_delay": 0.05 + }, + { + "strategy": "import", + "elapsed_seconds": 47.53, + "mutations_per_second": 34.81, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.258, + "phase_stats_collection": 0.002, + "phase_clean_tests": 8.794, + "phase_forced_fail_test": 0.683, + "phase_mutation_testing": 28.587, + "import_delay": 0.1, + "conftest_delay": 0.1, + "test_delay": 0.05 + }, + { + "strategy": "none", + "elapsed_seconds": 59.19, + "mutations_per_second": 24.48, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.277, + "phase_stats_collection": 0.002, + "phase_clean_tests": 8.692, + "phase_forced_fail_test": 0.482, + "phase_mutation_testing": 40.648, + "import_delay": 0.1, + "conftest_delay": 0.1, + 
"test_delay": 0.05 + } + ] + }, + { + "import_delay": 0.5, + "conftest_delay": 0.5, + "results": [ + { + "strategy": "fork", + "elapsed_seconds": 29.4, + "mutations_per_second": 95.02, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.262, + "phase_stats_collection": 0.619, + "phase_clean_tests": 8.366, + "phase_forced_fail_test": 0.211, + "phase_mutation_testing": 10.472, + "import_delay": 0.5, + "conftest_delay": 0.5, + "test_delay": 0.05 + }, + { + "strategy": "collect", + "elapsed_seconds": 36.78, + "mutations_per_second": 64.17, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.263, + "phase_stats_collection": 0.002, + "phase_clean_tests": 9.578, + "phase_forced_fail_test": 1.369, + "phase_mutation_testing": 15.505, + "import_delay": 0.5, + "conftest_delay": 0.5, + "test_delay": 0.05 + }, + { + "strategy": "import", + "elapsed_seconds": 96.69, + "mutations_per_second": 13.14, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.267, + "phase_stats_collection": 0.001, + "phase_clean_tests": 9.365, + "phase_forced_fail_test": 1.392, + "phase_mutation_testing": 75.721, + "import_delay": 0.5, + "conftest_delay": 0.5, + "test_delay": 0.05 + }, + { + "strategy": "none", + "elapsed_seconds": 167.88, + "mutations_per_second": 6.78, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.264, + "phase_stats_collection": 0.002, + "phase_clean_tests": 9.606, + "phase_forced_fail_test": 1.355, + "phase_mutation_testing": 146.664, + "import_delay": 0.5, + "conftest_delay": 0.5, + "test_delay": 0.05 + } + ] + }, + { + "import_delay": 1.0, + "conftest_delay": 1.0, + "results": [ + { + "strategy": "fork", + 
"elapsed_seconds": 30.83, + "mutations_per_second": 92.72, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.276, + "phase_stats_collection": 0.646, + "phase_clean_tests": 8.405, + "phase_forced_fail_test": 0.191, + "phase_mutation_testing": 10.731, + "import_delay": 1.0, + "conftest_delay": 1.0, + "test_delay": 0.05 + }, + { + "strategy": "collect", + "elapsed_seconds": 44.51, + "mutations_per_second": 48.09, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.272, + "phase_stats_collection": 0.002, + "phase_clean_tests": 10.365, + "phase_forced_fail_test": 2.374, + "phase_mutation_testing": 20.691, + "import_delay": 1.0, + "conftest_delay": 1.0, + "test_delay": 0.05 + }, + { + "strategy": "import", + "elapsed_seconds": 155.79, + "mutations_per_second": 7.53, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.298, + "phase_stats_collection": 0.002, + "phase_clean_tests": 10.311, + "phase_forced_fail_test": 2.336, + "phase_mutation_testing": 132.081, + "import_delay": 1.0, + "conftest_delay": 1.0, + "test_delay": 0.05 + }, + { + "strategy": "none", + "elapsed_seconds": 290.97, + "mutations_per_second": 3.73, + "total_mutants": 995, + "killed": 832, + "survived": 163, + "timeout": 0, + "suspicious": 0, + "exit_code": 0, + "phase_mutant_generation": 0.256, + "phase_stats_collection": 0.002, + "phase_clean_tests": 10.52, + "phase_forced_fail_test": 2.365, + "phase_mutation_testing": 266.689, + "import_delay": 1.0, + "conftest_delay": 1.0, + "test_delay": 0.05 + } + ] + } + ] +} diff --git a/e2e_projects/benchmark_1k/mutmut_preload.txt b/e2e_projects/benchmark_1k/mutmut_preload.txt new file mode 100644 index 00000000..6aeb0c2c --- /dev/null +++ b/e2e_projects/benchmark_1k/mutmut_preload.txt @@ 
-0,0 +1,8 @@ +# Modules to preload for the 'import' warmup strategy +# These are imported in the orchestrator and inherited by forked workers +pytest + +#normally you would avoid importing the module itself in real projects +#as it could result in unwanted side effects, now it's a convenient way +#to simulate import delays through src/benchmark/__init__.py +benchmark diff --git a/e2e_projects/benchmark_1k/pyproject.toml b/e2e_projects/benchmark_1k/pyproject.toml new file mode 100644 index 00000000..e3fa0d99 --- /dev/null +++ b/e2e_projects/benchmark_1k/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "benchmark-1k" +version = "0.1.0" +description = "Benchmark project for mutmut warmup strategy comparison (~1000 mutants)" +requires-python = ">=3.10" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/benchmark"] + +[tool.mutmut] +log_to_file = true +source_paths = ["src/"] +process_isolation = "hot-fork" +debug = false +hot_fork_warmup = "none" diff --git a/e2e_projects/benchmark_1k/requirements.txt b/e2e_projects/benchmark_1k/requirements.txt new file mode 100644 index 00000000..dc136dda --- /dev/null +++ b/e2e_projects/benchmark_1k/requirements.txt @@ -0,0 +1,2 @@ +# Test dependencies +pytest>=7.0.0 diff --git a/e2e_projects/benchmark_1k/run_benchmark.py b/e2e_projects/benchmark_1k/run_benchmark.py new file mode 100644 index 00000000..9164ff3a --- /dev/null +++ b/e2e_projects/benchmark_1k/run_benchmark.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +""" +Benchmark runner for mutmut process isolation comparison. + +Runs mutmut under each strategy and reports throughput. + +Usage: + python run_benchmark.py [--strategies collect,import,none,fork] [--delay-configs 0.1:0.1,0.5:0.5,1.0:1.0] + [--show-output] [--verbose] + +The delay configs simulate different conftest.py loading times (Flask, SQLAlchemy, etc.). +Format: import_delay:conftest_delay pairs, comma-separated. 
+Higher values show bigger differences between warmup strategies. + +Optionally add --test-delay to simulate per-test runtime with +/-10% gaussian jitter. +""" + +import argparse +import json +import os +import shutil +import subprocess +import sys +import time +from pathlib import Path + + +STRATEGIES = ["fork", "collect", "import", "none"] +DEFAULT_OUTPUT = "benchmark_results.json" +DEFAULT_DELAY_CONFIGS = "0.1:0.1,0.5:0.5,1.0:1.0" # cli format + + +def clean_mutants(): + """Remove mutants directory for fresh run.""" + mutants_dir = Path("mutants") + if mutants_dir.exists(): + shutil.rmtree(mutants_dir) + + +def get_pyproject_content(debug: bool = False, process_isolation: str = "fork") -> str: + """Get base pyproject.toml content.""" + return f"""[project] +name = "benchmark-1k" +version = "0.1.0" +description = "Benchmark project for mutmut warmup strategy comparison (~1000 mutants)" +requires-python = ">=3.10" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/benchmark"] + +[tool.mutmut] +log_to_file = true +source_paths = ["src/"] +process_isolation = "{process_isolation}" +debug = {"true" if debug else "false"} +""" + + +def run_mutmut( + strategy: str, + import_delay: float = 0.1, + conftest_delay: float = 0.1, + test_delay: float = 0.0, + verbose: bool = False, + show_output: bool = False, +) -> dict: + """Run mutmut with specified strategy.""" + config = get_pyproject_content(debug=verbose, process_isolation=strategy if strategy == "fork" else "hot-fork") + + if strategy != "fork": + config += f'hot_fork_warmup = "{strategy}"\n' + if strategy == "import": + config += 'preload_modules_file = "mutmut_preload.txt"\n' + + config_path = Path("pyproject.toml") + config_path.write_text(config) + + clean_mutants() + + print(" Starting mutmut run...") + start = time.perf_counter() + + cmd = ["mutmut", "run"] + + env = { + **os.environ, + "PYTHONPATH": "src", + 
"BENCHMARK_IMPORT_DELAY": str(import_delay), + "BENCHMARK_CONFTEST_DELAY": str(conftest_delay), + "BENCHMARK_TEST_DELAY": str(test_delay), + } + + if verbose or show_output: + result = subprocess.run(cmd, text=True, env=env) + else: + result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True, env=env) + + returncode = result.returncode + elapsed = time.perf_counter() - start + + summary_path = Path("mutants/summary.json") + summary = json.loads(summary_path.read_text()) if summary_path.exists() else {} + + stats = summary.get("stats", {}) + phase_timings = summary.get("phase_timings", {}) + + total_mutants = stats.get("total", 0) + mutation_testing_time = phase_timings.get("mutation_testing", 0) + if mutation_testing_time > 0 and total_mutants > 0: + throughput = total_mutants / mutation_testing_time + else: + throughput = 0 + + # Rename mutants dir to preserve results for this strategy + mutants_dir = Path("mutants") + dir_name = f"mutants_{strategy}_i{int(import_delay*1000)}_c{int(conftest_delay*1000)}_t{int(test_delay*1000)}" + strategy_dir = Path(dir_name) + if strategy_dir.exists(): + shutil.rmtree(strategy_dir) + if mutants_dir.exists(): + mutants_dir.rename(strategy_dir) + print(f" Results saved to {strategy_dir}/") + + return { + "strategy": strategy, + "elapsed_seconds": round(elapsed, 2), + "mutations_per_second": round(throughput, 2), + "total_mutants": stats.get("total", 0), + "killed": stats.get("killed", 0), + "survived": stats.get("survived", 0), + "timeout": stats.get("timeout", 0), + "suspicious": stats.get("suspicious", 0), + "exit_code": returncode, + "phase_mutant_generation": round(phase_timings.get("mutant_generation", 0), 3), + "phase_stats_collection": round(phase_timings.get("stats_collection", 0), 3), + "phase_clean_tests": round(phase_timings.get("clean_tests", 0), 3), + "phase_forced_fail_test": round(phase_timings.get("forced_fail_test", 0), 3), + "phase_mutation_testing": 
round(phase_timings.get("mutation_testing", 0), 3), + } + + +def print_result(result: dict): + """Print result summary for one strategy.""" + print(f" Avg. Mut/s: {result['mutations_per_second']:.2f} mut/s") + print(f" Total time: {result['elapsed_seconds']:.1f}s") + print(f" Total mutants: {result['total_mutants']}") + print(f" Killed: {result['killed']}") + print(f" Survived: {result['survived']}") + if result["timeout"] > 0: + print(f" Timeout: {result['timeout']}") + if result["exit_code"] != 0: + print(f" Exit code: {result['exit_code']} (non-zero)") + print(" Phase timings:") + print(f" Mutant generation: {result['phase_mutant_generation']:.3f}s") + print(f" Stats collection: {result['phase_stats_collection']:.3f}s") + print(f" Clean tests: {result['phase_clean_tests']:.3f}s") + print(f" Forced fail test: {result['phase_forced_fail_test']:.3f}s") + print(f" Mutation testing: {result['phase_mutation_testing']:.3f}s") + + +def main(): + parser = argparse.ArgumentParser(description="Benchmark mutmut run modes") + parser.add_argument( + "--strategies", + default=",".join(STRATEGIES), + help=f"Comma-separated list of strategies (default: {','.join(STRATEGIES)})", + ) + parser.add_argument("--output", default=DEFAULT_OUTPUT, help=f"Output JSON file (default: {DEFAULT_OUTPUT})") + parser.add_argument( + "--delay-configs", + default=DEFAULT_DELAY_CONFIGS, + help="Comma-separated import:conftest delay pairs. 
Default: 0.1:0.1,0.5:0.5,1.0:1.0", + ) + parser.add_argument("--verbose", "-v", action="store_true", help="Enable mutmut debug mode and show all output") + parser.add_argument( + "--show-output", "-s", action="store_true", + help="Show mutmut stdout/stderr (spinners, progress) without enabling debug mode", + ) + parser.add_argument( + "--test-delay", + type=float, + default=0.05, + help="Per-test delay in seconds with +/-10%% gaussian jitter (default: 0.05)", + ) + args = parser.parse_args() + + strategies = [s.strip() for s in args.strategies.split(",")] + for s in strategies: + if s not in STRATEGIES: + print(f"Error: Unknown strategy '{s}'. Valid: {STRATEGIES}") + sys.exit(1) + + # Parse delay configs (e.g., "0.1:0.1,0.5:0.5" -> [(0.1, 0.1), (0.5, 0.5)]) + delay_configs = [] + for pair in args.delay_configs.split(","): + import_delay, conftest_delay = pair.strip().split(":") + delay_configs.append((float(import_delay), float(conftest_delay))) + + test_delay = args.test_delay + + if not Path("src/benchmark").exists(): + print("Error: Must run from benchmark_1k directory") + sys.exit(1) + + print("=" * 60) + print("Mutmut Process Isolation Benchmark") + print("=" * 60) + print(f"Strategies to test: {strategies}") + print(f"Delay configs (import, conftest): {delay_configs}") + print(f"Per-test delay: {test_delay}s (+/-10% jitter)") + + all_results = [] + import_delay = 0.05 + conftest_delay = 0.05 + + for import_delay, conftest_delay in delay_configs: + print(f"\n{'#' * 60}") + print(f"# DELAY CONFIG: import={import_delay}s, conftest={conftest_delay}s, test={test_delay}s") + print(f"{'#' * 60}") + + config_results = [] + + for strategy in strategies: + print(f"\n{'=' * 60}") + print(f"Strategy: {strategy}") + print("=" * 60) + + result = run_mutmut( + strategy, + import_delay=import_delay, + conftest_delay=conftest_delay, + test_delay=test_delay, + verbose=args.verbose, + show_output=args.show_output, + ) + result["import_delay"] = import_delay + 
result["conftest_delay"] = conftest_delay + result["test_delay"] = test_delay + config_results.append(result) + print_result(result) + + all_results.append( + { + "import_delay": import_delay, + "conftest_delay": conftest_delay, + "results": config_results, + } + ) + + flat_results = [] + for config in all_results: + for r in config["results"]: + flat_results.append( + { + "import_delay": config["import_delay"], + "conftest_delay": config["conftest_delay"], + **r, + } + ) + + output = { + "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"), + "python_version": sys.version.split()[0], + "strategies": strategies, + "delay_configs": [(c["import_delay"], c["conftest_delay"]) for c in all_results], + "test_delay": test_delay, + "results": flat_results, + "results_by_config": all_results, + } + + output_path = Path(args.output) + output_path.write_text(json.dumps(output, indent=2)) + print(f"\n\nResults saved to {output_path}") + + print("\n" + "=" * 80) + print("RESULTS SUMMARY") + print("=" * 80) + + for config in all_results: + import_delay = config["import_delay"] + conftest_delay = config["conftest_delay"] + config_results = config["results"] + + print(f"\n--- Delay: import={import_delay}s, conftest={conftest_delay}s ---") + + max_throughput = max(r["mutations_per_second"] for r in config_results) if config_results else 1 + + print(f"{'Strategy':<12} {'Avg. 
Mut/s':>12} {'% of Max':>10} {'Mut Test':>10} {'Wall Time':>10}") + print("-" * 60) + + for r in config_results: + throughput = r["mutations_per_second"] + pct_of_max = (throughput / max_throughput * 100) if max_throughput > 0 else 0 + mut_test_time = r.get("phase_mutation_testing", 0) + print( + f"{r['strategy']:<12} {throughput:>10.1f}/s {pct_of_max:>9.0f}% {mut_test_time:>8.1f}s {r['elapsed_seconds']:>8.1f}s" + ) + + print("\n" + "=" * 80) + print("MUTATION THROUGHPUT COMPARISON ACROSS ALL DELAY CONFIGS") + print("=" * 80) + + print(f"\n{'Strategy':<12}", end="") + for config in all_results: + delay = config["import_delay"] + print(f" {delay}s delay".center(15), end="") + print() + print("-" * (12 + 15 * len(all_results))) + + for strategy in strategies: + print(f"{strategy:<12}", end="") + for config in all_results: + for r in config["results"]: + if r["strategy"] == strategy: + print(f" {r['mutations_per_second']:>10.1f}/s ", end="") + break + print() + + print() + + config = get_pyproject_content() + Path("pyproject.toml").write_text(config) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/e2e_projects/benchmark_1k/src/benchmark/__init__.py b/e2e_projects/benchmark_1k/src/benchmark/__init__.py new file mode 100644 index 00000000..9ea039f4 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/__init__.py @@ -0,0 +1,34 @@ +"""Benchmark package for mutmut warmup strategy testing. + +Simulates a real application that imports heavy libraries on startup. +Set BENCHMARK_IMPORT_DELAY environment variable to control the delay. 
+""" + +import os +import time + +from benchmark import arguments +from benchmark import booleans +from benchmark import comparisons +from benchmark import complex +from benchmark import numbers +from benchmark import operators +from benchmark import returns +from benchmark import strings + +__all__ = [ + "numbers", + "strings", + "booleans", + "operators", + "comparisons", + "arguments", + "returns", + "complex", +] + + +# Simulate library imports +import_delay = float(os.environ.get("BENCHMARK_IMPORT_DELAY", "0.05")) +if import_delay > 0: + time.sleep(import_delay) diff --git a/e2e_projects/benchmark_1k/src/benchmark/arguments.py b/e2e_projects/benchmark_1k/src/benchmark/arguments.py new file mode 100644 index 00000000..d151cef8 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/arguments.py @@ -0,0 +1,71 @@ +"""Benchmark functions with various argument patterns.""" + + +# === Helper functions === + + +def helper_2(a, b): + """Helper with 2 args.""" + return (a, b) + + +def helper_3(a, b, c): + """Helper with 3 args.""" + return (a, b, c) + + +def combiner(first, second): + """Combine 2 values.""" + if first is None or second is None: + return None + return f"{first}-{second}" + + +# === 2-arg calls === + + +def call_2args_batch_1(): + """2-arg calls.""" + r1 = helper_2(1, 2) + r2 = helper_2(3, 4) + return r1, r2 + + +# === 3-arg calls === + + +def call_3args_batch_1(): + """3-arg calls.""" + r1 = helper_3(1, 2, 3) + return (r1,) + + +# === dict() keyword calls === + + +def dict_2keys_batch_1(): + """dict with 2 keys.""" + d1 = {"a": 1, "b": 2} + return (d1,) + + +def dict_3keys_batch_1(): + """dict with 3 keys.""" + d1 = {"x": 1, "y": 2, "z": 3} + return (d1,) + + +# === String method calls === + + +def string_method_calls(): + """String method calls with multiple args.""" + text = "a-b-c-d-e" + r1 = text.split("-", 2) + return (r1,) + + +def format_calls(): + """String format calls.""" + r1 = "{} {}".format("hello", "world") + return (r1,) diff --git 
a/e2e_projects/benchmark_1k/src/benchmark/booleans.py b/e2e_projects/benchmark_1k/src/benchmark/booleans.py new file mode 100644 index 00000000..db281f8e --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/booleans.py @@ -0,0 +1,180 @@ +"""Boolean mutation targets""" + + +# === Boolean literals === + + +def flags_batch_1(): + """Boolean flags - batch 1.""" + enabled = True + disabled = False + active = True + paused = False + return enabled, disabled, active, paused + + +def flags_batch_2(): + """Boolean flags - batch 2.""" + visible = True + hidden = False + selected = True + focused = False + return visible, hidden, selected, focused + + +def flags_batch_3(): + """Boolean flags - batch 3.""" + running = True + stopped = False + ready = True + waiting = False + return running, stopped, ready, waiting + + +def flags_batch_4(): + """Boolean flags - batch 4.""" + valid = True + invalid = False + complete = True + pending = False + return valid, invalid, complete, pending + + +def conditional_returns_1(x): + """Conditional boolean returns - batch 1.""" + if x > 0: + return True + return False + + +def conditional_returns_2(x, y): + """Conditional boolean returns - batch 2.""" + if x == y: + return True + elif x > y: + return False + return True + + +def default_values(): + """Default boolean values.""" + debug = False + verbose = False + quiet = True + strict = True + return debug, verbose, quiet, strict + + +def config_flags(): + """Configuration flags.""" + auto_save = True + auto_load = False + cache_enabled = True + logging_enabled = False + return auto_save, auto_load, cache_enabled, logging_enabled + + +def feature_flags(): + """Feature flags.""" + feature_a = True + feature_b = False + feature_c = True + feature_d = False + return feature_a, feature_b, feature_c, feature_d + + +# === Boolean operators === + + +def logical_and_simple(a, b): + """Simple AND.""" + return a and b + + +def logical_or_simple(a, b): + """Simple OR.""" + return a or b + + +def 
logical_and_chain_1(a, b, c): + """Chained AND - batch 1.""" + return a and b and c + + +def logical_and_chain_2(a, b, c, d): + """Chained AND - batch 2.""" + return a and b and c and d + + +def logical_or_chain_1(a, b, c): + """Chained OR - batch 1.""" + return a or b or c + + +def logical_or_chain_2(a, b, c, d): + """Chained OR - batch 2.""" + return a or b or c or d + + +def mixed_logic_1(a, b, c, d): + """Mixed AND/OR - batch 1.""" + return (a and b) or (c and d) + + +def mixed_logic_2(a, b, c, d): + """Mixed AND/OR - batch 2.""" + return (a or b) and (c or d) + + +def mixed_logic_3(a, b, c): + """Mixed AND/OR - batch 3.""" + return a and b or c + + +def mixed_logic_4(a, b, c): + """Mixed AND/OR - batch 4.""" + return a or b and c + + +def condition_with_and(x, y, z): + """Conditions with AND.""" + result = False + if x > 0 and y > 0: + result = True + if y > 0 and z > 0: + result = result and True + return result + + +def condition_with_or(x, y, z): + """Conditions with OR.""" + result = False or True + if x > 0 or y > 0: + result = True + if y < 0 or z < 0: + result = result or False + return result + + +def complex_condition_1(a, b, c, d): + """Complex condition - batch 1.""" + return (a > 0 and b > 0) or (c > 0 and d > 0) + + +def guard_clauses(value, min_val, max_val, required): + """Guard clauses with boolean operators.""" + if not required and value is None: + return True + if value is None or value < min_val or value > max_val: + return False + return True + + +def validation_flags(has_name, has_email, has_phone, is_verified, is_active): + """Validation with multiple boolean flags.""" + has_contact = has_email or has_phone + is_complete = has_name and has_contact + is_valid = is_complete and is_verified + can_proceed = is_valid and is_active + needs_review = is_complete and not is_verified + return has_contact, is_complete, is_valid, can_proceed, needs_review diff --git a/e2e_projects/benchmark_1k/src/benchmark/comparisons.py 
b/e2e_projects/benchmark_1k/src/benchmark/comparisons.py new file mode 100644 index 00000000..cd9dc5e7 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/comparisons.py @@ -0,0 +1,242 @@ +"""Comparison mutation targets.""" + + +# === Equality comparisons === + + +def equality_simple(a, b): + """Simple equality.""" + eq = a == b + neq = a != b + return eq, neq + + +def equality_batch_1(a, b, c): + """Equality - batch 1.""" + r1 = a == b + r2 = b == c + r3 = a != c + return r1, r2, r3 + + +def equality_with_literals(value): + """Equality with literals.""" + is_zero = value == 0 + is_one = value == 1 + not_zero = value != 0 + not_one = value != 1 + return is_zero, is_one, not_zero, not_one + + +def equality_strings(s): + """String equality.""" + is_empty = s == "" + is_hello = s == "hello" + not_empty = s != "" + return is_empty, is_hello, not_empty + + +# === Less than comparisons === + + +def less_than_simple(a, b): + """Simple less than.""" + lt = a < b + le = a <= b + return lt, le + + +def less_than_batch_1(x, y, z): + """Less than - batch 1.""" + r1 = x < y + r2 = y < z + r3 = x <= z + return r1, r2, r3 + + +def less_than_batch_2(value, threshold): + """Less than - batch 2.""" + below = value < threshold + at_or_below = value <= threshold + return below, at_or_below + + +def less_than_literals(value): + """Less than with literals.""" + lt_zero = value < 0 + lt_ten = value < 10 + le_zero = value <= 0 + return lt_zero, lt_ten, le_zero + + +# === Greater than comparisons === + + +def greater_than_simple(a, b): + """Simple greater than.""" + gt = a > b + ge = a >= b + return gt, ge + + +def greater_than_batch_1(x, y, z): + """Greater than - batch 1.""" + r1 = x > y + r2 = y > z + r3 = x >= z + return r1, r2, r3 + + +def greater_than_batch_2(value, threshold): + """Greater than - batch 2.""" + above = value > threshold + at_or_above = value >= threshold + return above, at_or_above + + +def greater_than_literals(value): + """Greater than with literals.""" + 
gt_zero = value > 0 + gt_ten = value > 10 + ge_zero = value >= 0 + return gt_zero, gt_ten, ge_zero + + +# === Identity comparisons === + + +def identity_none(obj): + """Identity with None.""" + is_none = obj is None + is_not_none = obj is not None + return is_none, is_not_none + + +def identity_batch_1(a, b): + """Identity - batch 1.""" + same = a is b + different = a is not b + return same, different + + +def identity_checks(value, default): + """Multiple identity checks.""" + if value is None: + return default + if value is not default: + return value + return None + + +# === Membership comparisons === + + +def membership_simple(item, collection): + """Simple membership.""" + present = item in collection + absent = item not in collection + return present, absent + + +def membership_batch_1(x, items): + """Membership - batch 1.""" + r1 = x in items + r2 = x not in items + return r1, r2 + + +def membership_string(char, text): + """String membership.""" + found = char in text + not_found = char not in text + return found, not_found + + +def membership_dict(key, d): + """Dictionary membership.""" + has_key = key in d + missing_key = key not in d + return has_key, missing_key + + +# === Complex boundary checks === + + +def boundary_check_1(value): + """Boundary check - batch 1.""" + if value < 0: + return "negative" + elif value == 0: + return "zero" + elif value <= 10: + return "small" + elif value < 100: + return "medium" + else: + return "large" + + +def boundary_check_2(value, low, high): + """Boundary check - batch 2.""" + if value < low: + return "below" + elif value > high: + return "above" + elif value == low: + return "at_low" + elif value == high: + return "at_high" + else: + return "within" + + +def range_check(value, min_val, max_val): + """Range check.""" + if value < min_val: + return False + if value > max_val: + return False + if value >= min_val and value <= max_val: + return True + return False + + +def compare_all(a, b): + """All comparison 
operators on two values.""" + results = { + "eq": a == b, + "ne": a != b, + "lt": a < b, + "le": a <= b, + "gt": a > b, + "ge": a >= b, + } + return results + + +# === Additional comparisons === + + +def chained_comparisons(x, low, mid, high): + """Chained comparison checks.""" + in_lower = low <= x < mid + in_upper = mid <= x <= high + below_all = x < low + above_all = x > high + return in_lower, in_upper, below_all, above_all + + +def multi_condition_check(a, b, c, threshold): + """Multiple condition checks.""" + all_above = a > threshold and b > threshold and c > threshold + any_above = a > threshold or b > threshold or c > threshold + all_equal = a == b == c + none_below = a >= threshold and b >= threshold and c >= threshold + return all_above, any_above, all_equal, none_below + + +def sorted_check(a, b, c): + """Check if values are sorted.""" + ascending = a < b < c + descending = a > b > c + return ascending, descending diff --git a/e2e_projects/benchmark_1k/src/benchmark/complex.py b/e2e_projects/benchmark_1k/src/benchmark/complex.py new file mode 100644 index 00000000..980b3546 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/complex.py @@ -0,0 +1,240 @@ +"""Complex call patterns.""" + + +# === Deep call chains (10 levels) - Chain 1 === + + +def chain1_level_10(x): + """Chain 1, level 10.""" + return x + 1 + + +def chain1_level_9(x): + """Chain 1, level 9.""" + return chain1_level_10(x) + 1 + + +def chain1_level_8(x): + """Chain 1, level 8.""" + return chain1_level_9(x) + 1 + + +def chain1_level_7(x): + """Chain 1, level 7.""" + return chain1_level_8(x) + 1 + + +def chain1_level_6(x): + """Chain 1, level 6.""" + return chain1_level_7(x) + 1 + + +def chain1_level_5(x): + """Chain 1, level 5.""" + return chain1_level_6(x) + 1 + + +def chain1_level_4(x): + """Chain 1, level 4.""" + return chain1_level_5(x) + 1 + + +def chain1_level_3(x): + """Chain 1, level 3.""" + return chain1_level_4(x) + 1 + + +def chain1_level_2(x): + """Chain 1, level 2.""" + 
return chain1_level_3(x) + 1 + + +def chain1_level_1(x): + """Chain 1, level 1.""" + return chain1_level_2(x) + 1 + + +def chain1_entry(x): + """Entry point for chain 1 (10 levels deep).""" + return chain1_level_1(x) * 2 + + +# === Tail recursion === + + +def factorial_tail(n, acc=1): + """Tail-recursive factorial.""" + if n <= 1: + return acc + return factorial_tail(n - 1, acc * n) + + +def sum_tail(n, acc=0): + """Tail-recursive sum.""" + if n <= 0: + return acc + return sum_tail(n - 1, acc + n) + + +def power_tail(base, exp, acc=1): + """Tail-recursive power.""" + if exp <= 0: + return acc + return power_tail(base, exp - 1, acc * base) + + +def gcd_tail(a, b): + """Tail-recursive GCD.""" + if b == 0: + return a + return gcd_tail(b, a % b) + + +# === Standard recursion === + + +def fibonacci(n): + """Standard recursive fibonacci.""" + if n <= 0: + return 0 + if n == 1: + return 1 + return fibonacci(n - 1) + fibonacci(n - 2) + + +def flatten(nested): + """Recursive list flattening.""" + result = [] + for item in nested: + if isinstance(item, list): + result.extend(flatten(item)) + else: + result.append(item) + return result + + +# === Mutual recursion === + + +def is_even(n): + """Check even via mutual recursion.""" + if n == 0: + return True + if n < 0: + return is_even(-n) + return is_odd(n - 1) + + +def is_odd(n): + """Check odd via mutual recursion.""" + if n == 0: + return False + if n < 0: + return is_odd(-n) + return is_even(n - 1) + + +def descend_a(n, acc=0): + """Mutual recursion pair A.""" + if n <= 0: + return acc + return descend_b(n - 1, acc + 1) + + +def descend_b(n, acc=0): + """Mutual recursion pair B.""" + if n <= 0: + return acc + return descend_a(n - 1, acc + 2) + + +# === Higher-order functions === + + +def apply_twice(f, x): + """Apply function twice.""" + return f(f(x)) + + +def apply_n_times(f, x, n): + """Apply function n times.""" + result = x + for _ in range(n): + result = f(result) + return result + + +def compose(f, g): + """Compose 
two functions.""" + return lambda x: f(g(x)) + + +def map_reduce(items, mapper, reducer, initial): + """Map-reduce pattern.""" + mapped = [mapper(item) for item in items] + result = initial + for item in mapped: + result = reducer(result, item) + return result + + +def with_callback(data, on_success, on_error): + """Process with callbacks.""" + if data is not None: + return on_success(data) + return on_error("no data") + + +# === Complex nested patterns === + + +def nested_loops(matrix): + """Nested loop processing.""" + total = 0 + for i in range(len(matrix)): + for j in range(len(matrix[i]) if i < len(matrix) else 0): + if matrix[i][j] > 0: + total += matrix[i][j] * 2 + else: + total += matrix[i][j] + 1 + return total + + +def nested_conditions(x, y, z): + """Deeply nested conditions.""" + if x > 0: + if y > 0: + if z > 0: + return x + y + z + else: + return x + y - z + else: + if z > 0: + return x - y + z + else: + return x - y - z + else: + if y > 0: + return y + z + else: + return z + + +def accumulate_with_filter(items, predicate, transform): + """Accumulate filtered and transformed items.""" + result = 0 + for item in items: + if predicate(item): + transformed = transform(item) + result += transformed + return result + + +def calculate_backoff(attempt, base_delay=1.0, max_delay=60.0): + """Calculate exponential backoff delay.""" + if attempt <= 0: + return 0.0 + delay = base_delay * (2 ** (attempt - 1)) + if delay > max_delay: + return max_delay + return delay diff --git a/e2e_projects/benchmark_1k/src/benchmark/numbers.py b/e2e_projects/benchmark_1k/src/benchmark/numbers.py new file mode 100644 index 00000000..4939dea7 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/numbers.py @@ -0,0 +1,69 @@ +"""Number mutation targets.""" + + +def constants_batch_1(): + """Numeric constants.""" + a = 0 + b = 1 + c = 2 + return a + b + c + + +def float_constants_1(): + """Float constants.""" + a = 0.5 + b = 1.5 + return a + b + + +def negative_constants(): + 
"""Negative numeric constants.""" + a = -1 + b = -2 + return a + b + + +def arithmetic_simple(x): + """Simple arithmetic with literals.""" + return x + 1 + + +def loop_range_1(): + """Loop with range literals.""" + total = 0 + for i in range(5): + total += i + 1 + return total + + +def threshold_check_1(value): + """Threshold checking.""" + if value > 0: + return 1 + return 0 + + +def array_indices(items): + """Array index access with literals.""" + if len(items) > 2: + return items[0] + items[1] + return 0 + + +def multipliers(x): + """Various multiplier values.""" + a = x * 2 + b = x * 3 + return a + b + + +def offsets(base): + """Offset calculations.""" + return [base + 1] + + +def dimensions(): + """Dimension values.""" + width = 100 + height = 200 + return width, height diff --git a/e2e_projects/benchmark_1k/src/benchmark/operators.py b/e2e_projects/benchmark_1k/src/benchmark/operators.py new file mode 100644 index 00000000..fa35e4d2 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/operators.py @@ -0,0 +1,129 @@ +"""Operator mutation targets.""" + + +# === Arithmetic operators === + + +def add_sub_1(a, b): + """Addition and subtraction.""" + add = a + b + sub = a - b + return add, sub + + +def mul_div_1(a, b): + """Multiplication and division.""" + mul = a * b + div = a / b if b != 0 else 0 + return mul, div + + +def integer_ops_1(a, b): + """Integer operations.""" + floordiv = a // b if b != 0 else 0 + mod = a % b if b != 0 else 0 + return floordiv, mod + + +def mixed_arithmetic_1(a, b, c): + """Mixed arithmetic.""" + return a + b * c + + +# === Bitwise operators === + + +def bitwise_shift_1(a): + """Bit shift.""" + lshift = a << 1 + rshift = a >> 1 + return lshift, rshift + + +def bitwise_and_or_1(a, b): + """Bitwise AND/OR.""" + band = a & b + bor = a | b + return band, bor + + +# === Augmented assignment === + + +def augmented_add_sub(x): + """Augmented add/sub.""" + x += 1 + x -= 1 + return x + + +def augmented_in_loop(): + """Augmented 
assignment in loop.""" + total = 0 + for i in range(5): + total += i + return total + + +# === Unary operators === + + +def unary_not_1(flag): + """Unary not.""" + return not flag + + +def unary_invert_1(x): + """Unary invert.""" + return ~x + + +def unary_minus(x): + """Unary minus.""" + return -x + + +# === Additional arithmetic === + + +def add_sub_2(a, b, c): + """More addition and subtraction.""" + r1 = a + b + c + r2 = a - b - c + r3 = a + b - c + return r1, r2, r3 + + +def mul_div_2(a, b, c): + """More multiplication and division.""" + r1 = a * b * c + r2 = a / b / c if b != 0 and c != 0 else 0 + r3 = a * b / c if c != 0 else 0 + return r1, r2, r3 + + +def integer_ops_2(a, b): + """More integer operations.""" + r1 = a // 2 + r2 = a % 2 + r3 = a**2 + r4 = b // 3 + r5 = b % 3 + return r1, r2, r3, r4, r5 + + +def augmented_batch(value): + """Batch of augmented assignments.""" + value += 10 + value -= 5 + value *= 2 + value //= 3 + return value + + +def bitwise_xor_ops(a, b): + """Bitwise XOR operations.""" + r1 = a ^ b + r2 = a ^ 0xFF + r3 = b ^ 0x0F + return r1, r2, r3 diff --git a/e2e_projects/benchmark_1k/src/benchmark/returns.py b/e2e_projects/benchmark_1k/src/benchmark/returns.py new file mode 100644 index 00000000..f7c434de --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/returns.py @@ -0,0 +1,108 @@ +"""Return/assignment mutation targets.""" + + +# === Simple return values === + + +def simple_return_integers(): + """Simple integer returns.""" + return 42 + + +# === Simple value assignments === + + +def assign_integers(): + """Integer assignments.""" + a = 1 + b = 2 + return a, b + + +def assign_strings(): + """String assignments.""" + a = "hello" + b = "world" + return a, b + + +def assign_lists(): + """List assignments.""" + a = [1, 2, 3] + return (a,) + + +def assign_mixed(): + """Mixed type assignments.""" + num = 42 + text = "answer" + return num, text + + +# === None assignments === + + +def assign_none_batch_1(): + """None 
assignments.""" + a = None + b = None + return a, b + + +# === Typed assignments === + + +def typed_int(): + """Typed integer assignments.""" + x: int = 42 + return (x,) + + +def typed_str(): + """Typed string assignments.""" + name: str = "test" + return (name,) + + +# === Lambdas returning values === + + +def lambda_integers(): + """Lambdas returning integers.""" + f1 = lambda: 1 # noqa: E731 + f2 = lambda: 2 # noqa: E731 + return f1, f2 + + +def lambda_strings(): + """Lambdas returning strings.""" + f1 = lambda: "hello" # noqa: E731 + return (f1,) + + +def lambda_with_args(): + """Lambdas with arguments.""" + f1 = lambda x: x + 1 # noqa: E731 + return (f1,) + + +# === Lambdas returning None === + + +def lambda_none_batch_1(): + """Lambdas returning None.""" + f1 = lambda: None # noqa: E731 + f2 = lambda: None # noqa: E731 + return f1, f2 + + +# === Conditional assignments === + + +def conditional_assign_1(flag): + """Conditional assignment.""" + if flag: + result = "yes" + else: + result = "no" + return result diff --git a/e2e_projects/benchmark_1k/src/benchmark/strings.py b/e2e_projects/benchmark_1k/src/benchmark/strings.py new file mode 100644 index 00000000..158ca2c5 --- /dev/null +++ b/e2e_projects/benchmark_1k/src/benchmark/strings.py @@ -0,0 +1,199 @@ +"""String mutation targets.""" + + +# === Simple strings === + + +def messages_batch_1(): + """Simple string literals.""" + a = "hello" + b = "world" + return a, b + + +def labels_batch_1(): + """Label strings.""" + a = "name" + b = "value" + return a, b + + +def states(): + """State strings.""" + a = "pending" + b = "active" + return a, b + + +# === f-strings === + + +def format_name(name): + """f-string with name.""" + return f"Name: {name}" + + +def format_count(count): + """f-string with count.""" + return f"Count: {count}" + + +def format_result(value, unit): + """f-string with multiple values.""" + return f"Result: {value} {unit}" + + +# === String method calls === + + +def case_methods_1(s): + """Case 
conversion.""" + lower = s.lower() + upper = s.upper() + return lower, upper + + +def strip_methods_1(s): + """Strip methods.""" + left = s.lstrip() + right = s.rstrip() + return left, right + + +def find_methods_1(s, sub): + """Find methods.""" + pos1 = s.find(sub) + pos2 = s.rfind(sub) + return pos1, pos2 + + +def split_methods_1(s, sep): + """Split methods.""" + parts1 = s.split(sep, 2) + parts2 = s.rsplit(sep, 2) + return parts1, parts2 + + +def partition_methods(s, sep): + """Partition methods.""" + p1 = s.partition(sep) + p2 = s.rpartition(sep) + return p1, p2 + + +# === Additional simple strings === + + +def messages_batch_2(): + """More string literals.""" + a = "start" + b = "stop" + c = "pause" + return a, b, c + + +def messages_batch_3(): + """Even more string literals.""" + a = "error" + b = "warning" + c = "info" + d = "debug" + return a, b, c, d + + +def symbols(): + """Symbol strings.""" + a = "alpha" + b = "beta" + c = "gamma" + return a, b, c + + +def keywords(): + """Keyword strings.""" + a = "true" + b = "false" + c = "null" + d = "undefined" + return a, b, c, d + + +# === Additional f-strings === + + +def format_error(code, message): + """f-string for error.""" + return f"Error {code}: {message}" + + +def format_coords(x, y): + """f-string for coordinates.""" + return f"({x}, {y})" + + +def format_path(directory, filename): + """f-string for path.""" + return f"{directory}/{filename}" + + +def format_greeting(title, name): + """f-string for greeting.""" + return f"Hello, {title} {name}!" 
+ + +# === Additional string methods === + + +def case_methods_2(s): + """More case conversion.""" + title = s.title() + cap = s.capitalize() + swap = s.swapcase() + return title, cap, swap + + +def strip_methods_2(s, chars): + """Strip with chars.""" + left = s.lstrip(chars) + right = s.rstrip(chars) + both = s.strip(chars) + return left, right, both + + +def find_methods_2(s, sub, start): + """Find with start position.""" + pos1 = s.find(sub, start) + pos2 = s.rfind(sub, start) + return pos1, pos2 + + +def replace_methods(s, old, new): + """Replace methods.""" + r1 = s.replace(old, new) + r2 = s.replace(old, new, 1) + return r1, r2 + + +def justify_methods(s, width): + """Justify methods.""" + left = s.ljust(width) + right = s.rjust(width) + center = s.center(width) + return left, right, center + + +def index_methods(s, sub): + """Index methods.""" + try: + i1 = s.index(sub) + i2 = s.rindex(sub) + return i1, i2 + except ValueError: + return -1, -1 + + +def prefix_suffix_methods(s): + """Prefix/suffix removal.""" + r1 = s.removeprefix("pre_") + r2 = s.removesuffix("_suf") + return r1, r2 diff --git a/e2e_projects/benchmark_1k/tests/__init__.py b/e2e_projects/benchmark_1k/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/e2e_projects/benchmark_1k/tests/conftest.py b/e2e_projects/benchmark_1k/tests/conftest.py new file mode 100644 index 00000000..acf81987 --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/conftest.py @@ -0,0 +1,31 @@ +""" +Pytest configuration for benchmark_1k tests. 
+ +Simulates real-world test infrastructure costs: +- BENCHMARK_CONFTEST_DELAY: Time for fixture/plugin setup (default: 0.1s) +- BENCHMARK_IMPORT_DELAY: In src/benchmark/__init__.py for library imports +- BENCHMARK_TEST_DELAY: Per-test runtime with +/-10% gaussian jitter (default: 0.05s) +""" + +import os +import random +import time + +import pytest + +# Simulate conftest.py costs: fixtures, plugins, pytest hooks +conftest_delay = float(os.environ.get("BENCHMARK_CONFTEST_DELAY", "0.1")) +if conftest_delay > 0: + time.sleep(conftest_delay) +_test_delay = float(os.environ.get("BENCHMARK_TEST_DELAY", "0.05")) + + +@pytest.fixture(autouse=True) +def benchmark_test_delay(): + """Add realistic per-test runtime variance.""" + if _test_delay > 0: + # Apply +/-10% gaussian jitter (std = 10% of mean) + jittered = random.gauss(_test_delay, _test_delay * 0.1) + # Clamp to a minimum of 0.01s + time.sleep(max(0.01, jittered)) + yield diff --git a/e2e_projects/benchmark_1k/tests/test_arguments.py b/e2e_projects/benchmark_1k/tests/test_arguments.py new file mode 100644 index 00000000..6f35d105 --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_arguments.py @@ -0,0 +1,56 @@ +"""Tests for arguments.py module.""" + +from benchmark import arguments + + +class TestArguments: + """Test argument functions.""" + + def test_combiner(self): + """Test combiner function.""" + # Normal case - both values provided + assert arguments.combiner("a", "b") == "a-b" + # First is None - should return None + assert arguments.combiner(None, "b") is None + # Second is None - should return None + assert arguments.combiner("a", None) is None + # Both None - should return None + assert arguments.combiner(None, None) is None + + def test_helper_2(self): + """Test helper_2.""" + assert arguments.helper_2(1, 2) == (1, 2) + + def test_helper_3(self): + """Test helper_3.""" + assert arguments.helper_3(1, 2, 3) == (1, 2, 3) + + def test_call_2args_batch_1(self): + """Test 2-arg calls.""" + result = 
arguments.call_2args_batch_1() + assert result[0] == (1, 2) + + def test_call_3args_batch_1(self): + """Test 3-arg calls.""" + result = arguments.call_3args_batch_1() + assert result[0] == (1, 2, 3) + + def test_dict_2keys_batch_1(self): + """Test dict with 2 keys.""" + result = arguments.dict_2keys_batch_1() + assert result[0] == {"a": 1, "b": 2} + + def test_dict_3keys_batch_1(self): + """Test dict with 3 keys.""" + result = arguments.dict_3keys_batch_1() + assert result[0] == {"x": 1, "y": 2, "z": 3} + + def test_string_method_calls(self): + """Test string method calls.""" + result = arguments.string_method_calls() + assert result[0] == ["a", "b", "c-d-e"] + + def test_format_calls(self): + """Test format calls.""" + result = arguments.format_calls() + assert result[0] == "hello world" diff --git a/e2e_projects/benchmark_1k/tests/test_booleans.py b/e2e_projects/benchmark_1k/tests/test_booleans.py new file mode 100644 index 00000000..3417d09d --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_booleans.py @@ -0,0 +1,201 @@ +"""Tests for booleans.py module.""" + +from benchmark import booleans + + +class TestBooleans: + """Test boolean functions.""" + + def test_flags_batch_1(self): + """Strong test - checks all values.""" + enabled, disabled, active, paused = booleans.flags_batch_1() + assert enabled is True + assert disabled is False + assert active is True + assert paused is False + + def test_flags_batch_2(self): + """Strong test - checks all values.""" + visible, hidden, selected, focused = booleans.flags_batch_2() + assert visible is True + assert hidden is False + assert selected is True + assert focused is False + + def test_flags_batch_3(self): + """Strong test - checks all values.""" + running, stopped, ready, waiting = booleans.flags_batch_3() + assert running is True + assert stopped is False + assert ready is True + assert waiting is False + + def test_flags_batch_4(self): + """Strong test - checks all values.""" + valid, invalid, complete, 
pending = booleans.flags_batch_4() + assert valid is True + assert invalid is False + assert complete is True + assert pending is False + + def test_conditional_returns_1(self): + """Strong test.""" + assert booleans.conditional_returns_1(5) is True + assert booleans.conditional_returns_1(-5) is False + + def test_conditional_returns_2(self): + """Strong test - checks all paths.""" + assert booleans.conditional_returns_2(5, 5) is True # x == y + assert booleans.conditional_returns_2(10, 5) is False # x > y + assert booleans.conditional_returns_2(3, 5) is True # x < y + + def test_default_values(self): + """Strong test - checks all values.""" + debug, verbose, quiet, strict = booleans.default_values() + assert debug is False + assert verbose is False + assert quiet is True + assert strict is True + + def test_config_flags(self): + """Strong test - checks all values.""" + auto_save, auto_load, cache_enabled, logging_enabled = booleans.config_flags() + assert auto_save is True + assert auto_load is False + assert cache_enabled is True + assert logging_enabled is False + + def test_feature_flags(self): + """Strong test - checks all values.""" + a, b, c, d = booleans.feature_flags() + assert a is True + assert b is False + assert c is True + assert d is False + + def test_logical_and_simple(self): + """Strong test.""" + assert booleans.logical_and_simple(True, True) is True + assert booleans.logical_and_simple(True, False) is False + + def test_logical_or_simple(self): + """Strong test.""" + assert booleans.logical_or_simple(False, True) is True + assert booleans.logical_or_simple(False, False) is False + + def test_logical_and_chain_1(self): + """Strong test - distinguishes and from or.""" + assert booleans.logical_and_chain_1(True, True, True) is True + # This would be True if any 'and' became 'or' + assert booleans.logical_and_chain_1(False, True, True) is False + assert booleans.logical_and_chain_1(True, False, True) is False + + def test_logical_and_chain_2(self): 
+ """Weak test.""" + result = booleans.logical_and_chain_2(True, True, True, False) + assert result is False + + def test_logical_or_chain_1(self): + """Strong test - distinguishes or from and.""" + assert booleans.logical_or_chain_1(False, False, True) is True + # This would be False if any 'or' became 'and' + assert booleans.logical_or_chain_1(True, False, False) is True + assert booleans.logical_or_chain_1(False, True, False) is True + assert booleans.logical_or_chain_1(False, False, False) is False + + def test_logical_or_chain_2(self): + """Strong test - distinguishes or from and.""" + assert booleans.logical_or_chain_2(False, False, False, False) is False + # These would fail if 'or' became 'and' + assert booleans.logical_or_chain_2(True, False, False, False) is True + assert booleans.logical_or_chain_2(False, True, False, False) is True + assert booleans.logical_or_chain_2(False, False, True, False) is True + assert booleans.logical_or_chain_2(False, False, False, True) is True + + def test_mixed_logic_1(self): + """Strong test - (a and b) or (c and d).""" + # True when a and b are both True + assert booleans.mixed_logic_1(True, True, False, False) is True + # True when c and d are both True + assert booleans.mixed_logic_1(False, False, True, True) is True + # False when neither pair is both True + assert booleans.mixed_logic_1(True, False, True, False) is False + assert booleans.mixed_logic_1(False, True, False, True) is False + + def test_mixed_logic_2(self): + """Strong test - (a or b) and (c or d).""" + # True when both pairs have at least one True + assert booleans.mixed_logic_2(True, False, True, False) is True + assert booleans.mixed_logic_2(False, True, False, True) is True + # False when first pair has no True + assert booleans.mixed_logic_2(False, False, True, True) is False + # False when second pair has no True + assert booleans.mixed_logic_2(True, True, False, False) is False + + def test_mixed_logic_3(self): + """Strong test - a and b or c 
(precedence: (a and b) or c).""" + assert booleans.mixed_logic_3(True, True, False) is True # (T and T) or F = T + assert booleans.mixed_logic_3(False, True, True) is True # (F and T) or T = T + assert booleans.mixed_logic_3(True, False, False) is False # (T and F) or F = F + # This catches if 'and' becomes 'or': True or False or False = True + assert booleans.mixed_logic_3(False, False, False) is False + + def test_mixed_logic_4(self): + """Strong test - a or b and c (precedence: a or (b and c)).""" + assert booleans.mixed_logic_4(False, True, True) is True # F or (T and T) = T + assert booleans.mixed_logic_4(True, False, False) is True # T or (F and F) = T + assert booleans.mixed_logic_4(False, True, False) is False # F or (T and F) = F + assert booleans.mixed_logic_4(False, False, True) is False # F or (F and T) = F + + def test_condition_with_and(self): + """Strong test - detects and/or and comparison mutations.""" + # All positive: first condition True, second condition True, result stays True + assert booleans.condition_with_and(1, 1, 1) is True + # x not > 0: first condition fails, second condition (y>0 and z>0) True, result = False and True = False + assert booleans.condition_with_and(0, 1, 1) is False + # y not > 0: both conditions fail + assert booleans.condition_with_and(1, 0, 1) is False + # y > 0, z not > 0: first True, second fails, result stays True + assert booleans.condition_with_and(1, 1, 0) is True + # All zero: both conditions fail + assert booleans.condition_with_and(0, 0, 0) is False + + def test_condition_with_or(self): + """Strong test - detects and/or mutations.""" + # x > 0: first or condition True + assert booleans.condition_with_or(1, 0, 0) is True + # y > 0: first or condition True + assert booleans.condition_with_or(0, 1, 0) is True + # Neither x nor y > 0: first or condition False, result stays True from init + assert booleans.condition_with_or(0, 0, 0) is True + # y < 0 or z < 0: second or condition (result or False stays same) + 
assert booleans.condition_with_or(-1, -1, 0) is True # -1 < 0 is True + + def test_complex_condition_1(self): + """Strong test - (a > 0 and b > 0) or (c > 0 and d > 0).""" + # First pair True + assert booleans.complex_condition_1(1, 1, 0, 0) is True + # Second pair True + assert booleans.complex_condition_1(0, 0, 1, 1) is True + # Neither pair True + assert booleans.complex_condition_1(1, 0, 1, 0) is False + assert booleans.complex_condition_1(0, 1, 0, 1) is False + # All zero + assert booleans.complex_condition_1(0, 0, 0, 0) is False + + def test_guard_clauses(self): + """Strong test.""" + assert booleans.guard_clauses(5, 0, 10, True) is True + assert booleans.guard_clauses(None, 0, 10, False) is True + assert booleans.guard_clauses(15, 0, 10, True) is False + + def test_validation_flags(self): + """Test validation flags.""" + has_contact, is_complete, is_valid, can_proceed, needs_review = booleans.validation_flags( + has_name=True, has_email=True, has_phone=False, is_verified=True, is_active=True + ) + assert has_contact is True + assert is_complete is True + assert is_valid is True + assert can_proceed is True + assert needs_review is False diff --git a/e2e_projects/benchmark_1k/tests/test_comparisons.py b/e2e_projects/benchmark_1k/tests/test_comparisons.py new file mode 100644 index 00000000..57b71416 --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_comparisons.py @@ -0,0 +1,269 @@ +"""Tests for comparisons.py module.""" + +from benchmark import comparisons + + +class TestComparisons: + """Test comparison functions.""" + + def test_equality_simple(self): + """Strong test.""" + eq, neq = comparisons.equality_simple(5, 5) + assert eq is True + assert neq is False + + def test_equality_batch_1(self): + """Strong test - checks all return values.""" + r1, r2, r3 = comparisons.equality_batch_1(1, 1, 2) + assert r1 is True # 1 == 1 + assert r2 is False # 1 == 2 + assert r3 is True # 1 != 2 + + def test_equality_with_literals(self): + """Strong test - checks 
all values.""" + result = comparisons.equality_with_literals(0) + assert result[0] is True # is_zero + assert result[1] is False # is_one + assert result[2] is False # not_zero + assert result[3] is True # not_one + # Also test with 1 to catch == 1 / != 1 mutations + result1 = comparisons.equality_with_literals(1) + assert result1[1] is True # is_one + + def test_equality_strings(self): + """Strong test - checks all values.""" + result = comparisons.equality_strings("") + assert result[0] is True # is_empty + assert result[1] is False # is_hello + assert result[2] is False # not_empty + # Test with "hello" to catch == "hello" mutation + result_hello = comparisons.equality_strings("hello") + assert result_hello[1] is True # is_hello + assert result_hello[2] is True # not_empty + + def test_less_than_simple(self): + """Strong test - tests boundary to distinguish < from <=.""" + lt, le = comparisons.less_than_simple(3, 5) + assert lt is True + assert le is True + # Test at boundary: 5, 5 - lt should be False, le should be True + lt_eq, le_eq = comparisons.less_than_simple(5, 5) + assert lt_eq is False # 5 < 5 is False + assert le_eq is True # 5 <= 5 is True + + def test_less_than_batch_1(self): + """Strong test - checks all values and boundaries.""" + result = comparisons.less_than_batch_1(1, 2, 3) + assert result[0] is True # 1 < 2 + assert result[1] is True # 2 < 3 + assert result[2] is True # 1 <= 3 + # Test boundary to distinguish < from <= + result_eq = comparisons.less_than_batch_1(2, 2, 2) + assert result_eq[0] is False # 2 < 2 is False + assert result_eq[1] is False # 2 < 2 is False + assert result_eq[2] is True # 2 <= 2 is True + + def test_less_than_batch_2(self): + """Strong test - checks boundary.""" + below, at_or_below = comparisons.less_than_batch_2(5, 10) + assert below is True + assert at_or_below is True + # Test at boundary to distinguish < from <= + below_eq, at_eq = comparisons.less_than_batch_2(10, 10) + assert below_eq is False # 10 < 10 is 
False + assert at_eq is True # 10 <= 10 is True + + def test_less_than_literals(self): + """Strong test - checks boundaries.""" + result = comparisons.less_than_literals(-1) + assert result[0] is True # lt_zero: -1 < 0 + assert result[1] is True # lt_ten: -1 < 10 + assert result[2] is True # le_zero: -1 <= 0 + # Test at boundary 0 to distinguish < from <= + result_zero = comparisons.less_than_literals(0) + assert result_zero[0] is False # 0 < 0 is False + assert result_zero[2] is True # 0 <= 0 is True + + def test_greater_than_simple(self): + """Strong test - tests boundary.""" + gt, ge = comparisons.greater_than_simple(5, 3) + assert gt is True + assert ge is True + # Test at boundary to distinguish > from >= + gt_eq, ge_eq = comparisons.greater_than_simple(5, 5) + assert gt_eq is False # 5 > 5 is False + assert ge_eq is True # 5 >= 5 is True + + def test_greater_than_batch_1(self): + """Strong test - checks all values and boundary.""" + result = comparisons.greater_than_batch_1(3, 2, 1) + assert result[0] is True # 3 > 2 + assert result[1] is True # 2 > 1 + assert result[2] is True # 3 >= 1 + # Test boundary to distinguish > from >= + result_eq = comparisons.greater_than_batch_1(2, 2, 2) + assert result_eq[0] is False # 2 > 2 is False + assert result_eq[1] is False # 2 > 2 is False + assert result_eq[2] is True # 2 >= 2 is True + + def test_greater_than_batch_2(self): + """Strong test - checks boundary.""" + above, at_or_above = comparisons.greater_than_batch_2(15, 10) + assert above is True + assert at_or_above is True + # Test at boundary to distinguish > from >= + above_eq, at_eq = comparisons.greater_than_batch_2(10, 10) + assert above_eq is False # 10 > 10 is False + assert at_eq is True # 10 >= 10 is True + + def test_greater_than_literals(self): + """Strong test - checks boundaries.""" + result = comparisons.greater_than_literals(5) + assert result[0] is True # gt_zero: 5 > 0 + assert result[1] is False # gt_ten: 5 > 10 is False + assert result[2] is True 
# ge_zero: 5 >= 0 + # Test at boundary 0 to distinguish > from >= + result_zero = comparisons.greater_than_literals(0) + assert result_zero[0] is False # 0 > 0 is False + assert result_zero[2] is True # 0 >= 0 is True + + def test_identity_none(self): + """Strong test.""" + is_none, is_not_none = comparisons.identity_none(None) + assert is_none is True + assert is_not_none is False + + def test_identity_batch_1(self): + """Strong test - checks both values.""" + obj = object() + same, different = comparisons.identity_batch_1(obj, obj) + assert same is True + assert different is False + # Test with different objects + obj2 = object() + same2, different2 = comparisons.identity_batch_1(obj, obj2) + assert same2 is False + assert different2 is True + + def test_identity_checks(self): + """Coverage test.""" + result = comparisons.identity_checks(5, 10) + assert result == 5 + + def test_membership_simple(self): + """Strong test.""" + present, absent = comparisons.membership_simple(2, [1, 2, 3]) + assert present is True + assert absent is False + + def test_membership_batch_1(self): + """Strong test - checks both values.""" + r1, r2 = comparisons.membership_batch_1(1, [1, 2, 3]) + assert r1 is True # 1 in [1, 2, 3] + assert r2 is False # 1 not in [1, 2, 3] is False + # Test with missing item + r1_missing, r2_missing = comparisons.membership_batch_1(99, [1, 2, 3]) + assert r1_missing is False # 99 in [1, 2, 3] is False + assert r2_missing is True # 99 not in [1, 2, 3] + + def test_membership_string(self): + """Strong test.""" + found, not_found = comparisons.membership_string("a", "abc") + assert found is True + assert not_found is False + + def test_membership_dict(self): + """Strong test.""" + has_key, missing_key = comparisons.membership_dict("a", {"a": 1}) + assert has_key is True + assert missing_key is False + + def test_boundary_check_1(self): + """Strong test - tests all boundaries.""" + assert comparisons.boundary_check_1(-1) == "negative" + assert 
comparisons.boundary_check_1(0) == "zero" + assert comparisons.boundary_check_1(5) == "small" + assert comparisons.boundary_check_1(10) == "small" # boundary: <= 10 + assert comparisons.boundary_check_1(11) == "medium" # boundary: > 10, < 100 + assert comparisons.boundary_check_1(99) == "medium" # boundary: < 100 + assert comparisons.boundary_check_1(100) == "large" # boundary: >= 100 + + def test_boundary_check_2(self): + """Strong test - tests all cases.""" + assert comparisons.boundary_check_2(-1, 0, 10) == "below" # < low + assert comparisons.boundary_check_2(15, 0, 10) == "above" # > high + assert comparisons.boundary_check_2(0, 0, 10) == "at_low" # == low + assert comparisons.boundary_check_2(10, 0, 10) == "at_high" # == high + assert comparisons.boundary_check_2(5, 0, 10) == "within" # in range + + def test_range_check(self): + """Strong test - tests boundaries.""" + assert comparisons.range_check(5, 0, 10) is True # within + assert comparisons.range_check(0, 0, 10) is True # at min (>= min_val) + assert comparisons.range_check(10, 0, 10) is True # at max (<= max_val) + assert comparisons.range_check(-1, 0, 10) is False # below min + assert comparisons.range_check(11, 0, 10) is False # above max + + def test_compare_all(self): + """Strong test - checks all comparison results.""" + result = comparisons.compare_all(5, 3) + assert result["eq"] is False # 5 == 3 + assert result["ne"] is True # 5 != 3 + assert result["lt"] is False # 5 < 3 + assert result["le"] is False # 5 <= 3 + assert result["gt"] is True # 5 > 3 + assert result["ge"] is True # 5 >= 3 + # Test boundary to distinguish < from <=, > from >= + result_eq = comparisons.compare_all(5, 5) + assert result_eq["eq"] is True + assert result_eq["lt"] is False # 5 < 5 + assert result_eq["le"] is True # 5 <= 5 + assert result_eq["gt"] is False # 5 > 5 + assert result_eq["ge"] is True # 5 >= 5 + + def test_chained_comparisons(self): + """Strong test - tests boundaries.""" + in_lower, in_upper, below, above = 
comparisons.chained_comparisons(5, 0, 10, 20) + assert in_lower is True # 0 <= 5 < 10 + assert in_upper is False # 10 <= 5 <= 20 is False + assert below is False + assert above is False + # Test at boundaries + # x=0: 0 <= 0 < 10 is True + in_lower_0, _, _, _ = comparisons.chained_comparisons(0, 0, 10, 20) + assert in_lower_0 is True + # x=10: 0 <= 10 < 10 is False (< 10 fails), 10 <= 10 <= 20 is True + in_lower_10, in_upper_10, _, _ = comparisons.chained_comparisons(10, 0, 10, 20) + assert in_lower_10 is False # boundary: < 10 fails + assert in_upper_10 is True # 10 <= 10 <= 20 + # Test below/above + _, _, below_neg, _ = comparisons.chained_comparisons(-5, 0, 10, 20) + assert below_neg is True + _, _, _, above_30 = comparisons.chained_comparisons(30, 0, 10, 20) + assert above_30 is True + + def test_multi_condition_check(self): + """Strong test - tests boundaries and all paths.""" + all_above, any_above, all_equal, none_below = comparisons.multi_condition_check(5, 10, 15, 3) + assert all_above is True # all > 3 + assert any_above is True + assert all_equal is False # 5 != 10 != 15 + assert none_below is True # all >= 3 + # Test at threshold boundary (>= vs >) + all_above_t, any_above_t, _, none_below_t = comparisons.multi_condition_check(3, 3, 3, 3) + assert all_above_t is False # 3 > 3 is False + assert any_above_t is False # none > 3 + assert none_below_t is True # all >= 3 + # Test with one above threshold + all_above_one, any_above_one, _, _ = comparisons.multi_condition_check(2, 2, 5, 3) + assert all_above_one is False # not all > 3 + assert any_above_one is True # 5 > 3 + # Test all equal + _, _, all_eq, _ = comparisons.multi_condition_check(5, 5, 5, 0) + assert all_eq is True + + def test_sorted_check(self): + """Test sorted checks.""" + asc, desc = comparisons.sorted_check(1, 2, 3) + assert asc is True + assert desc is False diff --git a/e2e_projects/benchmark_1k/tests/test_complex.py b/e2e_projects/benchmark_1k/tests/test_complex.py new file mode 100644 
index 00000000..a21d5fbe --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_complex.py @@ -0,0 +1,121 @@ +"""Tests for complex.py module.""" + +from benchmark import complex + + +class TestComplex: + """Test complex call patterns.""" + + def test_chain1_entry(self): + """Strong test - exercises 10-level deep call chain.""" + result = complex.chain1_entry(0) + assert result == 20 # (0 + 1*10) * 2 + + def test_factorial_tail(self): + """Strong test.""" + assert complex.factorial_tail(5) == 120 + assert complex.factorial_tail(0) == 1 + assert complex.factorial_tail(1) == 1 + + def test_sum_tail(self): + """Strong test.""" + assert complex.sum_tail(10) == 55 + + def test_power_tail(self): + """Strong test.""" + assert complex.power_tail(2, 3) == 8 + assert complex.power_tail(3, 2) == 9 + + def test_gcd_tail(self): + """Strong test.""" + assert complex.gcd_tail(48, 18) == 6 + + def test_fibonacci(self): + """Strong test.""" + assert complex.fibonacci(0) == 0 + assert complex.fibonacci(1) == 1 + assert complex.fibonacci(10) == 55 + + def test_flatten(self): + """Strong test.""" + assert complex.flatten([1, [2, 3], [4, [5]]]) == [1, 2, 3, 4, 5] + + def test_is_even(self): + """Strong test.""" + assert complex.is_even(4) is True + assert complex.is_even(3) is False + + def test_is_odd(self): + """Strong test.""" + assert complex.is_odd(3) is True + assert complex.is_odd(4) is False + + def test_descend_a(self): + """Strong test - checks exact value.""" + # 5 -> b(4, 1) -> a(3, 3) -> b(2, 4) -> a(1, 6) -> b(0, 7) -> returns 7 + assert complex.descend_a(5) == 7 + # boundary: n=0 should return acc immediately + assert complex.descend_a(0) == 0 + + def test_apply_twice(self): + """Strong test.""" + assert complex.apply_twice(lambda x: x + 1, 0) == 2 + + def test_apply_n_times(self): + """Strong test.""" + assert complex.apply_n_times(lambda x: x * 2, 1, 3) == 8 + + def test_compose(self): + """Strong test.""" + f = complex.compose(lambda x: x + 1, lambda x: x * 2) + 
assert f(3) == 7 # (3 * 2) + 1 + + def test_map_reduce(self): + """Strong test.""" + result = complex.map_reduce([1, 2, 3], lambda x: x * 2, lambda acc, x: acc + x, 0) + assert result == 12 # (1*2) + (2*2) + (3*2) + + def test_with_callback(self): + """Strong test.""" + result = complex.with_callback("data", lambda d: f"success: {d}", lambda e: f"error: {e}") + assert result == "success: data" + + def test_nested_loops(self): + """Strong test - checks exact values.""" + # [[1, 2], [3, 4]] -> 1*2 + 2*2 + 3*2 + 4*2 = 20 + assert complex.nested_loops([[1, 2], [3, 4]]) == 20 + # Test with negative values: -1+1 + -2+1 = 0 + -1 = -1 + assert complex.nested_loops([[-1, -2]]) == -1 + # Test boundary: 0 is not > 0, so uses else branch: 0+1 = 1 + assert complex.nested_loops([[0]]) == 1 + + def test_nested_conditions(self): + """Strong test - tests all paths.""" + # x>0, y>0, z>0: x+y+z + assert complex.nested_conditions(1, 1, 1) == 3 + # x>0, y>0, z<=0: x+y-z + assert complex.nested_conditions(1, 1, -1) == 3 # 1+1-(-1)=3 + # x>0, y<=0, z>0: x-y+z + assert complex.nested_conditions(1, -1, 1) == 3 # 1-(-1)+1=3 + # x>0, y<=0, z<=0: x-y-z + assert complex.nested_conditions(1, -1, -1) == 3 # 1-(-1)-(-1)=3 + # x<=0, y>0: y+z + assert complex.nested_conditions(-1, 1, 1) == 2 + # x<=0, y<=0: z + assert complex.nested_conditions(-1, -1, 5) == 5 + # Test boundary: x=0 takes else branch + assert complex.nested_conditions(0, 1, 1) == 2 + + def test_accumulate_with_filter(self): + """Strong test.""" + result = complex.accumulate_with_filter([1, 2, 3, 4, 5], lambda x: x % 2 == 0, lambda x: x * 10) + assert result == 60 # (2*10) + (4*10) + + def test_calculate_backoff(self): + """Strong test - exponential backoff calculation.""" + assert complex.calculate_backoff(0) == 0.0 + assert complex.calculate_backoff(1) == 1.0 + assert complex.calculate_backoff(2) == 2.0 + assert complex.calculate_backoff(3) == 4.0 + # Test max_delay cap + assert complex.calculate_backoff(10, max_delay=10.0) == 10.0 
diff --git a/e2e_projects/benchmark_1k/tests/test_numbers.py b/e2e_projects/benchmark_1k/tests/test_numbers.py new file mode 100644 index 00000000..2883a43e --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_numbers.py @@ -0,0 +1,56 @@ +"""Tests for numbers.py module.""" + +from benchmark import numbers + + +class TestNumbers: + """Test number-heavy functions.""" + + def test_constants_batch_1(self): + """Test constants.""" + result = numbers.constants_batch_1() + assert result == 3 # 0+1+2 + + def test_float_constants_1(self): + """Test float constants.""" + result = numbers.float_constants_1() + assert 1.5 < result < 2.5 + + def test_negative_constants(self): + """Test negative constants.""" + result = numbers.negative_constants() + assert result < 0 + + def test_arithmetic_simple(self): + """Test arithmetic.""" + assert numbers.arithmetic_simple(0) == 1 # 0+1 + + def test_loop_range_1(self): + """Test loop range.""" + result = numbers.loop_range_1() + assert result == 15 # sum(i+1 for i in range(5)) + + def test_threshold_check_1(self): + """Test threshold check.""" + assert numbers.threshold_check_1(-1) == 0 + assert numbers.threshold_check_1(5) == 1 + + def test_array_indices(self): + """Test array indices.""" + assert numbers.array_indices([1, 2, 3, 4]) == 3 # items[0]+items[1] + + def test_multipliers(self): + """Test multipliers.""" + result = numbers.multipliers(10) + assert result == 50 # 10*2 + 10*3 = 50 + + def test_offsets(self): + """Test offsets.""" + result = numbers.offsets(100) + assert len(result) == 1 + assert result[0] == 101 + + def test_dimensions(self): + """Test dimensions.""" + result = numbers.dimensions() + assert result == (100, 200) diff --git a/e2e_projects/benchmark_1k/tests/test_operators.py b/e2e_projects/benchmark_1k/tests/test_operators.py new file mode 100644 index 00000000..28965f2e --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_operators.py @@ -0,0 +1,94 @@ +"""Tests for operators.py module.""" + +from 
benchmark import operators + + +class TestOperators: + """Test operator functions.""" + + def test_add_sub_1(self): + """Test add/sub.""" + add, sub = operators.add_sub_1(10, 3) + assert add == 13 + assert sub == 7 + + def test_mul_div_1(self): + """Test mul/div.""" + mul, div = operators.mul_div_1(10, 2) + assert mul == 20 + assert div == 5 + + def test_integer_ops_1(self): + """Test integer ops.""" + floordiv, mod = operators.integer_ops_1(10, 3) + assert floordiv == 3 + assert mod == 1 + + def test_mixed_arithmetic_1(self): + """Test mixed arithmetic.""" + assert operators.mixed_arithmetic_1(2, 3, 4) == 14 # 2 + 3*4 + + def test_bitwise_shift_1(self): + """Test bitwise shift.""" + lshift, rshift = operators.bitwise_shift_1(4) + assert lshift == 8 + assert rshift == 2 + + def test_bitwise_and_or_1(self): + """Test bitwise and/or.""" + band, bor = operators.bitwise_and_or_1(0b1100, 0b1010) + assert band == 0b1000 + assert bor == 0b1110 + + def test_augmented_add_sub(self): + """Test augmented add/sub.""" + result = operators.augmented_add_sub(10) + assert result == 10 # 10 + 1 - 1 + + def test_augmented_in_loop(self): + """Test augmented in loop.""" + result = operators.augmented_in_loop() + assert result == 10 # sum(range(5)) + + def test_unary_not_1(self): + """Test unary not.""" + assert operators.unary_not_1(True) is False + assert operators.unary_not_1(False) is True + + def test_unary_invert_1(self): + """Test unary invert.""" + assert operators.unary_invert_1(0) == -1 + + def test_unary_minus(self): + """Test unary minus.""" + assert operators.unary_minus(5) == -5 + + def test_add_sub_2(self): + """Test more add/sub.""" + r1, r2, r3 = operators.add_sub_2(10, 5, 3) + assert r1 == 18 # 10+5+3 + assert r2 == 2 # 10-5-3 + assert r3 == 12 # 10+5-3 + + def test_mul_div_2(self): + """Test more mul/div.""" + r1, r2, r3 = operators.mul_div_2(2, 3, 4) + assert r1 == 24 # 2*3*4 + assert r3 == 1.5 # 2*3/4 + + def test_integer_ops_2(self): + """Test more integer ops.""" 
+ r1, r2, r3, r4, r5 = operators.integer_ops_2(10, 11) + assert r1 == 5 # 10 // 2 + assert r2 == 0 # 10 % 2 + assert r3 == 100 # 10 ** 2 + + def test_augmented_batch(self): + """Test augmented batch.""" + result = operators.augmented_batch(10) + assert result == 10 # (10+10-5)*2//3 = 30//3 = 10 + + def test_bitwise_xor_ops(self): + """Test bitwise XOR.""" + r1, r2, r3 = operators.bitwise_xor_ops(0b1010, 0b1100) + assert r1 == 0b0110 # 1010 ^ 1100 diff --git a/e2e_projects/benchmark_1k/tests/test_returns.py b/e2e_projects/benchmark_1k/tests/test_returns.py new file mode 100644 index 00000000..84b98a2c --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_returns.py @@ -0,0 +1,72 @@ +"""Tests for returns.py module.""" + +from benchmark import returns + + +class TestReturns: + """Test return/assignment functions.""" + + def test_simple_return_integers(self): + """Test simple integer return.""" + assert returns.simple_return_integers() == 42 + + def test_assign_integers(self): + """Test integer assignments.""" + result = returns.assign_integers() + assert result == (1, 2) + + def test_assign_strings(self): + """Test string assignments.""" + result = returns.assign_strings() + assert result[0] == "hello" + + def test_assign_lists(self): + """Test list assignments.""" + result = returns.assign_lists() + assert result[0] == [1, 2, 3] + + def test_assign_mixed(self): + """Test mixed assignments.""" + result = returns.assign_mixed() + assert result == (42, "answer") + + def test_assign_none_batch_1(self): + """Test None assignments.""" + result = returns.assign_none_batch_1() + assert all(r is None for r in result) + + def test_typed_int(self): + """Test typed int.""" + result = returns.typed_int() + assert result[0] == 42 + + def test_typed_str(self): + """Test typed str.""" + result = returns.typed_str() + assert result[0] == "test" + + def test_lambda_integers(self): + """Test lambda integers.""" + f1, f2 = returns.lambda_integers() + assert f1() == 1 + assert f2() == 
2 + + def test_lambda_strings(self): + """Test lambda strings.""" + result = returns.lambda_strings() + assert result[0]() == "hello" + + def test_lambda_with_args(self): + """Test lambda with args.""" + result = returns.lambda_with_args() + assert result[0](5) == 6 + + def test_lambda_none_batch_1(self): + """Test lambda None.""" + f1, f2 = returns.lambda_none_batch_1() + assert f1() is None + + def test_conditional_assign_1(self): + """Test conditional assignment.""" + assert returns.conditional_assign_1(True) == "yes" + assert returns.conditional_assign_1(False) == "no" diff --git a/e2e_projects/benchmark_1k/tests/test_strings.py b/e2e_projects/benchmark_1k/tests/test_strings.py new file mode 100644 index 00000000..3071646a --- /dev/null +++ b/e2e_projects/benchmark_1k/tests/test_strings.py @@ -0,0 +1,142 @@ +"""Tests for strings.py module.""" + +from benchmark import strings + + +class TestStrings: + """Test string-heavy functions.""" + + def test_messages_batch_1(self): + """Test message strings.""" + result = strings.messages_batch_1() + assert result == ("hello", "world") + + def test_labels_batch_1(self): + """Test label strings.""" + result = strings.labels_batch_1() + assert result[0] == "name" + + def test_states(self): + """Test state strings.""" + result = strings.states() + assert result == ("pending", "active") + + def test_format_name(self): + """Test f-string with name.""" + assert strings.format_name("Alice") == "Name: Alice" + + def test_format_count(self): + """Test f-string with count.""" + assert strings.format_count(42) == "Count: 42" + + def test_format_result(self): + """Test f-string with result.""" + assert strings.format_result(10, "kg") == "Result: 10 kg" + + def test_case_methods_1(self): + """Test case methods.""" + lower, upper = strings.case_methods_1("HeLLo") + assert lower == "hello" + assert upper == "HELLO" + + def test_strip_methods_1(self): + """Test strip methods.""" + left, right = strings.strip_methods_1(" hello ") + assert 
left == "hello " + assert right == " hello" + + def test_find_methods_1(self): + """Test find methods.""" + pos1, pos2 = strings.find_methods_1("hello world hello", "hello") + assert pos1 == 0 + assert pos2 == 12 + + def test_split_methods_1(self): + """Test split methods.""" + parts1, parts2 = strings.split_methods_1("a-b-c-d", "-") + assert parts1 == ["a", "b", "c-d"] + assert parts2 == ["a-b", "c", "d"] + + def test_partition_methods(self): + """Test partition methods.""" + p1, p2 = strings.partition_methods("hello-world", "-") + assert p1 == ("hello", "-", "world") + assert p2 == ("hello", "-", "world") + + def test_messages_batch_2(self): + """Test batch 2 strings.""" + result = strings.messages_batch_2() + assert result == ("start", "stop", "pause") + + def test_messages_batch_3(self): + """Test batch 3 strings.""" + result = strings.messages_batch_3() + assert result[0] == "error" + + def test_symbols(self): + """Test symbol strings.""" + result = strings.symbols() + assert result == ("alpha", "beta", "gamma") + + def test_keywords(self): + """Test keyword strings.""" + result = strings.keywords() + assert "true" in result + + def test_format_error(self): + """Test error f-string.""" + assert strings.format_error(404, "Not Found") == "Error 404: Not Found" + + def test_format_coords(self): + """Test coords f-string.""" + assert strings.format_coords(1, 2) == "(1, 2)" + + def test_format_path(self): + """Test path f-string.""" + assert strings.format_path("/home", "file.txt") == "/home/file.txt" + + def test_format_greeting(self): + """Test greeting f-string.""" + assert strings.format_greeting("Dr", "Smith") == "Hello, Dr Smith!" 
+ + def test_case_methods_2(self): + """Test more case methods.""" + title, cap, swap = strings.case_methods_2("hELLO") + assert title == "Hello" + assert cap == "Hello" + + def test_strip_methods_2(self): + """Test strip with chars.""" + left, right, both = strings.strip_methods_2("xxhelloxx", "x") + assert left == "helloxx" + assert right == "xxhello" + assert both == "hello" + + def test_find_methods_2(self): + """Test find with start.""" + pos1, pos2 = strings.find_methods_2("hello world hello", "hello", 1) + assert pos1 == 12 + + def test_replace_methods(self): + """Test replace methods.""" + r1, r2 = strings.replace_methods("a-b-c", "-", "_") + assert r1 == "a_b_c" + assert r2 == "a_b-c" + + def test_justify_methods(self): + """Test justify methods.""" + left, right, center = strings.justify_methods("hi", 5) + assert len(left) == 5 + assert len(right) == 5 + + def test_index_methods(self): + """Test index methods.""" + i1, i2 = strings.index_methods("hello world hello", "hello") + assert i1 == 0 + assert i2 == 12 + + def test_prefix_suffix_methods(self): + """Test prefix/suffix removal.""" + r1, r2 = strings.prefix_suffix_methods("pre_test_suf") + assert r1 == "test_suf" + assert r2 == "pre_test" From 8e4a9e0736d38d8cee95aa26faa83f2729f361bb Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Fri, 5 Jun 2026 23:22:07 -0400 Subject: [PATCH 4/8] feat: invalidate cache on config and dependency changes Cached verdicts were only invalidated when a function body changed, so changes to config or dependency files silently produced stale results. 
- Config.config_fingerprint() hashes result-affecting config, grouped so we reset only what each change can affect: - timeout change -> reset only timeout verdicts - type_check_command change -> reset mutants whose type-check status flips (symmetric difference of old exit-37 and newly-caught) - pytest_add_cli_args / test-selection change -> reset all results and force full stats recollection - set-affecting config (source_paths, only_mutate, ...) is ignored: new mutants are uncached and dropped ones stop being walked - compute_watched_file_hashes() hashes dependency/build files (pyproject.toml, setup.cfg/py, requirements*.txt, lockfiles) plus user globs from the new cache_invalidation_files config. The on_dependency_change config ("warn" | "rerun" | "ignore", default "warn") controls whether a change warns or resets all results. - Fingerprints persist in mutmut-stats.json with pop-with-default, so old caches load and a missing fingerprint triggers no invalidation. --- README.rst | 43 +++++++ src/mutmut/__main__.py | 139 +++++++++++++++++++++- src/mutmut/configuration.py | 28 +++++ src/mutmut/state.py | 5 + tests/mutation/test_mutation.py | 202 ++++++++++++++++++++++++++++++++ tests/test_configuration.py | 2 + 6 files changed, 414 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 42896a19..7f9c4f03 100644 --- a/README.rst +++ b/README.rst @@ -401,6 +401,49 @@ You can add and override pytest arguments: also_copy = ["mutmut_pytest.ini"] +Detecting dependency and config changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Between runs, mutmut only re-tests mutants in functions whose source changed. +Changes outside your Python source — a dependency upgrade, a data file, a +config file — cannot be tied to a function, so they would otherwise be missed +and you would get cached results that no longer reflect reality. + +To catch this, mutmut hashes a set of build and dependency files and warns you +when any of them change since the last run. 
By default it watches: + +- `pyproject.toml` +- `setup.cfg` +- `setup.py` +- `requirements*.txt` +- `poetry.lock` +- `uv.lock` +- `Pipfile` +- `Pipfile.lock` + +You can watch additional files (for example data files your tests depend on) +with the `cache_invalidation_files` config, which accepts glob patterns +resolved against the project root: + +.. code-block:: toml + + cache_invalidation_files = [ "queries/*.sql", "config/*.yaml" ] + +When a watched file changes, `on_dependency_change` controls what happens: + +- `warn` (default): list the changed files and keep the cache. +- `rerun`: re-test all mutants. +- `ignore`: do nothing. + +.. code-block:: toml + + on_dependency_change = "warn" + +Changes to mutmut's own result-affecting config (such as `pytest_add_cli_args`, +`type_check_command`, or the timeout settings) are always detected and +invalidate the affected cached results automatically. + + Unstable configs ~~~~~~~~~~~~~~~~ diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py index 862acef9..8f98480b 100644 --- a/src/mutmut/__main__.py +++ b/src/mutmut/__main__.py @@ -22,6 +22,7 @@ import ast import fnmatch import gc +import hashlib import inspect import itertools import json @@ -62,6 +63,7 @@ from mutmut.code_coverage import get_covered_lines_for_file from mutmut.configuration import Config from mutmut.mutation.data import SourceFileMutationData +from mutmut.mutation.file_mutation import FailedTypeCheckMutant from mutmut.mutation.file_mutation import filter_mutants_with_type_checker from mutmut.mutation.file_mutation import mutate_file_contents from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR @@ -819,10 +821,130 @@ def _invalidate_stale_dependency_edges() -> set[str]: return changed_functions -def collect_or_load_stats(runner: TestRunner, invalidate_stale_callers: bool = True) -> None: +# Dependency / build files whose changes the per-function source hashes cannot see. 
+# Globs are resolved against the project root; missing files are skipped. Users can +# extend this via the ``cache_invalidation_files`` config. +_DEFAULT_WATCHED_FILES = ( + "pyproject.toml", + "setup.cfg", + "setup.py", + "requirements*.txt", + "poetry.lock", + "uv.lock", + "Pipfile", + "Pipfile.lock", +) + + +def compute_watched_file_hashes() -> dict[str, str]: + """Map watched-file path -> content hash for the default set plus user globs.""" + patterns = list(_DEFAULT_WATCHED_FILES) + list(Config.get().cache_invalidation_files) + hashes: dict[str, str] = {} + for pattern in patterns: + for path in sorted(Path(".").glob(pattern)): + if path.is_file(): + hashes[str(path)] = hashlib.sha256(path.read_bytes()).hexdigest()[:12] + return hashes + + +def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int: + """Reset cached verdicts to ``None`` (forcing a re-test) where ``should_reset`` holds. + + ``should_reset`` only sees already-decided mutants (``exit_code`` is not ``None``). + """ + count = 0 + for path in walk_mutatable_files(): + meta_path = Path("mutants") / (str(path) + ".meta") + if not meta_path.exists(): + continue + m = SourceFileMutationData(path=path) + m.load() + dirty = False + for key, exit_code in list(m.exit_code_by_key.items()): + if exit_code is not None and should_reset(key, exit_code): + m.exit_code_by_key[key] = None + dirty = True + count += 1 + if dirty: + m.save() + return count + + +def _report_watched_file_changes() -> bool: + """Surface changes to watched config/dependency files. + + Returns True only when the configured policy is ``rerun`` and something changed, + asking the caller to reset all results. Silent when no prior hashes exist. 
+ """ + old = state().old_watched_file_hashes + if not old: + return False + new = compute_watched_file_hashes() + changed = sorted(p for p in old.keys() | new.keys() if old.get(p) != new.get(p)) + if not changed: + return False + + policy = Config.get().on_dependency_change + if policy == "ignore": + return False + if policy == "rerun": + print(f" {len(changed)} watched file(s) changed; rerunning all mutants: {', '.join(changed)}") + return True + # default: warn but keep the cache + print(f" Warning: {len(changed)} watched file(s) changed since the last run: {', '.join(changed)}") + print(" These cannot be tracked for behavioral changes, so cached results were kept.") + print(' If the changes affect your tests, delete the mutants/ directory or set on_dependency_change = "rerun".') + return False + + +def _apply_config_change_invalidation(mutants_caught_by_type_checker: dict[str, object]) -> bool: + """Reset only the cached verdicts a config / dependency change could have invalidated. + + Returns True if a full stats recollection is required (a global pytest config change + or an opt-in dependency rerun), in which case all results have already been reset. + """ + old_fp = state().old_config_fingerprint + new_fp = Config.get().config_fingerprint() + changed_groups = {g for g in new_fp if old_fp.get(g) != new_fp[g]} if old_fp else set() + + dependency_rerun = _report_watched_file_changes() + + # Global groups change how *every* test runs / which tests map to a function, so no + # subset of results is safe to keep -> full reset and full stats recollection. + if changed_groups & {"test_execution", "test_selection"} or dependency_rerun: + _reset_mutant_results(lambda key, exit_code: True) + mutmut.duration_by_test.clear() + mutmut.tests_by_mangled_function_name.clear() + state().function_dependencies.clear() + return True + + # Timeout config only reclassifies timeouts; keep every other verdict. 
+ if "timeout" in changed_groups: + _reset_mutant_results(lambda key, exit_code: status_by_exit_code[exit_code] == "timeout") + + # The type-check pre-filter runs fresh every run; only verdicts whose type-check + # status flips are stale -> reset the symmetric difference of old (==37) and new. + if "type_check" in changed_groups: + caught = set(mutants_caught_by_type_checker) + _reset_mutant_results(lambda key, exit_code: (exit_code == 37) != (key in caught)) + + return False + + +def collect_or_load_stats( + runner: TestRunner, + *, + mutants_caught_by_type_checker: dict[str, Any] | None = None, + apply_config_invalidation: bool = False, + invalidate_stale_callers: bool = True, +) -> None: did_load = load_stats() - if not did_load: + force_full = False + if did_load and apply_config_invalidation: + force_full = _apply_config_change_invalidation(mutants_caught_by_type_checker or {}) + + if not did_load or force_full: # Run full stats run_stats_collection(runner) else: @@ -862,6 +984,8 @@ def load_stats() -> bool: state().old_function_hashes = data.pop("function_hashes", {}) for k, v in data.pop("function_dependencies", {}).items(): state().function_dependencies[k] = set(v) + state().old_config_fingerprint = data.pop("config_fingerprint", {}) + state().old_watched_file_hashes = data.pop("watched_file_hashes", {}) assert not data, data did_load = True except (FileNotFoundError, JSONDecodeError): @@ -878,6 +1002,8 @@ def save_stats() -> None: stats_time=mutmut.stats_time, function_hashes=state().current_function_hashes, function_dependencies={k: list(v) for k, v in state().function_dependencies.items()}, + config_fingerprint=Config.get().config_fingerprint(), + watched_file_hashes=compute_watched_file_hashes(), ), f, indent=4, @@ -1101,11 +1227,10 @@ def _run(mutant_names: tuple[str, ...] 
| list[str], max_children: int | None) -> f" done in {round(time.total_seconds() * 1000)}ms ({stats.mutated} files mutated, {stats.ignored} ignored, {stats.unmodified} unmodified)", ) + mutants_caught_by_type_checker: dict[str, FailedTypeCheckMutant] = {} if Config.get().type_check_command: with CatchOutput(spinner_title="Filtering mutations with type checker"): mutants_caught_by_type_checker = filter_mutants_with_type_checker() - else: - mutants_caught_by_type_checker = {} # TODO: config/option for runner # runner = HammettRunner() @@ -1114,7 +1239,11 @@ def _run(mutant_names: tuple[str, ...] | list[str], max_children: int | None) -> # TODO: run these steps only if we have mutants to test - collect_or_load_stats(runner) + collect_or_load_stats( + runner, + mutants_caught_by_type_checker=mutants_caught_by_type_checker, + apply_config_invalidation=True, + ) mutants, source_file_mutation_data_by_path = collect_source_file_mutation_data(mutant_names=mutant_names) diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py index 9145d4f8..98fce390 100644 --- a/src/mutmut/configuration.py +++ b/src/mutmut/configuration.py @@ -1,6 +1,7 @@ from __future__ import annotations import fnmatch +import hashlib import os import platform import sys @@ -144,6 +145,8 @@ def _load_config() -> Config: ), # False on Mac, true otherwise as default (https://github.com/boxed/mutmut/pull/450#issuecomment-4002571055) track_dependencies=s("track_dependencies", True), dependency_tracking_depth=s("dependency_tracking_depth", None), + cache_invalidation_files=s("cache_invalidation_files", []), + on_dependency_change=s("on_dependency_change", "warn"), ) @@ -168,6 +171,31 @@ class Config: use_setproctitle: bool track_dependencies: bool dependency_tracking_depth: int | None + cache_invalidation_files: list[str] + on_dependency_change: str + + def config_fingerprint(self) -> dict[str, str]: + """Hash the config fields that can change cached mutant *results*, grouped so the + caller can 
invalidate only the verdict classes each group can affect. + + Fields that only change *which* mutants exist (source_paths, only_mutate, etc.) + are deliberately excluded: new mutants are born uncached and dropped ones simply + stop being walked, so they need no result invalidation. + """ + + def _hash(value: object) -> str: + return hashlib.sha256(repr(value).encode()).hexdigest()[:12] + + return { + # global pytest behaviour: a change can flip any verdict + "test_execution": _hash(tuple(self.pytest_add_cli_args)), + # which tests cover which function: a change reshapes the stats mapping + "test_selection": _hash(tuple(self.pytest_add_cli_args_test_selection)), + # only reclassifies timeouts + "timeout": _hash((self.timeout_multiplier, self.timeout_constant)), + # only changes the type-check pre-filter + "type_check": _hash(tuple(self.type_check_command)), + } def should_mutate(self, path: Path | str) -> bool: return self._should_include_for_mutation(path) and not self._should_ignore_for_mutation(path) diff --git a/src/mutmut/state.py b/src/mutmut/state.py index 6a774e31..c53a3ec4 100644 --- a/src/mutmut/state.py +++ b/src/mutmut/state.py @@ -8,6 +8,11 @@ class MutmutState: old_function_hashes: dict[str, str] = field(default_factory=dict) current_function_hashes: dict[str, str] = field(default_factory=dict) function_dependencies: defaultdict[str, set[str]] = field(default_factory=lambda: defaultdict(set)) + # Fingerprints loaded from the previous run, used to detect config / dependency + # changes the per-function source hashes cannot see. Empty when absent (pre-upgrade + # cache or first run), in which case no invalidation is triggered. 
+ old_config_fingerprint: dict[str, str] = field(default_factory=dict) + old_watched_file_hashes: dict[str, str] = field(default_factory=dict) _state: MutmutState | None = None diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py index 7aa18ce1..9be27484 100644 --- a/tests/mutation/test_mutation.py +++ b/tests/mutation/test_mutation.py @@ -11,8 +11,12 @@ import mutmut from mutmut.__main__ import CatchOutput from mutmut.__main__ import MutmutProgrammaticFailException +from mutmut.__main__ import _apply_config_change_invalidation from mutmut.__main__ import _cleanup_stale_stats from mutmut.__main__ import _invalidate_stale_dependency_edges +from mutmut.__main__ import _report_watched_file_changes +from mutmut.__main__ import _reset_mutant_results +from mutmut.__main__ import compute_watched_file_hashes from mutmut.__main__ import get_diff_for_mutant from mutmut.__main__ import mangled_name_from_mutant_name from mutmut.__main__ import orig_function_and_class_names_from_key @@ -1293,3 +1297,201 @@ def test_invalidate_stale_dependency_edges_no_old_hashes_returns_empty(): assert changed == set() reset_state() + + +# --- config / dependency change invalidation tests (Tier 1 & 2) --- + + +def _config_for_invalidation(**overrides): + base = dict( + also_copy=[], + only_mutate=[], + do_not_mutate=[], + do_not_mutate_patterns=[], + max_stack_depth=-1, + debug=False, + source_paths=[pathlib.Path("src")], + pytest_add_cli_args=[], + pytest_add_cli_args_test_selection=[], + mutate_only_covered_lines=False, + timeout_multiplier=15.0, + timeout_constant=1.0, + type_check_command=[], + use_setproctitle=False, + track_dependencies=True, + dependency_tracking_depth=None, + cache_invalidation_files=[], + on_dependency_change="warn", + ) + base.update(overrides) + return Config(**base) + + +def _write_meta(exit_code_by_key, src_rel="src/mymod.py"): + """Create a source file under a mutatable source dir plus its .meta, return the path.""" + src = 
pathlib.Path(src_rel) + src.parent.mkdir(parents=True, exist_ok=True) + src.write_text("def foo():\n return 1\n") + m = SourceFileMutationData(path=src) + m.exit_code_by_key = dict(exit_code_by_key) + m.meta_path.parent.mkdir(parents=True, exist_ok=True) + m.save() + return src + + +def _load_results(src_rel="src/mymod.py"): + m = SourceFileMutationData(path=pathlib.Path(src_rel)) + m.load() + return m.exit_code_by_key + + +def test_reset_mutant_results_resets_only_matching(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation()) + _write_meta({"a": 36, "b": 0, "c": None}) # timeout, survived, uncached + + reset = _reset_mutant_results(lambda key, exit_code: exit_code == 36) + + assert reset == 1 + results = _load_results() + assert results == {"a": None, "b": 0, "c": None} + + +def test_timeout_config_change_resets_only_timeouts(tmp_path, monkeypatch): + """Changing timeout config invalidates timeout verdicts but keeps killed/survived.""" + reset_state() + monkeypatch.chdir(tmp_path) + old_cfg = _config_for_invalidation() + state().old_config_fingerprint = old_cfg.config_fingerprint() + + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(timeout_multiplier=30.0)) + _write_meta({"timed_out": 36, "killed": 1, "survived": 0}) + + force_full = _apply_config_change_invalidation({}) + + assert force_full is False + assert _load_results() == {"timed_out": None, "killed": 1, "survived": 0} + reset_state() + + +def test_type_check_config_change_resets_symmetric_difference(tmp_path, monkeypatch): + """A type-check command change re-tests verdicts whose type-check status flips.""" + reset_state() + monkeypatch.chdir(tmp_path) + old_cfg = _config_for_invalidation(type_check_command=["old"]) + state().old_config_fingerprint = old_cfg.config_fingerprint() + + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(type_check_command=["new"])) + # was_caught: cached 37 but no 
longer caught -> reset; now_caught: survived but newly caught -> reset; + # still_caught: 37 and still caught -> keep; untouched: survived and not caught -> keep + _write_meta({"was_caught": 37, "now_caught": 0, "still_caught": 37, "untouched": 0}) + + force_full = _apply_config_change_invalidation({"now_caught": object(), "still_caught": object()}) + + assert force_full is False + assert _load_results() == { + "was_caught": None, + "now_caught": None, + "still_caught": 37, + "untouched": 0, + } + reset_state() + + +def test_global_pytest_change_forces_full_rerun(tmp_path, monkeypatch): + """A pytest-arg change resets all results and requests full stats recollection.""" + reset_state() + monkeypatch.chdir(tmp_path) + state().old_config_fingerprint = _config_for_invalidation().config_fingerprint() + mutmut.duration_by_test["test_x"] = 1.0 + mutmut.tests_by_mangled_function_name["mod.x_foo"] = {"test_x"} + + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(pytest_add_cli_args=["-x"])) + _write_meta({"a": 1, "b": 0, "c": 36}) + + force_full = _apply_config_change_invalidation({}) + + assert force_full is True + assert all(v is None for v in _load_results().values()) + assert not mutmut.duration_by_test + assert not mutmut.tests_by_mangled_function_name + reset_state() + + +def test_no_config_change_keeps_all_results(tmp_path, monkeypatch): + reset_state() + monkeypatch.chdir(tmp_path) + cfg = _config_for_invalidation() + state().old_config_fingerprint = cfg.config_fingerprint() + monkeypatch.setattr(Config, "get", lambda: cfg) + _write_meta({"a": 1, "b": 0, "c": 36}) + + force_full = _apply_config_change_invalidation({}) + + assert force_full is False + assert _load_results() == {"a": 1, "b": 0, "c": 36} + reset_state() + + +def test_absent_fingerprint_is_silent(tmp_path, monkeypatch): + """A pre-upgrade cache (no stored fingerprint) triggers no invalidation.""" + reset_state() + monkeypatch.chdir(tmp_path) + # old_config_fingerprint left empty + 
monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(pytest_add_cli_args=["-x"])) + _write_meta({"a": 1, "b": 0}) + + force_full = _apply_config_change_invalidation({}) + + assert force_full is False + assert _load_results() == {"a": 1, "b": 0} + reset_state() + + +def test_watched_file_change_warn_keeps_cache(tmp_path, monkeypatch, capsys): + reset_state() + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation()) + pathlib.Path("pyproject.toml").write_text("[project]\nname='x'\n") + state().old_watched_file_hashes = {"pyproject.toml": "deadbeef0000"} + + rerun = _report_watched_file_changes() + + assert rerun is False + assert "pyproject.toml" in capsys.readouterr().out + reset_state() + + +def test_watched_file_change_rerun_policy(tmp_path, monkeypatch): + reset_state() + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(on_dependency_change="rerun")) + pathlib.Path("uv.lock").write_text("changed") + state().old_watched_file_hashes = {"uv.lock": "deadbeef0000"} + + assert _report_watched_file_changes() is True + reset_state() + + +def test_watched_file_absent_old_hashes_is_silent(tmp_path, monkeypatch, capsys): + reset_state() + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation()) + pathlib.Path("pyproject.toml").write_text("[project]\nname='x'\n") + # old_watched_file_hashes left empty + + assert _report_watched_file_changes() is False + assert capsys.readouterr().out == "" + reset_state() + + +def test_compute_watched_file_hashes_includes_user_globs(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(cache_invalidation_files=["*.sql"])) + pathlib.Path("pyproject.toml").write_text("x") + pathlib.Path("query.sql").write_text("select 1") + + hashes = compute_watched_file_hashes() + + assert "pyproject.toml" in hashes + assert "query.sql" in 
hashes diff --git a/tests/test_configuration.py b/tests/test_configuration.py index bb9feb5c..469d2f47 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -73,6 +73,8 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config: use_setproctitle=False, track_dependencies=True, dependency_tracking_depth=None, + cache_invalidation_files=[], + on_dependency_change="warn", ) def test_ignores_non_python_files(self): From d81c127cfb2c950749c92500eb5c57b173302b2b Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:49:35 -0400 Subject: [PATCH 5/8] feat: use git to detect non-Python dependency file changes Replace the fixed watched-file list with git-based change detection. mutmut now uses `git diff`/`git ls-files` to find every non-.py file changed since the last full run, falling back to the curated list when git is unavailable. A default exclude set (*.md, *.rst, docs/, LICENSE, etc.) drops files that never affect tests; users can extend it with `cache_invalidation_exclude`. The git commit and file hashes are persisted together as a baseline so a later git-less environment (e.g. a separate CI stage) can still detect changes to previously-tracked files by re-hashing them. New options: `use_git_change_detection` (default true) and `cache_invalidation_exclude`. 
--- README.rst | 34 +++++- src/mutmut/__main__.py | 182 +++++++++++++++++++++++++--- src/mutmut/configuration.py | 4 + src/mutmut/state.py | 6 + tests/mutation/test_mutation.py | 205 ++++++++++++++++++++++++++++++++ tests/test_configuration.py | 2 + 6 files changed, 415 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index 7f9c4f03..c0f76865 100644 --- a/README.rst +++ b/README.rst @@ -409,8 +409,19 @@ Changes outside your Python source — a dependency upgrade, a data file, a config file — cannot be tied to a function, so they would otherwise be missed and you would get cached results that no longer reflect reality. -To catch this, mutmut hashes a set of build and dependency files and warns you -when any of them change since the last run. By default it watches: +To catch this, mutmut detects non-Python files that changed since the last full +run and warns you about them. If your project is a git repository and git is +installed, mutmut uses git (a soft dependency — no extra package is required) to +find every changed non-Python file, respecting your `.gitignore`. Python files +are excluded because their changes are already tracked per function. + +On a full run with git available, mutmut also records the content hashes of the +tracked non-Python files. This means a later run in an environment without git +(for example a different CI stage) can still detect changes to that known set of +files, even though it cannot discover brand-new ones. + +When git is unavailable, mutmut falls back to hashing a curated set of build and +dependency files: - `pyproject.toml` - `setup.cfg` @@ -423,12 +434,22 @@ when any of them change since the last run. By default it watches: You can watch additional files (for example data files your tests depend on) with the `cache_invalidation_files` config, which accepts glob patterns -resolved against the project root: +resolved against the project root. 
These are checked even when git ignores them, +and are never dropped by the exclusions below: .. code-block:: toml cache_invalidation_files = [ "queries/*.sql", "config/*.yaml" ] +Git detection reports every changed non-Python file, so mutmut drops files that +practically never affect tests (markdown, `LICENSE`, `CHANGELOG`, `docs/`, git +and editor metadata, ...). Exclude additional noisy files with +`cache_invalidation_exclude` (glob patterns, `*` spans directories): + +.. code-block:: toml + + cache_invalidation_exclude = [ "*.json", "fixtures/snapshots/*" ] + When a watched file changes, `on_dependency_change` controls what happens: - `warn` (default): list the changed files and keep the cache. @@ -439,6 +460,13 @@ When a watched file changes, `on_dependency_change` controls what happens: on_dependency_change = "warn" +Git detection is on by default; disable it (forcing the curated-list fallback) +with: + +.. code-block:: toml + + use_git_change_detection = false + Changes to mutmut's own result-affecting config (such as `pytest_add_cli_args`, `type_check_command`, or the timeout settings) are always detected and invalidate the affected cached results automatically. diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py index 8f98480b..01afe7fe 100644 --- a/src/mutmut/__main__.py +++ b/src/mutmut/__main__.py @@ -835,18 +835,166 @@ def _invalidate_stale_dependency_edges() -> set[str]: "Pipfile.lock", ) +# Files that practically never affect test behavior. Git change detection otherwise +# surfaces every non-.py file in the repo, so these are dropped to cut the noise. +# Users extend this via the ``cache_invalidation_exclude`` config; anything they +# explicitly register in ``cache_invalidation_files`` is never excluded. Patterns are +# matched with fnmatch (``*`` spans path separators). 
+_DEFAULT_INVALIDATION_EXCLUDE = ( + "*.md", + "*.rst", + "LICENSE*", + "COPYING*", + "NOTICE*", + "AUTHORS*", + "CHANGELOG*", + "CHANGES*", + ".gitignore", + ".gitattributes", + ".editorconfig", + ".pre-commit-config.yaml", + "docs/*", + "doc/*", +) + + +def _hash_files(paths: Iterable[str]) -> dict[str, str]: + """Content hash each existing path; missing files are simply omitted.""" + hashes: dict[str, str] = {} + for p in paths: + path = Path(p) + if path.is_file(): + hashes[p] = hashlib.sha256(path.read_bytes()).hexdigest()[:12] + return hashes + def compute_watched_file_hashes() -> dict[str, str]: """Map watched-file path -> content hash for the default set plus user globs.""" patterns = list(_DEFAULT_WATCHED_FILES) + list(Config.get().cache_invalidation_files) - hashes: dict[str, str] = {} - for pattern in patterns: - for path in sorted(Path(".").glob(pattern)): - if path.is_file(): - hashes[str(path)] = hashlib.sha256(path.read_bytes()).hexdigest()[:12] + paths = [str(path) for pattern in patterns for path in sorted(Path(".").glob(pattern))] + return _hash_files(paths) + + +def _run_git(args: list[str]) -> str | None: + """Run a git command at the project root. Returns stdout, or None on any failure + (git not installed, not a repo, unknown ref, ...). Git is a soft dependency: this + never raises so callers can silently fall back to content hashing. + """ + try: + result = subprocess.run(["git", *args], capture_output=True, text=True, check=False) + except OSError: + return None + if result.returncode != 0: + return None + return result.stdout + + +def git_head() -> str | None: + """The current HEAD commit, or None when git / a repo / a commit is unavailable.""" + out = _run_git(["rev-parse", "HEAD"]) + return out.strip() if out else None + + +def git_changed_non_py_files(since_ref: str) -> set[str] | None: + """Non-.py files changed since ``since_ref`` (tracked diffs against the working tree, + including uncommitted edits, plus new untracked files). 
``.py`` files are excluded + because the per-function hashes already track them. Returns None if git cannot answer. + """ + diff = _run_git(["diff", "--name-only", since_ref, "--"]) + if diff is None: + return None + untracked = _run_git(["ls-files", "--others", "--exclude-standard"]) or "" + files = {line for line in (diff + "\n" + untracked).splitlines() if line} + return {f for f in files if not f.endswith(".py")} + + +def git_tracked_non_py_files() -> set[str] | None: + """Every non-.py file git knows about (tracked + untracked-not-ignored), or None if + git cannot answer. Recorded on a full run so a later git-less run can still detect + changes to these files by re-hashing them. + """ + out = _run_git(["ls-files", "--cached", "--others", "--exclude-standard"]) + if out is None: + return None + return {line for line in out.splitlines() if line and not line.endswith(".py")} + + +def _changed_hashed_files(restrict_to: list[str] | None = None) -> set[str]: + """Baseline files whose content changed, by re-hashing them now. + + Re-hashes every path in the stored baseline (which, after a full run with git, is + the comprehensive set of non-.py files) plus any newly-appearing curated/user-glob + files. This is how a git-less run still detects changes to files git discovered. + ``restrict_to`` limits the result to paths matching those glob patterns. + """ + old = state().old_watched_file_hashes + if not old: + return set() + new = _hash_files(old.keys()) + new.update(compute_watched_file_hashes()) # pick up newly-added curated/user files + changed = {p for p in old.keys() | new.keys() if old.get(p) != new.get(p)} + if restrict_to is not None: + changed = {p for p in changed if any(fnmatch.fnmatch(p, pat) for pat in restrict_to)} + return changed + + +def _is_excluded(path: str, config: Config) -> bool: + """Whether ``path`` should be dropped from change reporting as noise. + + Files explicitly registered in ``cache_invalidation_files`` are never excluded. 
+ """ + if any(fnmatch.fnmatch(path, pat) for pat in config.cache_invalidation_files): + return False + patterns = list(_DEFAULT_INVALIDATION_EXCLUDE) + list(config.cache_invalidation_exclude) + return any(fnmatch.fnmatch(path, pat) for pat in patterns) + + +def _changed_dependency_files() -> set[str]: + """Files changed since the last full run that the per-function hashes cannot track. + + Prefers git (catches every non-.py file in the repo and respects .gitignore) and + falls back to hashing a curated set of build/dependency files when git is + unavailable. Silent on the first run (no baseline to compare against). Noisy files + (see ``_DEFAULT_INVALIDATION_EXCLUDE`` and ``cache_invalidation_exclude``) are dropped. + """ + config = Config.get() + old_commit = state().old_git_commit + if config.use_git_change_detection and old_commit is not None: + git_changed = git_changed_non_py_files(old_commit) + if git_changed is not None: + # also catch explicitly-registered files that git ignores + changed = git_changed | _changed_hashed_files(restrict_to=config.cache_invalidation_files) + else: + changed = _changed_hashed_files() + else: + changed = _changed_hashed_files() + return {p for p in changed if not _is_excluded(p, config)} + + +def _compute_baseline_file_hashes() -> dict[str, str]: + """The set of non-.py files to track, hashed. Always includes the curated/user-glob + files; when git is available it also records every tracked non-.py file (minus noise) + so a later git-less run can still detect changes to them. + """ + config = Config.get() + hashes = compute_watched_file_hashes() + if config.use_git_change_detection: + tracked = git_tracked_non_py_files() + if tracked is not None: + hashes.update(_hash_files(sorted(p for p in tracked if not _is_excluded(p, config)))) return hashes +def _refresh_change_detection_baseline() -> None: + """Snapshot the current git commit and tracked-file hashes as the new baseline. 
+ + Only called on a full run; cached runs keep the previous baseline so a ``warn`` + keeps firing until the cache is actually rebuilt. + """ + state().git_commit = git_head() + state().watched_file_hashes = _compute_baseline_file_hashes() + + def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int: """Reset cached verdicts to ``None`` (forcing a re-test) where ``should_reset`` holds. @@ -871,27 +1019,24 @@ def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int: def _report_watched_file_changes() -> bool: - """Surface changes to watched config/dependency files. + """Surface non-Python files that changed since the last full run. Returns True only when the configured policy is ``rerun`` and something changed, - asking the caller to reset all results. Silent when no prior hashes exist. + asking the caller to reset all results. Silent when there is no baseline yet. """ - old = state().old_watched_file_hashes - if not old: - return False - new = compute_watched_file_hashes() - changed = sorted(p for p in old.keys() | new.keys() if old.get(p) != new.get(p)) + changed = _changed_dependency_files() if not changed: return False policy = Config.get().on_dependency_change if policy == "ignore": return False + listed = sorted(changed) if policy == "rerun": - print(f" {len(changed)} watched file(s) changed; rerunning all mutants: {', '.join(changed)}") + print(f" {len(listed)} non-Python file(s) changed; rerunning all mutants: {', '.join(listed)}") return True # default: warn but keep the cache - print(f" Warning: {len(changed)} watched file(s) changed since the last run: {', '.join(changed)}") + print(f" Warning: {len(listed)} non-Python file(s) changed since the last full run: {', '.join(listed)}") print(" These cannot be tracked for behavioral changes, so cached results were kept.") print(' If the changes affect your tests, delete the mutants/ directory or set on_dependency_change = "rerun".') return False @@ -945,6 +1090,8 @@ def 
collect_or_load_stats( force_full = _apply_config_change_invalidation(mutants_caught_by_type_checker or {}) if not did_load or force_full: + # A full run rebuilds the cache, so reset the change-detection baseline to "now". + _refresh_change_detection_baseline() # Run full stats run_stats_collection(runner) else: @@ -986,6 +1133,10 @@ def load_stats() -> bool: state().function_dependencies[k] = set(v) state().old_config_fingerprint = data.pop("config_fingerprint", {}) state().old_watched_file_hashes = data.pop("watched_file_hashes", {}) + state().old_git_commit = data.pop("git_commit", None) + # Preserve the loaded baseline; only a full run refreshes it. + state().watched_file_hashes = state().old_watched_file_hashes + state().git_commit = state().old_git_commit assert not data, data did_load = True except (FileNotFoundError, JSONDecodeError): @@ -1003,7 +1154,8 @@ def save_stats() -> None: function_hashes=state().current_function_hashes, function_dependencies={k: list(v) for k, v in state().function_dependencies.items()}, config_fingerprint=Config.get().config_fingerprint(), - watched_file_hashes=compute_watched_file_hashes(), + watched_file_hashes=state().watched_file_hashes, + git_commit=state().git_commit, ), f, indent=4, diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py index 98fce390..24b275a6 100644 --- a/src/mutmut/configuration.py +++ b/src/mutmut/configuration.py @@ -146,7 +146,9 @@ def _load_config() -> Config: track_dependencies=s("track_dependencies", True), dependency_tracking_depth=s("dependency_tracking_depth", None), cache_invalidation_files=s("cache_invalidation_files", []), + cache_invalidation_exclude=s("cache_invalidation_exclude", []), on_dependency_change=s("on_dependency_change", "warn"), + use_git_change_detection=s("use_git_change_detection", True), ) @@ -172,7 +174,9 @@ class Config: track_dependencies: bool dependency_tracking_depth: int | None cache_invalidation_files: list[str] + cache_invalidation_exclude: 
list[str] on_dependency_change: str + use_git_change_detection: bool def config_fingerprint(self) -> dict[str, str]: """Hash the config fields that can change cached mutant *results*, grouped so the diff --git a/src/mutmut/state.py b/src/mutmut/state.py index c53a3ec4..c1020898 100644 --- a/src/mutmut/state.py +++ b/src/mutmut/state.py @@ -12,7 +12,13 @@ class MutmutState: # changes the per-function source hashes cannot see. Empty when absent (pre-upgrade # cache or first run), in which case no invalidation is triggered. old_config_fingerprint: dict[str, str] = field(default_factory=dict) + # Change-detection baselines describe the state at the *last full run*. The ``old_`` + # values are what we compare against; the others are what gets persisted (only + # refreshed on a full run, so a ``warn`` keeps firing until the cache is rebuilt). old_watched_file_hashes: dict[str, str] = field(default_factory=dict) + watched_file_hashes: dict[str, str] = field(default_factory=dict) + old_git_commit: str | None = None + git_commit: str | None = None _state: MutmutState | None = None diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py index 9be27484..d7a4d3ac 100644 --- a/tests/mutation/test_mutation.py +++ b/tests/mutation/test_mutation.py @@ -1,5 +1,7 @@ import os import pathlib +import shutil +import subprocess import tempfile from collections import defaultdict from unittest.mock import Mock @@ -12,12 +14,17 @@ from mutmut.__main__ import CatchOutput from mutmut.__main__ import MutmutProgrammaticFailException from mutmut.__main__ import _apply_config_change_invalidation +from mutmut.__main__ import _changed_dependency_files from mutmut.__main__ import _cleanup_stale_stats from mutmut.__main__ import _invalidate_stale_dependency_edges +from mutmut.__main__ import _refresh_change_detection_baseline from mutmut.__main__ import _report_watched_file_changes from mutmut.__main__ import _reset_mutant_results from mutmut.__main__ import 
compute_watched_file_hashes from mutmut.__main__ import get_diff_for_mutant +from mutmut.__main__ import git_changed_non_py_files +from mutmut.__main__ import git_head +from mutmut.__main__ import git_tracked_non_py_files from mutmut.__main__ import mangled_name_from_mutant_name from mutmut.__main__ import orig_function_and_class_names_from_key from mutmut.__main__ import record_trampoline_hit @@ -1321,7 +1328,9 @@ def _config_for_invalidation(**overrides): track_dependencies=True, dependency_tracking_depth=None, cache_invalidation_files=[], + cache_invalidation_exclude=[], on_dependency_change="warn", + use_git_change_detection=True, ) base.update(overrides) return Config(**base) @@ -1495,3 +1504,199 @@ def test_compute_watched_file_hashes_includes_user_globs(tmp_path, monkeypatch): assert "pyproject.toml" in hashes assert "query.sql" in hashes + + +# --- git-based change detection (soft dependency) --- + +_GIT = shutil.which("git") +requires_git = pytest.mark.skipif(_GIT is None, reason="git not installed") + + +def _git(args, cwd): + subprocess.run([_GIT, *args], cwd=cwd, check=True, capture_output=True, text=True) + + +def _init_repo(path): + _git(["init"], path) + _git(["config", "user.email", "t@example.com"], path) + _git(["config", "user.name", "Test"], path) + _git(["config", "commit.gpgsign", "false"], path) + + +def _commit_all(path, message="commit"): + _git(["add", "-A"], path) + _git(["commit", "-m", message], path) + + +@requires_git +def test_git_head_returns_commit(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "a.txt").write_text("1") + _commit_all(tmp_path) + + head = git_head() + + assert head and len(head) == 40 + + +def test_git_head_none_outside_repo(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + assert git_head() is None + + +@requires_git +def test_git_changed_non_py_files_detects_and_excludes_python(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path 
/ "conf.yml").write_text("a: 1") + (tmp_path / "mod.py").write_text("x = 1") + _commit_all(tmp_path) + base = git_head() + + (tmp_path / "conf.yml").write_text("a: 2") # tracked non-.py modified + (tmp_path / "mod.py").write_text("x = 2") # tracked .py modified (excluded) + (tmp_path / "data.sql").write_text("select 1") # new untracked non-.py + + assert git_changed_non_py_files(base) == {"conf.yml", "data.sql"} + + +@requires_git +def test_git_changed_non_py_files_bad_ref_returns_none(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "a.txt").write_text("1") + _commit_all(tmp_path) + + assert git_changed_non_py_files("deadbeef" * 5) is None + + +@requires_git +def test_changed_dependency_files_prefers_git_over_curated_list(tmp_path, monkeypatch): + """Git catches a non-.py file that is not in the curated watched list.""" + reset_state() + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "config.yaml").write_text("a: 1") + _commit_all(tmp_path) + state().old_git_commit = git_head() + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation()) + + (tmp_path / "config.yaml").write_text("a: 2") + + assert "config.yaml" in _changed_dependency_files() + reset_state() + + +@requires_git +def test_use_git_change_detection_false_falls_back_to_curated(tmp_path, monkeypatch): + reset_state() + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "config.yaml").write_text("a: 1") + _commit_all(tmp_path) + state().old_git_commit = git_head() + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(use_git_change_detection=False)) + + (tmp_path / "config.yaml").write_text("a: 2") + + # config.yaml is not in the curated list and git is disabled -> not reported + assert "config.yaml" not in _changed_dependency_files() + reset_state() + + +@requires_git +def test_default_exclude_drops_noisy_files(tmp_path, monkeypatch): + """Docs / markdown changes are dropped by the default exclude 
list.""" + reset_state() + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "README.md").write_text("hi") + (tmp_path / "config.yaml").write_text("a: 1") + _commit_all(tmp_path) + state().old_git_commit = git_head() + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation()) + + (tmp_path / "README.md").write_text("changed") + (tmp_path / "config.yaml").write_text("a: 2") + + changed = _changed_dependency_files() + assert "config.yaml" in changed + assert "README.md" not in changed + reset_state() + + +@requires_git +def test_user_exclude_pattern_drops_file(tmp_path, monkeypatch): + reset_state() + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "noisy.json").write_text("1") + _commit_all(tmp_path) + state().old_git_commit = git_head() + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(cache_invalidation_exclude=["*.json"])) + + (tmp_path / "noisy.json").write_text("2") + + assert "noisy.json" not in _changed_dependency_files() + reset_state() + + +@requires_git +def test_registered_file_is_immune_to_exclusion(tmp_path, monkeypatch): + """A file explicitly registered in cache_invalidation_files is never excluded.""" + reset_state() + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "notes.md").write_text("a") # *.md is excluded by default + _commit_all(tmp_path) + state().old_git_commit = git_head() + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(cache_invalidation_files=["notes.md"])) + + (tmp_path / "notes.md").write_text("b") + + assert "notes.md" in _changed_dependency_files() + reset_state() + + +@requires_git +def test_git_tracked_non_py_files_lists_tracked_and_excludes_python(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "config.yaml").write_text("a: 1") + (tmp_path / "mod.py").write_text("x = 1") + _commit_all(tmp_path) + (tmp_path / "data.sql").write_text("select 1") # untracked but not ignored + + tracked = 
git_tracked_non_py_files() + + assert "config.yaml" in tracked + assert "data.sql" in tracked + assert "mod.py" not in tracked + + +@requires_git +def test_baseline_records_git_files_for_gitless_fallback(tmp_path, monkeypatch): + """A full run with git records all tracked non-.py files, so a later run without + git can still detect changes to them by re-hashing.""" + reset_state() + monkeypatch.chdir(tmp_path) + _init_repo(tmp_path) + (tmp_path / "config.yaml").write_text("a: 1") + (tmp_path / "README.md").write_text("hi") # excluded by default + _commit_all(tmp_path) + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation()) + + _refresh_change_detection_baseline() + baseline = state().watched_file_hashes + assert "config.yaml" in baseline # recorded for the gitless fallback + assert "README.md" not in baseline # noise stays out of the baseline + + # simulate a later run in an environment without git + state().old_watched_file_hashes = baseline + state().old_git_commit = None + monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(use_git_change_detection=False)) + (tmp_path / "config.yaml").write_text("a: 2") + + assert "config.yaml" in _changed_dependency_files() + reset_state() diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 469d2f47..7a240911 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -74,7 +74,9 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config: track_dependencies=True, dependency_tracking_depth=None, cache_invalidation_files=[], + cache_invalidation_exclude=[], on_dependency_change="warn", + use_git_change_detection=True, ) def test_ignores_non_python_files(self): From 19f9dcd15820e24978cb5a6ea70c6e01edb01529 Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:05:12 -0400 Subject: [PATCH 6/8] HISTORY --- HISTORY.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff 
--git a/HISTORY.rst b/HISTORY.rst index 0c471f3d..d1a91a2f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,22 @@ Changelog --------- +Unreleased +~~~~~~~~~~ + +* Per-function source hashing for incremental cache invalidation — only re-test mutants in functions that changed + +* Cross-call dependency tracking — invalidate mutants in callers when a called function changes + +* Use git to detect non-Python dependency file changes; falls back to a curated file list when git is unavailable + +* Add `cache_invalidation_exclude` config to suppress noisy files from change detection + +* Add `use_git_change_detection` config (default true) to opt out of git-based detection + +* Invalidate cached results automatically when result-affecting config fields change + + 3.6.0 ~~~~~ From e18ec31486be069fad6d8b4c9240840711224cb2 Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Sat, 6 Jun 2026 11:07:30 -0400 Subject: [PATCH 7/8] lock --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index b956dad4..d17d7fe5 100644 --- a/uv.lock +++ b/uv.lock @@ -375,7 +375,7 @@ wheels = [ [[package]] name = "mutmut" -version = "3.5.0" +version = "3.6.0" source = { editable = "." 
} dependencies = [ { name = "click" }, From f35583e17e9bade258f584add548c993325d9410 Mon Sep 17 00:00:00 2001 From: nicklafleur <55208706+nicklafleur@users.noreply.github.com> Date: Tue, 9 Jun 2026 15:56:43 -0400 Subject: [PATCH 8/8] fix: address three cache-correctness bugs from Copilot review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Return cached function hashes for mtime-skipped files so _cleanup_stale_stats and _invalidate_stale_dependency_edges don't treat unchanged files as deleted; use get_mutant_name at both call sites instead of inlining the path→module conversion - Change dependency_tracking_depth default from None to -1 so setup.cfg values are correctly coerced to int by the config loader; narrow type from int | None to int and drop the conditional in run_stats_collection - Fix benchmark fixture to yield unconditionally so it doesn't fail when BENCHMARK_TEST_DELAY=0 --- e2e_projects/benchmark_1k/tests/conftest.py | 2 +- src/mutmut/__main__.py | 16 +++++++++------- src/mutmut/configuration.py | 4 ++-- tests/mutation/test_mutation.py | 2 +- tests/test_configuration.py | 4 ++-- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/e2e_projects/benchmark_1k/tests/conftest.py b/e2e_projects/benchmark_1k/tests/conftest.py index acf81987..2b7a61f9 100644 --- a/e2e_projects/benchmark_1k/tests/conftest.py +++ b/e2e_projects/benchmark_1k/tests/conftest.py @@ -28,4 +28,4 @@ def benchmark_test_delay(): jittered = random.gauss(_test_delay, _test_delay * 0.1) # Clamp to 0.01s time.sleep(max(0.01, jittered)) - yield + yield diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py index 01afe7fe..9ade02e8 100644 --- a/src/mutmut/__main__.py +++ b/src/mutmut/__main__.py @@ -305,7 +305,12 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe # source_mtime == mutant_mtime: only copied, otherwise the mutant file is untouched # source_mtime < mutant_mtime: the mutations have been 
saved after copying; source file untouched if source_mtime < mutant_mtime: - return FileMutationResult(unmodified=True) + data = SourceFileMutationData(path=filename) + data.load() + return FileMutationResult( + unmodified=True, + current_hashes={get_mutant_name(filename, func): h for func, h in data.hash_by_function_name.items()}, + ) except OSError: pass @@ -347,11 +352,8 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe data.hash_by_function_name = hash_by_function_name data.save() - module_name = strip_prefix(str(filename)[: -len(filename.suffix)].replace(os.sep, "."), prefix="src.") - current_hashes_qualified = { - f"{module_name}.{func}".replace(".__init__.", "."): h for func, h in hash_by_function_name.items() - } - changed_functions_qualified = {f"{module_name}.{func}".replace(".__init__.", ".") for func in changed} + current_hashes_qualified = {get_mutant_name(filename, func): h for func, h in hash_by_function_name.items()} + changed_functions_qualified = {get_mutant_name(filename, func) for func in changed} return FileMutationResult( warnings=warnings, @@ -744,7 +746,7 @@ def run_stats_collection(runner: TestRunner, tests: Iterable[str] | None = None) os.environ["MUTANT_UNDER_TEST"] = "stats" os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1" depth = Config.get().dependency_tracking_depth - os.environ["MUTMUT_DEPENDENCY_DEPTH"] = str(depth) if depth is not None else "-1" + os.environ["MUTMUT_DEPENDENCY_DEPTH"] = str(depth) start_cpu_time = process_time() with CatchOutput(spinner_title="Running stats") as output_catcher: diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py index 24b275a6..ec9ef8b7 100644 --- a/src/mutmut/configuration.py +++ b/src/mutmut/configuration.py @@ -144,7 +144,7 @@ def _load_config() -> Config: "use_setproctitle", not platform.system() == "Darwin" ), # False on Mac, true otherwise as default (https://github.com/boxed/mutmut/pull/450#issuecomment-4002571055) 
track_dependencies=s("track_dependencies", True), - dependency_tracking_depth=s("dependency_tracking_depth", None), + dependency_tracking_depth=s("dependency_tracking_depth", -1), cache_invalidation_files=s("cache_invalidation_files", []), cache_invalidation_exclude=s("cache_invalidation_exclude", []), on_dependency_change=s("on_dependency_change", "warn"), @@ -172,7 +172,7 @@ class Config: type_check_command: list[str] use_setproctitle: bool track_dependencies: bool - dependency_tracking_depth: int | None + dependency_tracking_depth: int cache_invalidation_files: list[str] cache_invalidation_exclude: list[str] on_dependency_change: str diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py index d7a4d3ac..579a381b 100644 --- a/tests/mutation/test_mutation.py +++ b/tests/mutation/test_mutation.py @@ -1326,7 +1326,7 @@ def _config_for_invalidation(**overrides): type_check_command=[], use_setproctitle=False, track_dependencies=True, - dependency_tracking_depth=None, + dependency_tracking_depth=-1, cache_invalidation_files=[], cache_invalidation_exclude=[], on_dependency_change="warn", diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 7a240911..bb834783 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -72,7 +72,7 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config: type_check_command=[], use_setproctitle=False, track_dependencies=True, - dependency_tracking_depth=None, + dependency_tracking_depth=-1, cache_invalidation_files=[], cache_invalidation_exclude=[], on_dependency_change="warn", @@ -349,7 +349,7 @@ def test_uses_defaults_when_no_config(self, in_tmp_dir: Path): assert config.timeout_constant == 1.0 assert config.type_check_command == [] assert config.track_dependencies is True - assert config.dependency_tracking_depth is None + assert config.dependency_tracking_depth == -1 def test_also_copy_includes_defaults(self, in_tmp_dir: Path): (in_tmp_dir / 
"src").mkdir()