From c1e4b25031389ad590cf757a87618f80bcfaaf21 Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Fri, 5 Jun 2026 20:21:09 -0400
Subject: [PATCH 1/8] feat: per-function hashing for incremental cache
 invalidation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a source file changes, only re-test mutants in functions whose AST
hash changed; preserve prior results for unchanged functions in the same
file.

- compute_function_hashes / _compute_mutated_function_hashes in
file_mutation.py: class-qualified mangled keys (x_foo /
xǁClassǁmethod) -> first 12 hex chars of the sha256 of the function's
ast.dump (so comments and formatting do not affect it). Methods and
nested-class methods are indexed under the same key the merge looks up,
closing the latent silent-preservation bug for changed methods.
- mutate_file_contents returns a 3-tuple (code, names, hashes).
- SourceFileMutationData gains hash_by_function_name, persisted in .meta
with a pop-with-default so old files still load.
- create_mutants_for_file: mtime short-circuit now preserves all prior
results instead of resetting them; on a real change, load-and-merge
compares new hashes against old, resets only changed/unhashed mutants,
and preserves the rest.
- Tests: update all mutate_file_contents unpack sites; add tests for
hash stability, body-change detection, comment-insensitivity, method
key inclusion, two-function preserve/reset integration, and the method
regression guard.
---
 src/mutmut/__main__.py                  |  38 ++---
 src/mutmut/mutation/data.py             |   3 +
 src/mutmut/mutation/file_mutation.py    |  56 ++++++-
 tests/mutation/test_mutation.py         | 193 +++++++++++++++++++++++-
 tests/mutation/test_mutation_runtime.py |  10 +-
 tests/test_mutation regression.py       |   2 +-
 6 files changed, 273 insertions(+), 29 deletions(-)

diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py
index fb6dfc4c..b57963d8 100644
--- a/src/mutmut/__main__.py
+++ b/src/mutmut/__main__.py
@@ -295,13 +295,6 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe
         # source_mtime == mutant_mtime: only copied, otherwise the mutant file is untouched
         # source_mtime < mutant_mtime: the mutations have been saved after copying; source file untouched
         if source_mtime < mutant_mtime:
-            # reset the mutation stats
-            source_file_mutation_data = SourceFileMutationData(path=filename)
-            source_file_mutation_data.load()
-            for key in source_file_mutation_data.exit_code_by_key:
-                source_file_mutation_data.exit_code_by_key[key] = None
-            source_file_mutation_data.save()
-
             return FileMutationResult(unmodified=True)
     except OSError:
         pass
@@ -311,12 +304,12 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe
 
     with open(output_path, "w") as out:
         try:
-            mutant_names = write_all_mutants_to_file(out=out, source=source, filename=filename)
+            mutant_names, hash_by_function_name = write_all_mutants_to_file(out=out, source=source, filename=filename)
         except cst.ParserSyntaxError as e:
             # if libcst cannot parse it, then copy the source without any mutations
             warnings.append(SyntaxWarning(f"Unsupported syntax in {filename} ({str(e)}), skipping"))
             out.write(source)
-            mutant_names = []
+            mutant_names, hash_by_function_name = [], {}
 
     # validate no syntax errors of mutants
     with open(output_path) as f:
@@ -327,22 +320,33 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe
             invalid_syntax_error.__cause__ = e
             return FileMutationResult(warnings=warnings, error=invalid_syntax_error)
 
-    source_file_mutation_data = SourceFileMutationData(path=filename)
-    source_file_mutation_data.exit_code_by_key = {
-        get_mutant_name(filename, mutant_name): None for mutant_name in mutant_names
-    }
-    source_file_mutation_data.save()
+    data = SourceFileMutationData(path=filename)
+    data.load()
+    old_hashes = data.hash_by_function_name
+    changed = {f for f, h in hash_by_function_name.items() if old_hashes.get(f) != h}
+
+    merged: dict[str, int | None] = {}
+    for name in mutant_names:
+        key = get_mutant_name(filename, name)
+        func = mangled_name_from_mutant_name(key).rpartition(".")[2]
+        if func not in hash_by_function_name or func in changed:
+            merged[key] = None
+        else:
+            merged[key] = data.exit_code_by_key.get(key)
+    data.exit_code_by_key = merged
+    data.hash_by_function_name = hash_by_function_name
+    data.save()
 
     return FileMutationResult(warnings=warnings)
 
 
-def write_all_mutants_to_file(*, out: TextIOBase, source: str, filename: Path) -> Sequence[str]:
-    result, mutant_names = mutate_file_contents(
+def write_all_mutants_to_file(*, out: TextIOBase, source: str, filename: Path) -> tuple[Sequence[str], dict[str, str]]:
+    result, mutant_names, hash_by_function_name = mutate_file_contents(
         str(filename), source, get_covered_lines_for_file(str(filename), mutmut._covered_lines)
     )
     out.write(result)
 
-    return mutant_names
+    return mutant_names, hash_by_function_name
 
 
 def unused(*_: object) -> None:
diff --git a/src/mutmut/mutation/data.py b/src/mutmut/mutation/data.py
index 46a1f51b..fc7f583a 100644
--- a/src/mutmut/mutation/data.py
+++ b/src/mutmut/mutation/data.py
@@ -12,6 +12,7 @@ def __init__(self, *, path: Path | str) -> None:
         self.meta_path = Path("mutants") / (str(path) + ".meta")
         self.key_by_pid: dict[int, str] = {}
         self.exit_code_by_key: dict[str, int | None] = {}
+        self.hash_by_function_name: dict[str, str] = {}
         self.durations_by_key: dict[str, float] = {}
         self.start_time_by_pid: dict[int, datetime] = {}
         self.type_check_error_by_key: dict[str, str | None] = {}
@@ -24,6 +25,7 @@ def load(self) -> None:
             return
 
         self.exit_code_by_key = meta.pop("exit_code_by_key")
+        self.hash_by_function_name = meta.pop("hash_by_function_name", {})
         self.type_check_error_by_key = meta.pop("type_check_error_by_key", {})
         self.durations_by_key = meta.pop("durations_by_key")
         self.estimated_time_of_tests_by_mutant = meta.pop("estimated_durations_by_key")
@@ -52,6 +54,7 @@ def save(self) -> None:
             json.dump(
                 {
                     "exit_code_by_key": self.exit_code_by_key,
+                    "hash_by_function_name": self.hash_by_function_name,
                     "type_check_error_by_key": self.type_check_error_by_key,
                     "durations_by_key": self.durations_by_key,
                     "estimated_durations_by_key": self.estimated_time_of_tests_by_mutant,
diff --git a/src/mutmut/mutation/file_mutation.py b/src/mutmut/mutation/file_mutation.py
index e654c931..17422e0f 100644
--- a/src/mutmut/mutation/file_mutation.py
+++ b/src/mutmut/mutation/file_mutation.py
@@ -1,6 +1,9 @@
 """This module contains code for managing mutant creation for whole files."""
 
+import ast
+import hashlib
 from collections import defaultdict
+from collections.abc import Callable
 from collections.abc import Iterable
 from collections.abc import Mapping
 from collections.abc import Sequence
@@ -32,6 +35,49 @@
 NEVER_MUTATE_FUNCTION_CALLS = {"len", "isinstance"}
 
 
+def compute_function_hashes(source_code: str, accept: Callable[[str], bool] | None = None) -> dict[str, str]:
+    """class-qualified mangled key (x_foo / xǁClassǁmethod) -> 12-char sha256 of the func AST."""
+    try:
+        tree = ast.parse(source_code)
+    except SyntaxError:
+        return {}
+    hashes: dict[str, str] = {}
+
+    def _visit(stmts: list[ast.stmt], class_name: str = "") -> None:
+        for node in stmts:
+            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                key = mangle_function_name(name=node.name, class_name=class_name or None)
+                if accept is None or accept(key):
+                    hashes[key] = hashlib.sha256(ast.dump(node, annotate_fields=False).encode()).hexdigest()[:12]
+            elif isinstance(node, ast.ClassDef):
+                _visit(node.body, node.name if not class_name else f"{class_name}.{node.name}")
+
+    _visit(tree.body)
+    return hashes
+
+
+def _compute_mutated_function_hashes(
+    source_code: str, module: cst.Module, mutations: Sequence["Mutation"]
+) -> dict[str, str]:
+    key_by_node: dict[cst.FunctionDef, str] = {}
+
+    def _index(body: Sequence[cst.CSTNode], class_name: str = "") -> None:
+        for stmt in body:
+            if isinstance(stmt, cst.FunctionDef):
+                key_by_node[stmt] = mangle_function_name(name=stmt.name.value, class_name=class_name or None)
+            elif isinstance(stmt, cst.ClassDef) and isinstance(stmt.body, cst.IndentedBlock):
+                _index(stmt.body.body, stmt.name.value if not class_name else f"{class_name}.{stmt.name.value}")
+
+    _index(module.body)
+    mutated = {
+        key_by_node[m.contained_by_top_level_function]
+        for m in mutations
+        if isinstance(m.contained_by_top_level_function, cst.FunctionDef)
+        and m.contained_by_top_level_function in key_by_node
+    }
+    return compute_function_hashes(source_code, accept=lambda key: key in mutated)
+
+
 @dataclass
 class Mutation:
     original_node: cst.CSTNode
@@ -39,17 +85,19 @@ class Mutation:
     contained_by_top_level_function: cst.FunctionDef | None
 
 
-def mutate_file_contents(filename: str, code: str, covered_lines: set[int] | None = None) -> tuple[str, Sequence[str]]:
+def mutate_file_contents(
+    filename: str, code: str, covered_lines: set[int] | None = None
+) -> tuple[str, Sequence[str], dict[str, str]]:
     """Create mutations for `code` and merge them to a single mutated file with trampolines.
 
-    :return: A tuple of (mutated code, list of mutant function names)."""
+    :return: A tuple of (mutated code, list of mutant function names, hash by function name)."""
     module, mutations, ignored_classes, ignored_functions = create_mutations(filename, code, covered_lines)
 
     mutated_code, mutant_names = combine_mutations_to_source(module, mutations, ignored_classes, ignored_functions)
 
-    # TODO: implement function hashing to skip testing unchanged functions
+    hash_by_function_name = _compute_mutated_function_hashes(code, module, mutations)
 
-    return mutated_code, mutant_names
+    return mutated_code, mutant_names, hash_by_function_name
 
 
 def create_mutations(
diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py
index 38687ed2..9fbcf47b 100644
--- a/tests/mutation/test_mutation.py
+++ b/tests/mutation/test_mutation.py
@@ -1,4 +1,6 @@
 import os
+import pathlib
+import tempfile
 from unittest.mock import Mock
 from unittest.mock import patch
 
@@ -8,12 +10,16 @@
 from mutmut.__main__ import CatchOutput
 from mutmut.__main__ import MutmutProgrammaticFailException
 from mutmut.__main__ import get_diff_for_mutant
+from mutmut.__main__ import mangled_name_from_mutant_name
 from mutmut.__main__ import orig_function_and_class_names_from_key
 from mutmut.__main__ import run_forced_fail_test
+from mutmut.mutation.data import SourceFileMutationData
+from mutmut.mutation.file_mutation import compute_function_hashes
 from mutmut.mutation.file_mutation import create_mutations
 from mutmut.mutation.file_mutation import mutate_file_contents
 from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR
 from mutmut.mutation.trampoline_templates import mangle_function_name
+from mutmut.utils.format_utils import get_mutant_name
 
 
 def mutants_for_source(source: str, covered_lines: set[int] | None = None) -> list[str]:
@@ -24,7 +30,7 @@ def mutants_for_source(source: str, covered_lines: set[int] | None = None) -> li
 
 
 def mutated_module(source: str) -> str:
-    mutated_code, _ = mutate_file_contents("", source)
+    mutated_code, _, _ = mutate_file_contents("", source)
     return mutated_code
 
 
@@ -794,7 +800,7 @@ def member(self):
 
     """.strip()
 
-    mutants_source, mutant_names = mutate_file_contents("filename", source)
+    mutants_source, mutant_names, _ = mutate_file_contents("filename", source)
     assert len(mutant_names) == 2
 
     diff1 = get_diff_for_mutant(mutant_name=mutant_names[0], source=mutants_source, path="test.py").strip()
@@ -1004,3 +1010,186 @@ def inner():
 
     mutants = mutants_for_source(source)
     assert mutants == [expected]
+
+
+# --- function hashing tests ---
+
+
+def test_compute_function_hashes_module_level():
+    source = """
+def foo():
+    return 1
+
+def bar():
+    return 2
+""".strip()
+    hashes = compute_function_hashes(source)
+    assert "x_foo" in hashes
+    assert "x_bar" in hashes
+    assert len(hashes["x_foo"]) == 12
+    assert hashes["x_foo"] != hashes["x_bar"]
+
+
+def test_compute_function_hashes_stable():
+    source = "def foo():\n    return 1\n"
+    assert compute_function_hashes(source) == compute_function_hashes(source)
+
+
+def test_compute_function_hashes_changes_on_body_change():
+    source1 = "def foo():\n    return 1\n"
+    source2 = "def foo():\n    return 2\n"
+    assert compute_function_hashes(source1)["x_foo"] != compute_function_hashes(source2)["x_foo"]
+
+
+def test_compute_function_hashes_insensitive_to_comments():
+    source1 = "def foo():\n    return 1\n"
+    source2 = "def foo():\n    # a comment\n    return 1\n"
+    # ast.dump ignores comments, so hashes must be equal
+    assert compute_function_hashes(source1)["x_foo"] == compute_function_hashes(source2)["x_foo"]
+
+
+def test_compute_function_hashes_includes_methods():
+    source = """
+class Foo:
+    def bar(self):
+        return 1
+""".strip()
+    hashes = compute_function_hashes(source)
+    from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR
+
+    method_key = f"x{CLASS_NAME_SEPARATOR}Foo{CLASS_NAME_SEPARATOR}bar"
+    assert method_key in hashes
+
+
+def test_mutate_file_contents_returns_hashes_for_mutated_functions():
+    source = """
+def foo():
+    return 1
+
+def bar():
+    return 2
+""".strip()
+    _, mutant_names, hashes = mutate_file_contents("test.py", source)
+    assert mutant_names
+    # every mutated function appears in the hashes
+
+    for name in mutant_names:
+        func = mangled_name_from_mutant_name(name).rpartition(".")[2]
+        assert func in hashes, f"{func!r} not in hashes {set(hashes)}"
+
+
+def test_hashing_preserves_unchanged_function_results():
+    """Unchanged function's mutants keep prior results; changed function's reset."""
+    source_v1 = """
+def foo():
+    return 1
+
+def bar():
+    return 2
+""".strip()
+    source_v2 = """
+def foo():
+    return 99
+
+def bar():
+    return 2
+""".strip()
+
+    with tempfile.TemporaryDirectory() as tmp:
+        os.makedirs(os.path.join(tmp, "mutants"), exist_ok=True)
+
+        src_path = pathlib.Path(tmp) / "mymod.py"
+        src_path.write_text(source_v1)
+
+        _, mutant_names_v1, hashes_v1 = mutate_file_contents("mymod.py", source_v1)
+
+        data = SourceFileMutationData(path=src_path)
+        data.exit_code_by_key = {}
+        for name in mutant_names_v1:
+            key = get_mutant_name(src_path, name)
+            data.exit_code_by_key[key] = 1  # fake "killed"
+        data.hash_by_function_name = hashes_v1
+        data.meta_path = pathlib.Path(tmp) / "mutants" / (str(src_path) + ".meta")
+        data.meta_path.parent.mkdir(parents=True, exist_ok=True)
+        data.save()
+
+        # simulate second run with foo changed
+        _, mutant_names_v2, hashes_v2 = mutate_file_contents("mymod.py", source_v2)
+
+        prior = SourceFileMutationData(path=src_path)
+        prior.meta_path = data.meta_path
+        prior.load()
+        old_hashes = prior.hash_by_function_name
+        changed = {f for f, h in hashes_v2.items() if old_hashes.get(f) != h}
+
+        merged: dict = {}
+        for name in mutant_names_v2:
+            key = get_mutant_name(src_path, name)
+            func = mangled_name_from_mutant_name(key).rpartition(".")[2]
+            if func not in hashes_v2 or func in changed:
+                merged[key] = None
+            else:
+                merged[key] = prior.exit_code_by_key.get(key)
+
+        foo_keys = [k for k in merged if "x_foo" in mangled_name_from_mutant_name(k)]
+        bar_keys = [k for k in merged if "x_bar" in mangled_name_from_mutant_name(k)]
+
+        assert foo_keys, "expected foo mutants"
+        assert bar_keys, "expected bar mutants"
+        assert all(merged[k] is None for k in foo_keys), "foo changed — should reset"
+        assert all(merged[k] == 1 for k in bar_keys), "bar unchanged — should preserve"
+
+
+def test_hashing_resets_changed_method(monkeypatch):
+    """A changed class method's mutants must be reset, not silently preserved."""
+    source_v1 = """
+class Foo:
+    def method(self):
+        return 1
+""".strip()
+    source_v2 = """
+class Foo:
+    def method(self):
+        return 99
+""".strip()
+
+    with tempfile.TemporaryDirectory() as tmp:
+        os.makedirs(os.path.join(tmp, "mutants"), exist_ok=True)
+
+        src_path = pathlib.Path(tmp) / "mymod.py"
+
+        _, mutant_names_v1, hashes_v1 = mutate_file_contents("mymod.py", source_v1)
+        assert mutant_names_v1, "expected at least one method mutant"
+
+        data = SourceFileMutationData(path=src_path)
+        data.exit_code_by_key = {}
+        for name in mutant_names_v1:
+            key = get_mutant_name(src_path, name)
+            data.exit_code_by_key[key] = 1
+        data.hash_by_function_name = hashes_v1
+        data.meta_path = pathlib.Path(tmp) / "mutants" / (str(src_path) + ".meta")
+        data.meta_path.parent.mkdir(parents=True, exist_ok=True)
+        data.save()
+
+        _, mutant_names_v2, hashes_v2 = mutate_file_contents("mymod.py", source_v2)
+
+        prior = SourceFileMutationData(path=src_path)
+        prior.meta_path = data.meta_path
+        prior.load()
+        old_hashes = prior.hash_by_function_name
+        changed = {f for f, h in hashes_v2.items() if old_hashes.get(f) != h}
+
+        merged: dict = {}
+        for name in mutant_names_v2:
+            key = get_mutant_name(src_path, name)
+            func = mangled_name_from_mutant_name(key).rpartition(".")[2]
+            if func not in hashes_v2 or func in changed:
+                merged[key] = None
+            else:
+                merged[key] = prior.exit_code_by_key.get(key)
+
+        assert merged, "expected merged mutants"
+        assert all(v is None for v in merged.values()), (
+            "method changed — all mutants should be reset, but some were preserved: "
+            + str({k: v for k, v in merged.items() if v is not None})
+        )
diff --git a/tests/mutation/test_mutation_runtime.py b/tests/mutation/test_mutation_runtime.py
index 5d97b547..e51521db 100644
--- a/tests/mutation/test_mutation_runtime.py
+++ b/tests/mutation/test_mutation_runtime.py
@@ -20,7 +20,7 @@ def describe(self):
         return self.name.lower()
 """.strip()
 
-    mutated_code, mutant_names = mutate_file_contents("test.py", source)
+    mutated_code, mutant_names, _ = mutate_file_contents("test.py", source)
     assert len(mutant_names) > 0, "Should have at least one mutant"
 
     monkeypatch.setenv("MUTANT_UNDER_TEST", "none")
@@ -65,7 +65,7 @@ def from_name(cls, name: str) -> "Color":
         return vals[name]
 """.strip()
 
-    mutated_code, mutant_names = mutate_file_contents("test.py", source)
+    mutated_code, mutant_names, _ = mutate_file_contents("test.py", source)
     assert len(mutant_names) > 0, "Should have at least one mutant"
 
     monkeypatch.setenv("MUTANT_UNDER_TEST", "none")
@@ -88,7 +88,7 @@ def add(a, b):
         return a + b
 """.strip()
 
-    mutated_code, mutant_names = mutate_file_contents("test.py", source)
+    mutated_code, mutant_names, _ = mutate_file_contents("test.py", source)
     assert len(mutant_names) > 0, "Should have at least one mutant"
 
     monkeypatch.setenv("MUTANT_UNDER_TEST", "none")
@@ -117,7 +117,7 @@ def __init__(self, value):
         self.value = value
 """.strip()
 
-    mutated_code, mutant_names = mutate_file_contents("test.py", source)
+    mutated_code, mutant_names, _ = mutate_file_contents("test.py", source)
     assert len(mutant_names) > 0, "Should have at least one mutant"
 
     monkeypatch.setenv("MUTANT_UNDER_TEST", "none")
@@ -140,7 +140,7 @@ def foo(a: int, b: int = 2):
     return a + b
 """.strip()
 
-    mutated_code, mutant_names = mutate_file_contents("test.py", source)
+    mutated_code, mutant_names, _ = mutate_file_contents("test.py", source)
     assert len(mutant_names) > 0, "Should have at least one mutant"
 
     monkeypatch.setenv("MUTANT_UNDER_TEST", "none")
diff --git a/tests/test_mutation regression.py b/tests/test_mutation regression.py
index 1f2958a9..81766649 100644
--- a/tests/test_mutation regression.py	
+++ b/tests/test_mutation regression.py	
@@ -48,7 +48,7 @@ def default(cls) -> "Color":
 
 print(Adder(1).add(2))"""
 
-    src, _ = mutate_file_contents("file.py", source)
+    src, _, _ = mutate_file_contents("file.py", source)
 
     assert src == snapshot("""\
 from __future__ import division

From 2f3e3f88f3222d192b17a15f02a8c015a4f0df8f Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Thu, 4 Jun 2026 22:43:54 -0400
Subject: [PATCH 2/8] feat: cross-call dependency tracking for incremental
 stats invalidation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Records caller->callee edges at stats collection time so that a changed
function's stale outgoing call edges can be cleared and then rebuilt on
the next stats run.

- state.py: MutmutState singleton holding old_function_hashes,
current_function_hashes, and function_dependencies (callee → callers).
- core.py: MutmutCallStack ContextVar propagates caller context through
call chains.
- trampoline.py stats branch: resolves caller via MutmutCallStack,
passes it to record_trampoline_hit, sets updated context for inner
calls, respects MUTMUT_DEPENDENCY_DEPTH env ceiling.
- record_trampoline_hit gains caller param; upstream's source-path-
resolving max_stack_depth walk preserved verbatim; dependency edge
written only when track_dependencies=True.
- FileMutationResult gains changed_functions/current_hashes (deferred
from commit 1); create_mutants accumulates current_hashes into
state().current_function_hashes across worker results.
- create_mutants_for_file builds module-qualified current_hashes and
changed_functions for return to parent.
- load_stats/save_stats persist function_hashes and function_dependencies
alongside existing test associations (backwards-compatible pop-with-
default on load).
- _cleanup_stale_stats: removes test associations and dependency edges
for modules absent from current_function_hashes.
- _invalidate_stale_dependency_edges: clears changed functions from all
caller sets so stale outgoing edges are rebuilt on next stats run.
- collect_or_load_stats: on incremental load, runs cleanup always and
invalidation when track_dependencies; persists the result.
- Config gains track_dependencies (default True) and
dependency_tracking_depth (default None); run_stats_collection sets
MUTMUT_DEPENDENCY_DEPTH from config.
- Tests: record_trampoline_hit with/without track_dependencies,
_cleanup_stale_stats removes unknown modules,
_invalidate_stale_dependency_edges clears changed callers and no-ops on
first run, config defaults asserted.
---
 src/mutmut/__init__.py            |   2 +
 src/mutmut/__main__.py            |  77 ++++++++++++++++++++++-
 src/mutmut/configuration.py       |   4 ++
 src/mutmut/core.py                |  21 +++++++
 src/mutmut/mutation/trampoline.py |  15 ++++-
 src/mutmut/state.py               |  25 ++++++++
 src/mutmut/utils/format_utils.py  |  13 ++++
 tests/mutation/test_mutation.py   | 100 ++++++++++++++++++++++++++++++
 tests/test_configuration.py       |   4 ++
 9 files changed, 256 insertions(+), 5 deletions(-)
 create mode 100644 src/mutmut/core.py
 create mode 100644 src/mutmut/state.py

diff --git a/src/mutmut/__init__.py b/src/mutmut/__init__.py
index d50ba1b6..51026469 100644
--- a/src/mutmut/__init__.py
+++ b/src/mutmut/__init__.py
@@ -5,6 +5,7 @@
 from collections import defaultdict
 
 from mutmut.configuration import Config
+from mutmut.state import reset_state
 
 __version__ = importlib.metadata.version("mutmut")
 
@@ -40,3 +41,4 @@ def _reset_globals() -> None:
     _stats = set()
     tests_by_mangled_function_name = defaultdict(set)
     _covered_lines = None
+    reset_state()
diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py
index b57963d8..862acef9 100644
--- a/src/mutmut/__main__.py
+++ b/src/mutmut/__main__.py
@@ -8,7 +8,9 @@
 from typing import TYPE_CHECKING
 from typing import Any
 
+from mutmut.state import state
 from mutmut.utils.file_utils import change_cwd
+from mutmut.utils.format_utils import get_module_from_key
 from mutmut.utils.format_utils import get_mutant_name
 from mutmut.utils.format_utils import strip_prefix
 
@@ -114,7 +116,7 @@
 exit_code_to_emoji = {exit_code: emoji_by_status[status] for exit_code, status in status_by_exit_code.items()}
 
 
-def record_trampoline_hit(name: str) -> None:
+def record_trampoline_hit(name: str, caller: str | None = None) -> None:
     assert not name.startswith("src."), "Failed trampoline hit. Module name starts with `src.`, which is invalid"
 
     source_paths = [p.resolve(strict=True) for p in Config.get().source_paths]
@@ -136,6 +138,8 @@ def record_trampoline_hit(name: str) -> None:
             return
 
     mutmut._stats.add(name)
+    if caller is not None and Config.get().track_dependencies:
+        state().function_dependencies[name].add(caller)
 
 
 def walk_all_files() -> Iterator[tuple[str, str]]:
@@ -209,6 +213,8 @@ class FileMutationResult:
     error: Exception | None = None
     unmodified: bool = False
     ignored: bool = False
+    changed_functions: set[str] | None = None
+    current_hashes: dict[str, str] | None = None
 
 
 @dataclass
@@ -232,6 +238,8 @@ def create_mutants(max_children: int) -> MutantGenerationStats:
                 stats.ignored += 1
             else:
                 stats.mutated += 1
+            if result.current_hashes:
+                state().current_function_hashes.update(result.current_hashes)
     return stats
 
 
@@ -337,7 +345,17 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe
     data.hash_by_function_name = hash_by_function_name
     data.save()
 
-    return FileMutationResult(warnings=warnings)
+    module_name = strip_prefix(str(filename)[: -len(filename.suffix)].replace(os.sep, "."), prefix="src.")
+    current_hashes_qualified = {
+        f"{module_name}.{func}".replace(".__init__.", "."): h for func, h in hash_by_function_name.items()
+    }
+    changed_functions_qualified = {f"{module_name}.{func}".replace(".__init__.", ".") for func in changed}
+
+    return FileMutationResult(
+        warnings=warnings,
+        changed_functions=changed_functions_qualified,
+        current_hashes=current_hashes_qualified,
+    )
 
 
 def write_all_mutants_to_file(*, out: TextIOBase, source: str, filename: Path) -> tuple[Sequence[str], dict[str, str]]:
@@ -723,6 +741,8 @@ def run_stats_collection(runner: TestRunner, tests: Iterable[str] | None = None)
 
     os.environ["MUTANT_UNDER_TEST"] = "stats"
     os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1"
+    depth = Config.get().dependency_tracking_depth
+    os.environ["MUTMUT_DEPENDENCY_DEPTH"] = str(depth) if depth is not None else "-1"
     start_cpu_time = process_time()
 
     with CatchOutput(spinner_title="Running stats") as output_catcher:
@@ -758,13 +778,59 @@ def run_stats_collection(runner: TestRunner, tests: Iterable[str] | None = None)
     save_stats()
 
 
-def collect_or_load_stats(runner: TestRunner) -> None:
+def _cleanup_stale_stats() -> None:
+    valid_modules = {get_module_from_key(key) for key in state().current_function_hashes}
+
+    def _is_valid_key(key: str) -> bool:
+        return get_module_from_key(key) in valid_modules
+
+    stale_keys = [k for k in mutmut.tests_by_mangled_function_name if not _is_valid_key(k)]
+    for k in stale_keys:
+        del mutmut.tests_by_mangled_function_name[k]
+
+    stale_dep_keys = [k for k in state().function_dependencies if not _is_valid_key(k)]
+    for k in stale_dep_keys:
+        del state().function_dependencies[k]
+
+    for _, callers in state().function_dependencies.items():
+        callers -= {c for c in callers if not _is_valid_key(c)}
+
+
+def _invalidate_stale_dependency_edges() -> set[str]:
+    old_hashes = state().old_function_hashes
+    new_hashes = state().current_function_hashes
+
+    if not old_hashes:
+        return set()
+
+    all_functions = old_hashes.keys() | new_hashes.keys()
+    changed_functions = {f for f in all_functions if old_hashes.get(f) != new_hashes.get(f)}
+
+    if not changed_functions:
+        return set()
+
+    for callers in state().function_dependencies.values():
+        callers -= changed_functions
+
+    deleted_functions = old_hashes.keys() - new_hashes.keys()
+    for f in deleted_functions:
+        state().function_dependencies.pop(f, None)
+
+    return changed_functions
+
+
+def collect_or_load_stats(runner: TestRunner, invalidate_stale_callers: bool = True) -> None:
     did_load = load_stats()
 
     if not did_load:
         # Run full stats
         run_stats_collection(runner)
     else:
+        _cleanup_stale_stats()
+        if Config.get().track_dependencies and invalidate_stale_callers:
+            _invalidate_stale_dependency_edges()
+        save_stats()
+
         # Run incremental stats
         with CatchOutput(spinner_title="Listing all tests") as output_catcher:
             os.environ["MUTANT_UNDER_TEST"] = "list_all_tests"
@@ -793,6 +859,9 @@ def load_stats() -> bool:
                 mutmut.tests_by_mangled_function_name[k] |= set(v)
             mutmut.duration_by_test = data.pop("duration_by_test")
             mutmut.stats_time = data.pop("stats_time")
+            state().old_function_hashes = data.pop("function_hashes", {})
+            for k, v in data.pop("function_dependencies", {}).items():
+                state().function_dependencies[k] = set(v)
             assert not data, data
             did_load = True
     except (FileNotFoundError, JSONDecodeError):
@@ -807,6 +876,8 @@ def save_stats() -> None:
                 tests_by_mangled_function_name={k: list(v) for k, v in mutmut.tests_by_mangled_function_name.items()},
                 duration_by_test=mutmut.duration_by_test,
                 stats_time=mutmut.stats_time,
+                function_hashes=state().current_function_hashes,
+                function_dependencies={k: list(v) for k, v in state().function_dependencies.items()},
             ),
             f,
             indent=4,
diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py
index 3f581bd9..9145d4f8 100644
--- a/src/mutmut/configuration.py
+++ b/src/mutmut/configuration.py
@@ -142,6 +142,8 @@ def _load_config() -> Config:
         use_setproctitle=s(
             "use_setproctitle", not platform.system() == "Darwin"
         ),  # False on Mac, true otherwise as default (https://github.com/boxed/mutmut/pull/450#issuecomment-4002571055)
+        track_dependencies=s("track_dependencies", True),
+        dependency_tracking_depth=s("dependency_tracking_depth", None),
     )
 
 
@@ -164,6 +166,8 @@ class Config:
     timeout_constant: float
     type_check_command: list[str]
     use_setproctitle: bool
+    track_dependencies: bool
+    dependency_tracking_depth: int | None
 
     def should_mutate(self, path: Path | str) -> bool:
         return self._should_include_for_mutation(path) and not self._should_ignore_for_mutation(path)
diff --git a/src/mutmut/core.py b/src/mutmut/core.py
new file mode 100644
index 00000000..4e42db7a
--- /dev/null
+++ b/src/mutmut/core.py
@@ -0,0 +1,21 @@
+from contextvars import ContextVar
+from contextvars import Token
+from typing import ClassVar
+
+
+class MutmutCallStack:
+    """Async-compatible call context for dependency tracking."""
+
+    _ctx: ClassVar[ContextVar[tuple[str | None, int]]] = ContextVar("_mutmut_call_context", default=(None, 0))
+
+    @classmethod
+    def get(cls) -> tuple[str | None, int]:
+        return cls._ctx.get()
+
+    @classmethod
+    def set(cls, value: tuple[str, int]) -> Token[tuple[str | None, int]]:
+        return cls._ctx.set(value)
+
+    @classmethod
+    def reset(cls, token: Token[tuple[str | None, int]]) -> None:
+        cls._ctx.reset(token)
diff --git a/src/mutmut/mutation/trampoline.py b/src/mutmut/mutation/trampoline.py
index 3e761ff5..4d470180 100644
--- a/src/mutmut/mutation/trampoline.py
+++ b/src/mutmut/mutation/trampoline.py
@@ -10,6 +10,7 @@
 from mutmut.__main__ import MutmutProgrammaticFailException
 from mutmut.__main__ import mangled_name_from_mutant_name
 from mutmut.__main__ import record_trampoline_hit
+from mutmut.core import MutmutCallStack
 
 TReturn = TypeVar("TReturn")
 MutantDict = Annotated[dict[str, Callable[..., TReturn]], "Mutant"]
@@ -57,8 +58,18 @@ def trampoline(*args: P.args, **kwargs: P.kwargs) -> R:
                 )
 
             if mutant_under_test == "stats":
-                record_trampoline_hit(f"{orig_func.__module__}.{mangled_name_from_mutant_name(orig_func.__name__)}")
-                return orig_func(*call_args, **kwargs)
+                orig_qual_name = f"{orig_func.__module__}.{mangled_name_from_mutant_name(orig_func.__name__)}"
+                caller_name, depth = MutmutCallStack.get()
+                max_depth = int(os.environ.get("MUTMUT_DEPENDENCY_DEPTH", "-1"))
+                if max_depth == -1 or depth < max_depth:
+                    record_trampoline_hit(orig_qual_name, caller=caller_name)
+                    token = MutmutCallStack.set((orig_qual_name, depth + 1))
+                    try:
+                        return orig_func(*call_args, **kwargs)
+                    finally:
+                        MutmutCallStack.reset(token)
+                else:
+                    return orig_func(*call_args, **kwargs)
 
             # mutant under test is {module}.{mutant_name}
             module, _, mutant_name = mutant_under_test.rpartition(".")
diff --git a/src/mutmut/state.py b/src/mutmut/state.py
new file mode 100644
index 00000000..6a774e31
--- /dev/null
+++ b/src/mutmut/state.py
@@ -0,0 +1,25 @@
+from collections import defaultdict
+from dataclasses import dataclass
+from dataclasses import field
+
+
+@dataclass
+class MutmutState:
+    old_function_hashes: dict[str, str] = field(default_factory=dict)
+    current_function_hashes: dict[str, str] = field(default_factory=dict)
+    function_dependencies: defaultdict[str, set[str]] = field(default_factory=lambda: defaultdict(set))
+
+
+_state: MutmutState | None = None
+
+
+def state() -> MutmutState:
+    global _state
+    if _state is None:
+        _state = MutmutState()
+    return _state
+
+
+def reset_state() -> None:
+    global _state
+    _state = None
diff --git a/src/mutmut/utils/format_utils.py b/src/mutmut/utils/format_utils.py
index a1228719..19b82ae4 100644
--- a/src/mutmut/utils/format_utils.py
+++ b/src/mutmut/utils/format_utils.py
@@ -45,6 +45,19 @@ def strip_prefix(s: str, *, prefix: str, strict: bool = False) -> str:
     return s
 
 
+def get_module_from_key(key: str) -> str:
+    """Extract module name from a mangled function key like 'app.foo.x_bar'.
+
+    The function name starts with 'x_' or 'xǁ', so we find that part
+    and return everything before it as the module path.
+    """
+    parts = key.split(".")
+    for i in range(len(parts) - 1, -1, -1):
+        if parts[i].startswith("x_") or parts[i].startswith("xǁ"):
+            return ".".join(parts[:i])
+    return key.rsplit(".", 1)[0] if "." in key else key
+
+
 def get_mutant_name(relative_source_path: Path, mutant_method_name: str) -> str:
     module_name = str(relative_source_path)[: -len(relative_source_path.suffix)].replace(os.sep, ".")
     module_name = strip_prefix(module_name, prefix="src.")
diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py
index 9fbcf47b..7aa18ce1 100644
--- a/tests/mutation/test_mutation.py
+++ b/tests/mutation/test_mutation.py
@@ -1,24 +1,32 @@
 import os
 import pathlib
 import tempfile
+from collections import defaultdict
 from unittest.mock import Mock
 from unittest.mock import patch
 
 import libcst as cst
 import pytest
 
+import mutmut
 from mutmut.__main__ import CatchOutput
 from mutmut.__main__ import MutmutProgrammaticFailException
+from mutmut.__main__ import _cleanup_stale_stats
+from mutmut.__main__ import _invalidate_stale_dependency_edges
 from mutmut.__main__ import get_diff_for_mutant
 from mutmut.__main__ import mangled_name_from_mutant_name
 from mutmut.__main__ import orig_function_and_class_names_from_key
+from mutmut.__main__ import record_trampoline_hit
 from mutmut.__main__ import run_forced_fail_test
+from mutmut.configuration import Config
 from mutmut.mutation.data import SourceFileMutationData
 from mutmut.mutation.file_mutation import compute_function_hashes
 from mutmut.mutation.file_mutation import create_mutations
 from mutmut.mutation.file_mutation import mutate_file_contents
 from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR
 from mutmut.mutation.trampoline_templates import mangle_function_name
+from mutmut.state import reset_state
+from mutmut.state import state
 from mutmut.utils.format_utils import get_mutant_name
 
 
@@ -1193,3 +1201,95 @@ def method(self):
             "method changed — all mutants should be reset, but some were preserved: "
             + str({k: v for k, v in merged.items() if v is not None})
         )
+
+
+# --- dependency tracking tests ---
+
+
+def test_record_trampoline_hit_records_caller(monkeypatch):
+    """record_trampoline_hit(name, caller=...) stores the edge in function_dependencies."""
+
+    reset_state()
+    mutmut._stats.clear()
+
+    cfg = Mock(spec=Config)
+    cfg.max_stack_depth = -1
+    cfg.source_paths = []
+    cfg.track_dependencies = True
+    monkeypatch.setattr(Config, "get", lambda: cfg)
+
+    record_trampoline_hit("my_module.x_foo", caller="my_module.x_bar")
+
+    assert "my_module.x_bar" in state().function_dependencies["my_module.x_foo"]
+    reset_state()
+
+
+def test_record_trampoline_hit_skips_caller_when_disabled(monkeypatch):
+    """record_trampoline_hit does not record dependencies when track_dependencies=False."""
+
+    reset_state()
+    mutmut._stats.clear()
+
+    cfg = Mock(spec=Config)
+    cfg.max_stack_depth = -1
+    cfg.source_paths = []
+    cfg.track_dependencies = False
+    monkeypatch.setattr(Config, "get", lambda: cfg)
+
+    record_trampoline_hit("my_module.x_foo", caller="my_module.x_bar")
+
+    assert "my_module.x_foo" not in state().function_dependencies
+    reset_state()
+
+
+def test_cleanup_stale_stats_removes_unknown_modules(monkeypatch):
+    """_cleanup_stale_stats removes test associations for modules not in current_function_hashes."""
+
+    reset_state()
+    old_stats = mutmut.tests_by_mangled_function_name
+    mutmut.tests_by_mangled_function_name = defaultdict(set)
+
+    state().current_function_hashes["live_mod.x_foo"] = "aabbcc"
+    mutmut.tests_by_mangled_function_name["live_mod.x_foo__mutmut_orig"] = {"test_alive"}
+    mutmut.tests_by_mangled_function_name["dead_mod.x_bar__mutmut_orig"] = {"test_dead"}
+    state().function_dependencies["live_mod.x_baz"] = {"dead_mod.x_bar"}
+
+    _cleanup_stale_stats()
+
+    assert "live_mod.x_foo__mutmut_orig" in mutmut.tests_by_mangled_function_name
+    assert "dead_mod.x_bar__mutmut_orig" not in mutmut.tests_by_mangled_function_name
+    assert "dead_mod.x_bar" not in state().function_dependencies["live_mod.x_baz"]
+
+    mutmut.tests_by_mangled_function_name = old_stats
+    reset_state()
+
+
+def test_invalidate_stale_dependency_edges_clears_changed_callers():
+    """When B's hash changes, B is removed from all caller sets in function_dependencies."""
+
+    reset_state()
+
+    state().function_dependencies["mod.x_c"] = {"mod.x_b", "mod.x_a"}
+    state().old_function_hashes["mod.x_b"] = "old"
+    state().current_function_hashes["mod.x_b"] = "new"
+    state().old_function_hashes["mod.x_a"] = "same"
+    state().current_function_hashes["mod.x_a"] = "same"
+
+    changed = _invalidate_stale_dependency_edges()
+
+    assert "mod.x_b" in changed
+    assert "mod.x_b" not in state().function_dependencies["mod.x_c"]
+    assert "mod.x_a" in state().function_dependencies["mod.x_c"]
+    reset_state()
+
+
+def test_invalidate_stale_dependency_edges_no_old_hashes_returns_empty():
+    """With no prior hashes (first run), nothing is invalidated."""
+
+    reset_state()
+    state().current_function_hashes["mod.x_foo"] = "abc"
+
+    changed = _invalidate_stale_dependency_edges()
+
+    assert changed == set()
+    reset_state()
diff --git a/tests/test_configuration.py b/tests/test_configuration.py
index b1451eb0..bb9feb5c 100644
--- a/tests/test_configuration.py
+++ b/tests/test_configuration.py
@@ -71,6 +71,8 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config:
             timeout_constant=1.0,
             type_check_command=[],
             use_setproctitle=False,
+            track_dependencies=True,
+            dependency_tracking_depth=None,
         )
 
     def test_ignores_non_python_files(self):
@@ -342,6 +344,8 @@ def test_uses_defaults_when_no_config(self, in_tmp_dir: Path):
         assert config.timeout_multiplier == 15.0
         assert config.timeout_constant == 1.0
         assert config.type_check_command == []
+        assert config.track_dependencies is True
+        assert config.dependency_tracking_depth is None
 
     def test_also_copy_includes_defaults(self, in_tmp_dir: Path):
         (in_tmp_dir / "src").mkdir()

From a526f6a661d8f3012d2d16577f5c7b6785e0d363 Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Fri, 5 Jun 2026 21:17:38 -0400
Subject: [PATCH 3/8] e2e: add benchmark project with 1k mutants

- Add e2e_projects/benchmark_1k/ with ~1000 mutants for testing
- Includes modules: numbers, strings, booleans, operators, comparisons,
  arguments, returns, complex (recursion, higher-order functions)
- Configurable delays via BENCHMARK_IMPORT_DELAY, BENCHMARK_CONFTEST_DELAY,
  BENCHMARK_TEST_DELAY environment variables to simulate performance
  under varying test and startup runtimes.
---
 e2e_projects/benchmark_1k/README.md           |  60 +++
 .../benchmark_1k/benchmark_results.json       | 503 ++++++++++++++++++
 e2e_projects/benchmark_1k/mutmut_preload.txt  |   8 +
 e2e_projects/benchmark_1k/pyproject.toml      |  19 +
 e2e_projects/benchmark_1k/requirements.txt    |   2 +
 e2e_projects/benchmark_1k/run_benchmark.py    | 332 ++++++++++++
 .../benchmark_1k/src/benchmark/__init__.py    |  34 ++
 .../benchmark_1k/src/benchmark/arguments.py   |  71 +++
 .../benchmark_1k/src/benchmark/booleans.py    | 180 +++++++
 .../benchmark_1k/src/benchmark/comparisons.py | 242 +++++++++
 .../benchmark_1k/src/benchmark/complex.py     | 240 +++++++++
 .../benchmark_1k/src/benchmark/numbers.py     |  69 +++
 .../benchmark_1k/src/benchmark/operators.py   | 129 +++++
 .../benchmark_1k/src/benchmark/returns.py     | 108 ++++
 .../benchmark_1k/src/benchmark/strings.py     | 199 +++++++
 e2e_projects/benchmark_1k/tests/__init__.py   |   0
 e2e_projects/benchmark_1k/tests/conftest.py   |  31 ++
 .../benchmark_1k/tests/test_arguments.py      |  56 ++
 .../benchmark_1k/tests/test_booleans.py       | 201 +++++++
 .../benchmark_1k/tests/test_comparisons.py    | 269 ++++++++++
 .../benchmark_1k/tests/test_complex.py        | 121 +++++
 .../benchmark_1k/tests/test_numbers.py        |  56 ++
 .../benchmark_1k/tests/test_operators.py      |  94 ++++
 .../benchmark_1k/tests/test_returns.py        |  72 +++
 .../benchmark_1k/tests/test_strings.py        | 142 +++++
 25 files changed, 3238 insertions(+)
 create mode 100644 e2e_projects/benchmark_1k/README.md
 create mode 100644 e2e_projects/benchmark_1k/benchmark_results.json
 create mode 100644 e2e_projects/benchmark_1k/mutmut_preload.txt
 create mode 100644 e2e_projects/benchmark_1k/pyproject.toml
 create mode 100644 e2e_projects/benchmark_1k/requirements.txt
 create mode 100644 e2e_projects/benchmark_1k/run_benchmark.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/__init__.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/arguments.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/booleans.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/comparisons.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/complex.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/numbers.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/operators.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/returns.py
 create mode 100644 e2e_projects/benchmark_1k/src/benchmark/strings.py
 create mode 100644 e2e_projects/benchmark_1k/tests/__init__.py
 create mode 100644 e2e_projects/benchmark_1k/tests/conftest.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_arguments.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_booleans.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_comparisons.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_complex.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_numbers.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_operators.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_returns.py
 create mode 100644 e2e_projects/benchmark_1k/tests/test_strings.py

diff --git a/e2e_projects/benchmark_1k/README.md b/e2e_projects/benchmark_1k/README.md
new file mode 100644
index 00000000..f5f4ab20
--- /dev/null
+++ b/e2e_projects/benchmark_1k/README.md
@@ -0,0 +1,60 @@
+# Benchmark 1K
+
+A synthetic benchmark project with 1000 mutants for validating mutmut's process isolation and hot-fork warmup strategy performance.
+
+**TL;DR:**
+- `fork` is fastest and nearly immune to import delays (requires fork-safe libraries)
+- `collect` (hot-fork default) is 2-9x faster than `import`/`none` depending on import cost
+- Higher import delays dramatically penalize `import` and `none` strategies
+
+
+## Mutant Distribution
+
+| Type       | Total | Killed | Survived | Kill Rate |
+|------------|-------|--------|----------|-----------|
+| return     | 221   | 161    | 60       | 73%       |
+| number     | 159   | 99     | 60       | 62%       |
+| argument   | 141   | 132    | 9        | 94%       |
+| string     | 125   | 78     | 47       | 62%       |
+| boolean    | 120   | 47     | 73       | 39%       |
+| comparison | 119   | 19     | 100      | 16%       |
+| operator   | 115   | 90     | 25       | 78%       |
+| **Total**  | **1000** | **626** | **374** | **63%** |
+
+## Usage
+
+### Run mutation testing
+
+```bash
+cd e2e_projects/benchmark_1k
+mutmut run
+```
+
+### Run benchmark comparison
+
+```bash
+python run_benchmark.py
+```
+
+This runs `mutmut run` under each strategy (`fork`, `collect`, `import`, `none`) and outputs:
+- Throughput (mutations/second) for each strategy
+- Results saved to `benchmark_results.json`
+
+### View results
+
+```bash
+cat mutants/summary.json | python -m json.tool
+```
+
+## Test Design
+
+Tests are fast unit tests with instant assertions. Configurable delays simulate real-world costs:
+
+- **Import delay**: Simulates library import time (Flask, SQLAlchemy, etc.)
+- **Conftest delay**: Simulates fixture/plugin setup time
+- **Test delay**: Per-test runtime with +/-10% gaussian jitter for realistic variance
+
+Usage:
+```bash
+python run_benchmark.py --test-delay 0.01  # Add 10ms per-test with jitter
+```
diff --git a/e2e_projects/benchmark_1k/benchmark_results.json b/e2e_projects/benchmark_1k/benchmark_results.json
new file mode 100644
index 00000000..aa9270e9
--- /dev/null
+++ b/e2e_projects/benchmark_1k/benchmark_results.json
@@ -0,0 +1,503 @@
+{
+  "timestamp": "2026-01-26T19:02:52",
+  "python_version": "3.14.2",
+  "strategies": [
+    "fork",
+    "collect",
+    "import",
+    "none"
+  ],
+  "delay_configs": [
+    [
+      0.1,
+      0.1
+    ],
+    [
+      0.5,
+      0.5
+    ],
+    [
+      1.0,
+      1.0
+    ]
+  ],
+  "test_delay": 0.05,
+  "results": [
+    {
+      "import_delay": 0.1,
+      "conftest_delay": 0.1,
+      "strategy": "fork",
+      "elapsed_seconds": 27.89,
+      "mutations_per_second": 101.07,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.27,
+      "phase_stats_collection": 0.611,
+      "phase_clean_tests": 8.405,
+      "phase_forced_fail_test": 0.177,
+      "phase_mutation_testing": 9.845,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.1,
+      "conftest_delay": 0.1,
+      "strategy": "collect",
+      "elapsed_seconds": 32.31,
+      "mutations_per_second": 71.42,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.283,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 8.516,
+      "phase_forced_fail_test": 0.488,
+      "phase_mutation_testing": 13.932,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.1,
+      "conftest_delay": 0.1,
+      "strategy": "import",
+      "elapsed_seconds": 47.53,
+      "mutations_per_second": 34.81,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.258,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 8.794,
+      "phase_forced_fail_test": 0.683,
+      "phase_mutation_testing": 28.587,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.1,
+      "conftest_delay": 0.1,
+      "strategy": "none",
+      "elapsed_seconds": 59.19,
+      "mutations_per_second": 24.48,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.277,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 8.692,
+      "phase_forced_fail_test": 0.482,
+      "phase_mutation_testing": 40.648,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.5,
+      "conftest_delay": 0.5,
+      "strategy": "fork",
+      "elapsed_seconds": 29.4,
+      "mutations_per_second": 95.02,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.262,
+      "phase_stats_collection": 0.619,
+      "phase_clean_tests": 8.366,
+      "phase_forced_fail_test": 0.211,
+      "phase_mutation_testing": 10.472,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.5,
+      "conftest_delay": 0.5,
+      "strategy": "collect",
+      "elapsed_seconds": 36.78,
+      "mutations_per_second": 64.17,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.263,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 9.578,
+      "phase_forced_fail_test": 1.369,
+      "phase_mutation_testing": 15.505,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.5,
+      "conftest_delay": 0.5,
+      "strategy": "import",
+      "elapsed_seconds": 96.69,
+      "mutations_per_second": 13.14,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.267,
+      "phase_stats_collection": 0.001,
+      "phase_clean_tests": 9.365,
+      "phase_forced_fail_test": 1.392,
+      "phase_mutation_testing": 75.721,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 0.5,
+      "conftest_delay": 0.5,
+      "strategy": "none",
+      "elapsed_seconds": 167.88,
+      "mutations_per_second": 6.78,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.264,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 9.606,
+      "phase_forced_fail_test": 1.355,
+      "phase_mutation_testing": 146.664,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 1.0,
+      "conftest_delay": 1.0,
+      "strategy": "fork",
+      "elapsed_seconds": 30.83,
+      "mutations_per_second": 92.72,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.276,
+      "phase_stats_collection": 0.646,
+      "phase_clean_tests": 8.405,
+      "phase_forced_fail_test": 0.191,
+      "phase_mutation_testing": 10.731,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 1.0,
+      "conftest_delay": 1.0,
+      "strategy": "collect",
+      "elapsed_seconds": 44.51,
+      "mutations_per_second": 48.09,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.272,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 10.365,
+      "phase_forced_fail_test": 2.374,
+      "phase_mutation_testing": 20.691,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 1.0,
+      "conftest_delay": 1.0,
+      "strategy": "import",
+      "elapsed_seconds": 155.79,
+      "mutations_per_second": 7.53,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.298,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 10.311,
+      "phase_forced_fail_test": 2.336,
+      "phase_mutation_testing": 132.081,
+      "test_delay": 0.05
+    },
+    {
+      "import_delay": 1.0,
+      "conftest_delay": 1.0,
+      "strategy": "none",
+      "elapsed_seconds": 290.97,
+      "mutations_per_second": 3.73,
+      "total_mutants": 995,
+      "killed": 832,
+      "survived": 163,
+      "timeout": 0,
+      "suspicious": 0,
+      "exit_code": 0,
+      "phase_mutant_generation": 0.256,
+      "phase_stats_collection": 0.002,
+      "phase_clean_tests": 10.52,
+      "phase_forced_fail_test": 2.365,
+      "phase_mutation_testing": 266.689,
+      "test_delay": 0.05
+    }
+  ],
+  "results_by_config": [
+    {
+      "import_delay": 0.1,
+      "conftest_delay": 0.1,
+      "results": [
+        {
+          "strategy": "fork",
+          "elapsed_seconds": 27.89,
+          "mutations_per_second": 101.07,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.27,
+          "phase_stats_collection": 0.611,
+          "phase_clean_tests": 8.405,
+          "phase_forced_fail_test": 0.177,
+          "phase_mutation_testing": 9.845,
+          "import_delay": 0.1,
+          "conftest_delay": 0.1,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "collect",
+          "elapsed_seconds": 32.31,
+          "mutations_per_second": 71.42,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.283,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 8.516,
+          "phase_forced_fail_test": 0.488,
+          "phase_mutation_testing": 13.932,
+          "import_delay": 0.1,
+          "conftest_delay": 0.1,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "import",
+          "elapsed_seconds": 47.53,
+          "mutations_per_second": 34.81,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.258,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 8.794,
+          "phase_forced_fail_test": 0.683,
+          "phase_mutation_testing": 28.587,
+          "import_delay": 0.1,
+          "conftest_delay": 0.1,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "none",
+          "elapsed_seconds": 59.19,
+          "mutations_per_second": 24.48,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.277,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 8.692,
+          "phase_forced_fail_test": 0.482,
+          "phase_mutation_testing": 40.648,
+          "import_delay": 0.1,
+          "conftest_delay": 0.1,
+          "test_delay": 0.05
+        }
+      ]
+    },
+    {
+      "import_delay": 0.5,
+      "conftest_delay": 0.5,
+      "results": [
+        {
+          "strategy": "fork",
+          "elapsed_seconds": 29.4,
+          "mutations_per_second": 95.02,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.262,
+          "phase_stats_collection": 0.619,
+          "phase_clean_tests": 8.366,
+          "phase_forced_fail_test": 0.211,
+          "phase_mutation_testing": 10.472,
+          "import_delay": 0.5,
+          "conftest_delay": 0.5,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "collect",
+          "elapsed_seconds": 36.78,
+          "mutations_per_second": 64.17,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.263,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 9.578,
+          "phase_forced_fail_test": 1.369,
+          "phase_mutation_testing": 15.505,
+          "import_delay": 0.5,
+          "conftest_delay": 0.5,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "import",
+          "elapsed_seconds": 96.69,
+          "mutations_per_second": 13.14,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.267,
+          "phase_stats_collection": 0.001,
+          "phase_clean_tests": 9.365,
+          "phase_forced_fail_test": 1.392,
+          "phase_mutation_testing": 75.721,
+          "import_delay": 0.5,
+          "conftest_delay": 0.5,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "none",
+          "elapsed_seconds": 167.88,
+          "mutations_per_second": 6.78,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.264,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 9.606,
+          "phase_forced_fail_test": 1.355,
+          "phase_mutation_testing": 146.664,
+          "import_delay": 0.5,
+          "conftest_delay": 0.5,
+          "test_delay": 0.05
+        }
+      ]
+    },
+    {
+      "import_delay": 1.0,
+      "conftest_delay": 1.0,
+      "results": [
+        {
+          "strategy": "fork",
+          "elapsed_seconds": 30.83,
+          "mutations_per_second": 92.72,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.276,
+          "phase_stats_collection": 0.646,
+          "phase_clean_tests": 8.405,
+          "phase_forced_fail_test": 0.191,
+          "phase_mutation_testing": 10.731,
+          "import_delay": 1.0,
+          "conftest_delay": 1.0,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "collect",
+          "elapsed_seconds": 44.51,
+          "mutations_per_second": 48.09,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.272,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 10.365,
+          "phase_forced_fail_test": 2.374,
+          "phase_mutation_testing": 20.691,
+          "import_delay": 1.0,
+          "conftest_delay": 1.0,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "import",
+          "elapsed_seconds": 155.79,
+          "mutations_per_second": 7.53,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.298,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 10.311,
+          "phase_forced_fail_test": 2.336,
+          "phase_mutation_testing": 132.081,
+          "import_delay": 1.0,
+          "conftest_delay": 1.0,
+          "test_delay": 0.05
+        },
+        {
+          "strategy": "none",
+          "elapsed_seconds": 290.97,
+          "mutations_per_second": 3.73,
+          "total_mutants": 995,
+          "killed": 832,
+          "survived": 163,
+          "timeout": 0,
+          "suspicious": 0,
+          "exit_code": 0,
+          "phase_mutant_generation": 0.256,
+          "phase_stats_collection": 0.002,
+          "phase_clean_tests": 10.52,
+          "phase_forced_fail_test": 2.365,
+          "phase_mutation_testing": 266.689,
+          "import_delay": 1.0,
+          "conftest_delay": 1.0,
+          "test_delay": 0.05
+        }
+      ]
+    }
+  ]
+}
diff --git a/e2e_projects/benchmark_1k/mutmut_preload.txt b/e2e_projects/benchmark_1k/mutmut_preload.txt
new file mode 100644
index 00000000..6aeb0c2c
--- /dev/null
+++ b/e2e_projects/benchmark_1k/mutmut_preload.txt
@@ -0,0 +1,8 @@
+# Modules to preload for the 'import' warmup strategy
+# These are imported in the orchestrator and inherited by forked workers
+pytest
+
+# Normally you would avoid importing the module itself in real projects,
+# as it could cause unwanted side effects; here it is a convenient way
+# to simulate import delays through src/benchmark/__init__.py
+benchmark
diff --git a/e2e_projects/benchmark_1k/pyproject.toml b/e2e_projects/benchmark_1k/pyproject.toml
new file mode 100644
index 00000000..e3fa0d99
--- /dev/null
+++ b/e2e_projects/benchmark_1k/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "benchmark-1k"
+version = "0.1.0"
+description = "Benchmark project for mutmut warmup strategy comparison (~1000 mutants)"
+requires-python = ">=3.10"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/benchmark"]
+
+[tool.mutmut]
+log_to_file = true
+source_paths = ["src/"]
+process_isolation = "hot-fork"
+debug = false
+hot_fork_warmup = "none"
diff --git a/e2e_projects/benchmark_1k/requirements.txt b/e2e_projects/benchmark_1k/requirements.txt
new file mode 100644
index 00000000..dc136dda
--- /dev/null
+++ b/e2e_projects/benchmark_1k/requirements.txt
@@ -0,0 +1,2 @@
+# Test dependencies
+pytest>=7.0.0
diff --git a/e2e_projects/benchmark_1k/run_benchmark.py b/e2e_projects/benchmark_1k/run_benchmark.py
new file mode 100644
index 00000000..9164ff3a
--- /dev/null
+++ b/e2e_projects/benchmark_1k/run_benchmark.py
@@ -0,0 +1,332 @@
+#!/usr/bin/env python3
+"""
+Benchmark runner for mutmut process isolation comparison.
+
+Runs mutmut under each strategy and reports throughput.
+
+Usage:
+    python run_benchmark.py [--strategies collect,import,none,fork] [--delay-configs 0.1:0.1,0.5:0.5,1.0:1.0]
+                            [--show-output] [--verbose]
+
+The delay configs simulate the package-import and conftest.py loading times of projects with heavy dependencies (Flask, SQLAlchemy, etc.).
+Format: import_delay:conftest_delay pairs, comma-separated.
+Higher values show bigger differences between warmup strategies.
+
+Use --test-delay (default: 0.05) to set the simulated per-test runtime in seconds, with +/-10% gaussian jitter.
+"""
+
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+
+STRATEGIES = ["fork", "collect", "import", "none"]
+DEFAULT_OUTPUT = "benchmark_results.json"
+DEFAULT_DELAY_CONFIGS = "0.1:0.1,0.5:0.5,1.0:1.0"  # cli format
+
+
+def clean_mutants():
+    """Remove mutants directory for fresh run."""
+    mutants_dir = Path("mutants")
+    if mutants_dir.exists():
+        shutil.rmtree(mutants_dir)
+
+
+def get_pyproject_content(debug: bool = False, process_isolation: str = "fork") -> str:
+    """Get base pyproject.toml content."""
+    return f"""[project]
+name = "benchmark-1k"
+version = "0.1.0"
+description = "Benchmark project for mutmut warmup strategy comparison (~1000 mutants)"
+requires-python = ">=3.10"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/benchmark"]
+
+[tool.mutmut]
+log_to_file = true
+source_paths = ["src/"]
+process_isolation = "{process_isolation}"
+debug = {"true" if debug else "false"}
+"""
+
+
+def run_mutmut(
+    strategy: str,
+    import_delay: float = 0.1,
+    conftest_delay: float = 0.1,
+    test_delay: float = 0.0,
+    verbose: bool = False,
+    show_output: bool = False,
+) -> dict:
+    """Run mutmut with specified strategy."""
+    config = get_pyproject_content(debug=verbose, process_isolation=strategy if strategy == "fork" else "hot-fork")
+
+    if strategy != "fork":
+        config += f'hot_fork_warmup = "{strategy}"\n'
+        if strategy == "import":
+            config += 'preload_modules_file = "mutmut_preload.txt"\n'
+
+    config_path = Path("pyproject.toml")
+    config_path.write_text(config)
+
+    clean_mutants()
+
+    print("  Starting mutmut run...")
+    start = time.perf_counter()
+
+    cmd = ["mutmut", "run"]
+
+    env = {
+        **os.environ,
+        "PYTHONPATH": "src",
+        "BENCHMARK_IMPORT_DELAY": str(import_delay),
+        "BENCHMARK_CONFTEST_DELAY": str(conftest_delay),
+        "BENCHMARK_TEST_DELAY": str(test_delay),
+    }
+
+    if verbose or show_output:
+        result = subprocess.run(cmd, text=True, env=env)
+    else:
+        result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, text=True, env=env)
+
+    returncode = result.returncode
+    elapsed = time.perf_counter() - start
+
+    summary_path = Path("mutants/summary.json")
+    summary = json.loads(summary_path.read_text()) if summary_path.exists() else {}
+
+    stats = summary.get("stats", {})
+    phase_timings = summary.get("phase_timings", {})
+
+    total_mutants = stats.get("total", 0)
+    mutation_testing_time = phase_timings.get("mutation_testing", 0)
+    if mutation_testing_time > 0 and total_mutants > 0:
+        throughput = total_mutants / mutation_testing_time
+    else:
+        throughput = 0
+
+    # Rename mutants dir to preserve results for this strategy
+    mutants_dir = Path("mutants")
+    dir_name = f"mutants_{strategy}_i{int(import_delay*1000)}_c{int(conftest_delay*1000)}_t{int(test_delay*1000)}"
+    strategy_dir = Path(dir_name)
+    if strategy_dir.exists():
+        shutil.rmtree(strategy_dir)
+    if mutants_dir.exists():
+        mutants_dir.rename(strategy_dir)
+        print(f"  Results saved to {strategy_dir}/")
+
+    return {
+        "strategy": strategy,
+        "elapsed_seconds": round(elapsed, 2),
+        "mutations_per_second": round(throughput, 2),
+        "total_mutants": stats.get("total", 0),
+        "killed": stats.get("killed", 0),
+        "survived": stats.get("survived", 0),
+        "timeout": stats.get("timeout", 0),
+        "suspicious": stats.get("suspicious", 0),
+        "exit_code": returncode,
+        "phase_mutant_generation": round(phase_timings.get("mutant_generation", 0), 3),
+        "phase_stats_collection": round(phase_timings.get("stats_collection", 0), 3),
+        "phase_clean_tests": round(phase_timings.get("clean_tests", 0), 3),
+        "phase_forced_fail_test": round(phase_timings.get("forced_fail_test", 0), 3),
+        "phase_mutation_testing": round(phase_timings.get("mutation_testing", 0), 3),
+    }
+
+
+def print_result(result: dict):
+    """Print result summary for one strategy."""
+    print(f"  Avg. Mut/s:    {result['mutations_per_second']:.2f} mut/s")
+    print(f"  Total time:    {result['elapsed_seconds']:.1f}s")
+    print(f"  Total mutants: {result['total_mutants']}")
+    print(f"  Killed:        {result['killed']}")
+    print(f"  Survived:      {result['survived']}")
+    if result["timeout"] > 0:
+        print(f"  Timeout:       {result['timeout']}")
+    if result["exit_code"] != 0:
+        print(f"  Exit code:     {result['exit_code']} (non-zero)")
+    print("  Phase timings:")
+    print(f"    Mutant generation: {result['phase_mutant_generation']:.3f}s")
+    print(f"    Stats collection:  {result['phase_stats_collection']:.3f}s")
+    print(f"    Clean tests:       {result['phase_clean_tests']:.3f}s")
+    print(f"    Forced fail test:  {result['phase_forced_fail_test']:.3f}s")
+    print(f"    Mutation testing:  {result['phase_mutation_testing']:.3f}s")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Benchmark mutmut run modes")
+    parser.add_argument(
+        "--strategies",
+        default=",".join(STRATEGIES),
+        help=f"Comma-separated list of strategies (default: {','.join(STRATEGIES)})",
+    )
+    parser.add_argument("--output", default=DEFAULT_OUTPUT, help=f"Output JSON file (default: {DEFAULT_OUTPUT})")
+    parser.add_argument(
+        "--delay-configs",
+        default=DEFAULT_DELAY_CONFIGS,
+        help="Comma-separated import:conftest delay pairs. Default: 0.1:0.1,0.5:0.5,1.0:1.0",
+    )
+    parser.add_argument("--verbose", "-v", action="store_true", help="Enable mutmut debug mode and show all output")
+    parser.add_argument(
+        "--show-output", "-s", action="store_true",
+        help="Show mutmut stdout/stderr (spinners, progress) without enabling debug mode",
+    )
+    parser.add_argument(
+        "--test-delay",
+        type=float,
+        default=0.05,
+        help="Per-test delay in seconds with +/-10%% gaussian jitter (default: 0.05)",
+    )
+    args = parser.parse_args()
+
+    strategies = [s.strip() for s in args.strategies.split(",")]
+    for s in strategies:
+        if s not in STRATEGIES:
+            print(f"Error: Unknown strategy '{s}'. Valid: {STRATEGIES}")
+            sys.exit(1)
+
+    # Parse delay configs (e.g., "0.1:0.1,0.5:0.5" -> [(0.1, 0.1), (0.5, 0.5)])
+    delay_configs = []
+    for pair in args.delay_configs.split(","):
+        import_delay, conftest_delay = pair.strip().split(":")
+        delay_configs.append((float(import_delay), float(conftest_delay)))
+
+    test_delay = args.test_delay
+
+    if not Path("src/benchmark").exists():
+        print("Error: Must run from benchmark_1k directory")
+        sys.exit(1)
+
+    print("=" * 60)
+    print("Mutmut Process Isolation Benchmark")
+    print("=" * 60)
+    print(f"Strategies to test: {strategies}")
+    print(f"Delay configs (import, conftest): {delay_configs}")
+    print(f"Per-test delay: {test_delay}s (+/-10% jitter)")
+
+    all_results = []
+    import_delay = 0.05
+    conftest_delay = 0.05
+
+    for import_delay, conftest_delay in delay_configs:
+        print(f"\n{'#' * 60}")
+        print(f"# DELAY CONFIG: import={import_delay}s, conftest={conftest_delay}s, test={test_delay}s")
+        print(f"{'#' * 60}")
+
+        config_results = []
+
+        for strategy in strategies:
+            print(f"\n{'=' * 60}")
+            print(f"Strategy: {strategy}")
+            print("=" * 60)
+
+            result = run_mutmut(
+                strategy,
+                import_delay=import_delay,
+                conftest_delay=conftest_delay,
+                test_delay=test_delay,
+                verbose=args.verbose,
+                show_output=args.show_output,
+            )
+            result["import_delay"] = import_delay
+            result["conftest_delay"] = conftest_delay
+            result["test_delay"] = test_delay
+            config_results.append(result)
+            print_result(result)
+
+        all_results.append(
+            {
+                "import_delay": import_delay,
+                "conftest_delay": conftest_delay,
+                "results": config_results,
+            }
+        )
+
+    flat_results = []
+    for config in all_results:
+        for r in config["results"]:
+            flat_results.append(
+                {
+                    "import_delay": config["import_delay"],
+                    "conftest_delay": config["conftest_delay"],
+                    **r,
+                }
+            )
+
+    output = {
+        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
+        "python_version": sys.version.split()[0],
+        "strategies": strategies,
+        "delay_configs": [(c["import_delay"], c["conftest_delay"]) for c in all_results],
+        "test_delay": test_delay,
+        "results": flat_results,
+        "results_by_config": all_results,
+    }
+
+    output_path = Path(args.output)
+    output_path.write_text(json.dumps(output, indent=2))
+    print(f"\n\nResults saved to {output_path}")
+
+    print("\n" + "=" * 80)
+    print("RESULTS SUMMARY")
+    print("=" * 80)
+
+    for config in all_results:
+        import_delay = config["import_delay"]
+        conftest_delay = config["conftest_delay"]
+        config_results = config["results"]
+
+        print(f"\n--- Delay: import={import_delay}s, conftest={conftest_delay}s ---")
+
+        max_throughput = max(r["mutations_per_second"] for r in config_results) if config_results else 1
+
+        print(f"{'Strategy':<12} {'Avg. Mut/s':>12} {'% of Max':>10} {'Mut Test':>10} {'Wall Time':>10}")
+        print("-" * 60)
+
+        for r in config_results:
+            throughput = r["mutations_per_second"]
+            pct_of_max = (throughput / max_throughput * 100) if max_throughput > 0 else 0
+            mut_test_time = r.get("phase_mutation_testing", 0)
+            print(
+                f"{r['strategy']:<12} {throughput:>10.1f}/s {pct_of_max:>9.0f}% {mut_test_time:>8.1f}s {r['elapsed_seconds']:>8.1f}s"
+            )
+
+    print("\n" + "=" * 80)
+    print("MUTATION THROUGHPUT COMPARISON ACROSS ALL DELAY CONFIGS")
+    print("=" * 80)
+
+    print(f"\n{'Strategy':<12}", end="")
+    for config in all_results:
+        delay = config["import_delay"]
+        print(f" {delay}s delay".center(15), end="")
+    print()
+    print("-" * (12 + 15 * len(all_results)))
+
+    for strategy in strategies:
+        print(f"{strategy:<12}", end="")
+        for config in all_results:
+            for r in config["results"]:
+                if r["strategy"] == strategy:
+                    print(f" {r['mutations_per_second']:>10.1f}/s  ", end="")
+                    break
+        print()
+
+    print()
+
+    config = get_pyproject_content()
+    Path("pyproject.toml").write_text(config)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/e2e_projects/benchmark_1k/src/benchmark/__init__.py b/e2e_projects/benchmark_1k/src/benchmark/__init__.py
new file mode 100644
index 00000000..9ea039f4
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/__init__.py
@@ -0,0 +1,34 @@
+"""Benchmark package for mutmut warmup strategy testing.
+
+Simulates a real application that imports heavy libraries on startup.
+Set BENCHMARK_IMPORT_DELAY environment variable to control the delay.
+"""
+
+import os
+import time
+
+from benchmark import arguments
+from benchmark import booleans
+from benchmark import comparisons
+from benchmark import complex
+from benchmark import numbers
+from benchmark import operators
+from benchmark import returns
+from benchmark import strings
+
+__all__ = [
+    "numbers",
+    "strings",
+    "booleans",
+    "operators",
+    "comparisons",
+    "arguments",
+    "returns",
+    "complex",
+]
+
+
+# Simulate library imports
+import_delay = float(os.environ.get("BENCHMARK_IMPORT_DELAY", "0.05"))
+if import_delay > 0:
+    time.sleep(import_delay)
diff --git a/e2e_projects/benchmark_1k/src/benchmark/arguments.py b/e2e_projects/benchmark_1k/src/benchmark/arguments.py
new file mode 100644
index 00000000..d151cef8
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/arguments.py
@@ -0,0 +1,71 @@
+"""Benchmark functions with various argument patterns."""
+
+
+# === Helper functions ===
+
+
+def helper_2(a, b):
+    """Helper with 2 args."""
+    return (a, b)
+
+
+def helper_3(a, b, c):
+    """Helper with 3 args."""
+    return (a, b, c)
+
+
+def combiner(first, second):
+    """Combine 2 values."""
+    if first is None or second is None:
+        return None
+    return f"{first}-{second}"
+
+
+# === 2-arg calls ===
+
+
+def call_2args_batch_1():
+    """2-arg calls."""
+    r1 = helper_2(1, 2)
+    r2 = helper_2(3, 4)
+    return r1, r2
+
+
+# === 3-arg calls ===
+
+
+def call_3args_batch_1():
+    """3-arg calls."""
+    r1 = helper_3(1, 2, 3)
+    return (r1,)
+
+
+# === dict literals ===
+
+
+def dict_2keys_batch_1():
+    """dict with 2 keys."""
+    d1 = {"a": 1, "b": 2}
+    return (d1,)
+
+
+def dict_3keys_batch_1():
+    """dict with 3 keys."""
+    d1 = {"x": 1, "y": 2, "z": 3}
+    return (d1,)
+
+
+# === String method calls ===
+
+
+def string_method_calls():
+    """String method calls with multiple args."""
+    text = "a-b-c-d-e"
+    r1 = text.split("-", 2)
+    return (r1,)
+
+
+def format_calls():
+    """String format calls."""
+    r1 = "{} {}".format("hello", "world")
+    return (r1,)
diff --git a/e2e_projects/benchmark_1k/src/benchmark/booleans.py b/e2e_projects/benchmark_1k/src/benchmark/booleans.py
new file mode 100644
index 00000000..db281f8e
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/booleans.py
@@ -0,0 +1,180 @@
+"""Boolean mutation targets"""
+
+
+# === Boolean literals ===
+
+
+def flags_batch_1():
+    """Boolean flags - batch 1."""
+    enabled = True
+    disabled = False
+    active = True
+    paused = False
+    return enabled, disabled, active, paused
+
+
+def flags_batch_2():
+    """Boolean flags - batch 2."""
+    visible = True
+    hidden = False
+    selected = True
+    focused = False
+    return visible, hidden, selected, focused
+
+
+def flags_batch_3():
+    """Boolean flags - batch 3."""
+    running = True
+    stopped = False
+    ready = True
+    waiting = False
+    return running, stopped, ready, waiting
+
+
+def flags_batch_4():
+    """Boolean flags - batch 4."""
+    valid = True
+    invalid = False
+    complete = True
+    pending = False
+    return valid, invalid, complete, pending
+
+
+def conditional_returns_1(x):
+    """Conditional boolean returns - batch 1."""
+    if x > 0:
+        return True
+    return False
+
+
+def conditional_returns_2(x, y):
+    """Conditional boolean returns - batch 2."""
+    if x == y:
+        return True
+    elif x > y:
+        return False
+    return True
+
+
+def default_values():
+    """Default boolean values."""
+    debug = False
+    verbose = False
+    quiet = True
+    strict = True
+    return debug, verbose, quiet, strict
+
+
+def config_flags():
+    """Configuration flags."""
+    auto_save = True
+    auto_load = False
+    cache_enabled = True
+    logging_enabled = False
+    return auto_save, auto_load, cache_enabled, logging_enabled
+
+
+def feature_flags():
+    """Feature flags."""
+    feature_a = True
+    feature_b = False
+    feature_c = True
+    feature_d = False
+    return feature_a, feature_b, feature_c, feature_d
+
+
+# === Boolean operators  ===
+
+
+def logical_and_simple(a, b):
+    """Simple AND."""
+    return a and b
+
+
+def logical_or_simple(a, b):
+    """Simple OR."""
+    return a or b
+
+
+def logical_and_chain_1(a, b, c):
+    """Chained AND - batch 1."""
+    return a and b and c
+
+
+def logical_and_chain_2(a, b, c, d):
+    """Chained AND - batch 2."""
+    return a and b and c and d
+
+
+def logical_or_chain_1(a, b, c):
+    """Chained OR - batch 1."""
+    return a or b or c
+
+
+def logical_or_chain_2(a, b, c, d):
+    """Chained OR - batch 2."""
+    return a or b or c or d
+
+
+def mixed_logic_1(a, b, c, d):
+    """Mixed AND/OR - batch 1."""
+    return (a and b) or (c and d)
+
+
+def mixed_logic_2(a, b, c, d):
+    """Mixed AND/OR - batch 2."""
+    return (a or b) and (c or d)
+
+
+def mixed_logic_3(a, b, c):
+    """Mixed AND/OR - batch 3."""
+    return a and b or c
+
+
+def mixed_logic_4(a, b, c):
+    """Mixed AND/OR - batch 4."""
+    return a or b and c
+
+
+def condition_with_and(x, y, z):
+    """Conditions with AND."""
+    result = False
+    if x > 0 and y > 0:
+        result = True
+    if y > 0 and z > 0:
+        result = result and True
+    return result
+
+
+def condition_with_or(x, y, z):
+    """Conditions with OR."""
+    result = False or True
+    if x > 0 or y > 0:
+        result = True
+    if y < 0 or z < 0:
+        result = result or False
+    return result
+
+
+def complex_condition_1(a, b, c, d):
+    """Complex condition - batch 1."""
+    return (a > 0 and b > 0) or (c > 0 and d > 0)
+
+
+def guard_clauses(value, min_val, max_val, required):
+    """Guard clauses with boolean operators."""
+    if not required and value is None:
+        return True
+    if value is None or value < min_val or value > max_val:
+        return False
+    return True
+
+
+def validation_flags(has_name, has_email, has_phone, is_verified, is_active):
+    """Validation with multiple boolean flags."""
+    has_contact = has_email or has_phone
+    is_complete = has_name and has_contact
+    is_valid = is_complete and is_verified
+    can_proceed = is_valid and is_active
+    needs_review = is_complete and not is_verified
+    return has_contact, is_complete, is_valid, can_proceed, needs_review
diff --git a/e2e_projects/benchmark_1k/src/benchmark/comparisons.py b/e2e_projects/benchmark_1k/src/benchmark/comparisons.py
new file mode 100644
index 00000000..cd9dc5e7
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/comparisons.py
@@ -0,0 +1,242 @@
+"""Comparison mutation targets."""
+
+
+# === Equality comparisons  ===
+
+
+def equality_simple(a, b):
+    """Simple equality."""
+    eq = a == b
+    neq = a != b
+    return eq, neq
+
+
+def equality_batch_1(a, b, c):
+    """Equality - batch 1."""
+    r1 = a == b
+    r2 = b == c
+    r3 = a != c
+    return r1, r2, r3
+
+
+def equality_with_literals(value):
+    """Equality with literals."""
+    is_zero = value == 0
+    is_one = value == 1
+    not_zero = value != 0
+    not_one = value != 1
+    return is_zero, is_one, not_zero, not_one
+
+
+def equality_strings(s):
+    """String equality."""
+    is_empty = s == ""
+    is_hello = s == "hello"
+    not_empty = s != ""
+    return is_empty, is_hello, not_empty
+
+
+# === Less than comparisons  ===
+
+
+def less_than_simple(a, b):
+    """Simple less than."""
+    lt = a < b
+    le = a <= b
+    return lt, le
+
+
+def less_than_batch_1(x, y, z):
+    """Less than - batch 1."""
+    r1 = x < y
+    r2 = y < z
+    r3 = x <= z
+    return r1, r2, r3
+
+
+def less_than_batch_2(value, threshold):
+    """Less than - batch 2."""
+    below = value < threshold
+    at_or_below = value <= threshold
+    return below, at_or_below
+
+
+def less_than_literals(value):
+    """Less than with literals."""
+    lt_zero = value < 0
+    lt_ten = value < 10
+    le_zero = value <= 0
+    return lt_zero, lt_ten, le_zero
+
+
+# === Greater than comparisons  ===
+
+
+def greater_than_simple(a, b):
+    """Simple greater than."""
+    gt = a > b
+    ge = a >= b
+    return gt, ge
+
+
+def greater_than_batch_1(x, y, z):
+    """Greater than - batch 1."""
+    r1 = x > y
+    r2 = y > z
+    r3 = x >= z
+    return r1, r2, r3
+
+
+def greater_than_batch_2(value, threshold):
+    """Greater than - batch 2."""
+    above = value > threshold
+    at_or_above = value >= threshold
+    return above, at_or_above
+
+
+def greater_than_literals(value):
+    """Greater than with literals."""
+    gt_zero = value > 0
+    gt_ten = value > 10
+    ge_zero = value >= 0
+    return gt_zero, gt_ten, ge_zero
+
+
+# === Identity comparisons  ===
+
+
+def identity_none(obj):
+    """Identity with None."""
+    is_none = obj is None
+    is_not_none = obj is not None
+    return is_none, is_not_none
+
+
+def identity_batch_1(a, b):
+    """Identity - batch 1."""
+    same = a is b
+    different = a is not b
+    return same, different
+
+
+def identity_checks(value, default):
+    """Multiple identity checks."""
+    if value is None:
+        return default
+    if value is not default:
+        return value
+    return None
+
+
+# === Membership comparisons  ===
+
+
+def membership_simple(item, collection):
+    """Simple membership."""
+    present = item in collection
+    absent = item not in collection
+    return present, absent
+
+
+def membership_batch_1(x, items):
+    """Membership - batch 1."""
+    r1 = x in items
+    r2 = x not in items
+    return r1, r2
+
+
+def membership_string(char, text):
+    """String membership."""
+    found = char in text
+    not_found = char not in text
+    return found, not_found
+
+
+def membership_dict(key, d):
+    """Dictionary membership."""
+    has_key = key in d
+    missing_key = key not in d
+    return has_key, missing_key
+
+
+# === Complex boundary checks  ===
+
+
+def boundary_check_1(value):
+    """Boundary check - batch 1."""
+    if value < 0:
+        return "negative"
+    elif value == 0:
+        return "zero"
+    elif value <= 10:
+        return "small"
+    elif value < 100:
+        return "medium"
+    else:
+        return "large"
+
+
+def boundary_check_2(value, low, high):
+    """Boundary check - batch 2."""
+    if value < low:
+        return "below"
+    elif value > high:
+        return "above"
+    elif value == low:
+        return "at_low"
+    elif value == high:
+        return "at_high"
+    else:
+        return "within"
+
+
+def range_check(value, min_val, max_val):
+    """Range check."""
+    if value < min_val:
+        return False
+    if value > max_val:
+        return False
+    if value >= min_val and value <= max_val:
+        return True
+    return False
+
+
+def compare_all(a, b):
+    """All comparison operators on two values."""
+    results = {
+        "eq": a == b,
+        "ne": a != b,
+        "lt": a < b,
+        "le": a <= b,
+        "gt": a > b,
+        "ge": a >= b,
+    }
+    return results
+
+
+# === Additional comparisons ===
+
+
+def chained_comparisons(x, low, mid, high):
+    """Chained comparison checks."""
+    in_lower = low <= x < mid
+    in_upper = mid <= x <= high
+    below_all = x < low
+    above_all = x > high
+    return in_lower, in_upper, below_all, above_all
+
+
+def multi_condition_check(a, b, c, threshold):
+    """Multiple condition checks."""
+    all_above = a > threshold and b > threshold and c > threshold
+    any_above = a > threshold or b > threshold or c > threshold
+    all_equal = a == b == c
+    none_below = a >= threshold and b >= threshold and c >= threshold
+    return all_above, any_above, all_equal, none_below
+
+
+def sorted_check(a, b, c):
+    """Check if values are sorted."""
+    ascending = a < b < c
+    descending = a > b > c
+    return ascending, descending
diff --git a/e2e_projects/benchmark_1k/src/benchmark/complex.py b/e2e_projects/benchmark_1k/src/benchmark/complex.py
new file mode 100644
index 00000000..980b3546
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/complex.py
@@ -0,0 +1,240 @@
+"""Complex call patterns."""
+
+
+# === Deep call chains (10 levels) - Chain 1 ===
+
+
+def chain1_level_10(x):
+    """Chain 1, level 10."""
+    return x + 1
+
+
+def chain1_level_9(x):
+    """Chain 1, level 9."""
+    return chain1_level_10(x) + 1
+
+
+def chain1_level_8(x):
+    """Chain 1, level 8."""
+    return chain1_level_9(x) + 1
+
+
+def chain1_level_7(x):
+    """Chain 1, level 7."""
+    return chain1_level_8(x) + 1
+
+
+def chain1_level_6(x):
+    """Chain 1, level 6."""
+    return chain1_level_7(x) + 1
+
+
+def chain1_level_5(x):
+    """Chain 1, level 5."""
+    return chain1_level_6(x) + 1
+
+
+def chain1_level_4(x):
+    """Chain 1, level 4."""
+    return chain1_level_5(x) + 1
+
+
+def chain1_level_3(x):
+    """Chain 1, level 3."""
+    return chain1_level_4(x) + 1
+
+
+def chain1_level_2(x):
+    """Chain 1, level 2."""
+    return chain1_level_3(x) + 1
+
+
+def chain1_level_1(x):
+    """Chain 1, level 1."""
+    return chain1_level_2(x) + 1
+
+
+def chain1_entry(x):
+    """Entry point for chain 1 (10 levels deep)."""
+    return chain1_level_1(x) * 2
+
+
+# === Tail recursion ===
+
+
+def factorial_tail(n, acc=1):
+    """Tail-recursive factorial."""
+    if n <= 1:
+        return acc
+    return factorial_tail(n - 1, acc * n)
+
+
+def sum_tail(n, acc=0):
+    """Tail-recursive sum."""
+    if n <= 0:
+        return acc
+    return sum_tail(n - 1, acc + n)
+
+
+def power_tail(base, exp, acc=1):
+    """Tail-recursive power."""
+    if exp <= 0:
+        return acc
+    return power_tail(base, exp - 1, acc * base)
+
+
+def gcd_tail(a, b):
+    """Tail-recursive GCD."""
+    if b == 0:
+        return a
+    return gcd_tail(b, a % b)
+
+
+# === Standard recursion ===
+
+
+def fibonacci(n):
+    """Standard recursive fibonacci."""
+    if n <= 0:
+        return 0
+    if n == 1:
+        return 1
+    return fibonacci(n - 1) + fibonacci(n - 2)
+
+
+def flatten(nested):
+    """Recursive list flattening."""
+    result = []
+    for item in nested:
+        if isinstance(item, list):
+            result.extend(flatten(item))
+        else:
+            result.append(item)
+    return result
+
+
+# === Mutual recursion ===
+
+
+def is_even(n):
+    """Check even via mutual recursion."""
+    if n == 0:
+        return True
+    if n < 0:
+        return is_even(-n)
+    return is_odd(n - 1)
+
+
+def is_odd(n):
+    """Check odd via mutual recursion."""
+    if n == 0:
+        return False
+    if n < 0:
+        return is_odd(-n)
+    return is_even(n - 1)
+
+
+def descend_a(n, acc=0):
+    """Mutual recursion pair A."""
+    if n <= 0:
+        return acc
+    return descend_b(n - 1, acc + 1)
+
+
+def descend_b(n, acc=0):
+    """Mutual recursion pair B."""
+    if n <= 0:
+        return acc
+    return descend_a(n - 1, acc + 2)
+
+
+# === Higher-order functions ===
+
+
+def apply_twice(f, x):
+    """Apply function twice."""
+    return f(f(x))
+
+
+def apply_n_times(f, x, n):
+    """Apply function n times."""
+    result = x
+    for _ in range(n):
+        result = f(result)
+    return result
+
+
+def compose(f, g):
+    """Compose two functions."""
+    return lambda x: f(g(x))
+
+
+def map_reduce(items, mapper, reducer, initial):
+    """Map-reduce pattern."""
+    mapped = [mapper(item) for item in items]
+    result = initial
+    for item in mapped:
+        result = reducer(result, item)
+    return result
+
+
+def with_callback(data, on_success, on_error):
+    """Process with callbacks."""
+    if data is not None:
+        return on_success(data)
+    return on_error("no data")
+
+
+# === Complex nested patterns ===
+
+
+def nested_loops(matrix):
+    """Nested loop processing."""
+    total = 0
+    for i in range(len(matrix)):
+        for j in range(len(matrix[i]) if i < len(matrix) else 0):
+            if matrix[i][j] > 0:
+                total += matrix[i][j] * 2
+            else:
+                total += matrix[i][j] + 1
+    return total
+
+
+def nested_conditions(x, y, z):
+    """Deeply nested conditions."""
+    if x > 0:
+        if y > 0:
+            if z > 0:
+                return x + y + z
+            else:
+                return x + y - z
+        else:
+            if z > 0:
+                return x - y + z
+            else:
+                return x - y - z
+    else:
+        if y > 0:
+            return y + z
+        else:
+            return z
+
+
+def accumulate_with_filter(items, predicate, transform):
+    """Accumulate filtered and transformed items."""
+    result = 0
+    for item in items:
+        if predicate(item):
+            transformed = transform(item)
+            result += transformed
+    return result
+
+
+def calculate_backoff(attempt, base_delay=1.0, max_delay=60.0):
+    """Calculate exponential backoff delay."""
+    if attempt <= 0:
+        return 0.0
+    delay = base_delay * (2 ** (attempt - 1))
+    if delay > max_delay:
+        return max_delay
+    return delay
diff --git a/e2e_projects/benchmark_1k/src/benchmark/numbers.py b/e2e_projects/benchmark_1k/src/benchmark/numbers.py
new file mode 100644
index 00000000..4939dea7
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/numbers.py
@@ -0,0 +1,69 @@
+"""Number mutation targets."""
+
+
+def constants_batch_1():
+    """Numeric constants."""
+    a = 0
+    b = 1
+    c = 2
+    return a + b + c
+
+
+def float_constants_1():
+    """Float constants."""
+    a = 0.5
+    b = 1.5
+    return a + b
+
+
+def negative_constants():
+    """Negative numeric constants."""
+    a = -1
+    b = -2
+    return a + b
+
+
+def arithmetic_simple(x):
+    """Simple arithmetic with literals."""
+    return x + 1
+
+
+def loop_range_1():
+    """Loop with range literals."""
+    total = 0
+    for i in range(5):
+        total += i + 1
+    return total
+
+
+def threshold_check_1(value):
+    """Threshold checking."""
+    if value > 0:
+        return 1
+    return 0
+
+
+def array_indices(items):
+    """Array index access with literals."""
+    if len(items) > 2:
+        return items[0] + items[1]
+    return 0
+
+
+def multipliers(x):
+    """Various multiplier values."""
+    a = x * 2
+    b = x * 3
+    return a + b
+
+
+def offsets(base):
+    """Offset calculations."""
+    return [base + 1]
+
+
+def dimensions():
+    """Dimension values."""
+    width = 100
+    height = 200
+    return width, height
diff --git a/e2e_projects/benchmark_1k/src/benchmark/operators.py b/e2e_projects/benchmark_1k/src/benchmark/operators.py
new file mode 100644
index 00000000..fa35e4d2
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/operators.py
@@ -0,0 +1,129 @@
+"""Operator mutation targets."""
+
+
+# === Arithmetic operators ===
+
+
+def add_sub_1(a, b):
+    """Addition and subtraction."""
+    add = a + b
+    sub = a - b
+    return add, sub
+
+
+def mul_div_1(a, b):
+    """Multiplication and division."""
+    mul = a * b
+    div = a / b if b != 0 else 0
+    return mul, div
+
+
+def integer_ops_1(a, b):
+    """Integer operations."""
+    floordiv = a // b if b != 0 else 0
+    mod = a % b if b != 0 else 0
+    return floordiv, mod
+
+
+def mixed_arithmetic_1(a, b, c):
+    """Mixed arithmetic."""
+    return a + b * c
+
+
+# === Bitwise operators ===
+
+
+def bitwise_shift_1(a):
+    """Bit shift."""
+    lshift = a << 1
+    rshift = a >> 1
+    return lshift, rshift
+
+
+def bitwise_and_or_1(a, b):
+    """Bitwise AND/OR."""
+    band = a & b
+    bor = a | b
+    return band, bor
+
+
+# === Augmented assignment ===
+
+
+def augmented_add_sub(x):
+    """Augmented add/sub."""
+    x += 1
+    x -= 1
+    return x
+
+
+def augmented_in_loop():
+    """Augmented assignment in loop."""
+    total = 0
+    for i in range(5):
+        total += i
+    return total
+
+
+# === Unary operators ===
+
+
+def unary_not_1(flag):
+    """Unary not."""
+    return not flag
+
+
+def unary_invert_1(x):
+    """Unary invert."""
+    return ~x
+
+
+def unary_minus(x):
+    """Unary minus."""
+    return -x
+
+
+# === Additional arithmetic ===
+
+
+def add_sub_2(a, b, c):
+    """More addition and subtraction."""
+    r1 = a + b + c
+    r2 = a - b - c
+    r3 = a + b - c
+    return r1, r2, r3
+
+
+def mul_div_2(a, b, c):
+    """More multiplication and division."""
+    r1 = a * b * c
+    r2 = a / b / c if b != 0 and c != 0 else 0
+    r3 = a * b / c if c != 0 else 0
+    return r1, r2, r3
+
+
+def integer_ops_2(a, b):
+    """More integer operations."""
+    r1 = a // 2
+    r2 = a % 2
+    r3 = a**2
+    r4 = b // 3
+    r5 = b % 3
+    return r1, r2, r3, r4, r5
+
+
+def augmented_batch(value):
+    """Batch of augmented assignments."""
+    value += 10
+    value -= 5
+    value *= 2
+    value //= 3
+    return value
+
+
+def bitwise_xor_ops(a, b):
+    """Bitwise XOR operations."""
+    r1 = a ^ b
+    r2 = a ^ 0xFF
+    r3 = b ^ 0x0F
+    return r1, r2, r3
diff --git a/e2e_projects/benchmark_1k/src/benchmark/returns.py b/e2e_projects/benchmark_1k/src/benchmark/returns.py
new file mode 100644
index 00000000..f7c434de
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/returns.py
@@ -0,0 +1,108 @@
+"""Return/assignment mutation targets."""
+
+
+# === Simple return values ===
+
+
+def simple_return_integers():
+    """Simple integer returns."""
+    return 42
+
+
+# === Simple value assignments ===
+
+
+def assign_integers():
+    """Integer assignments."""
+    a = 1
+    b = 2
+    return a, b
+
+
+def assign_strings():
+    """String assignments."""
+    a = "hello"
+    b = "world"
+    return a, b
+
+
+def assign_lists():
+    """List assignments."""
+    a = [1, 2, 3]
+    return (a,)
+
+
+def assign_mixed():
+    """Mixed type assignments."""
+    num = 42
+    text = "answer"
+    return num, text
+
+
+# === None assignments ===
+
+
+def assign_none_batch_1():
+    """None assignments."""
+    a = None
+    b = None
+    return a, b
+
+
+# === Typed assignments ===
+
+
+def typed_int():
+    """Typed integer assignments."""
+    x: int = 42
+    return (x,)
+
+
+def typed_str():
+    """Typed string assignments."""
+    name: str = "test"
+    return (name,)
+
+
+# === Lambdas returning values ===
+
+
+def lambda_integers():
+    """Lambdas returning integers."""
+    f1 = lambda: 1  # noqa: E731
+    f2 = lambda: 2  # noqa: E731
+    return f1, f2
+
+
+def lambda_strings():
+    """Lambdas returning strings."""
+    f1 = lambda: "hello"  # noqa: E731
+    return (f1,)
+
+
+def lambda_with_args():
+    """Lambdas with arguments."""
+    f1 = lambda x: x + 1  # noqa: E731
+    return (f1,)
+
+
+# === Lambdas returning None ===
+
+
+def lambda_none_batch_1():
+    """Lambdas returning None."""
+    f1 = lambda: None  # noqa: E731
+    f2 = lambda: None  # noqa: E731
+    return f1, f2
+
+
+# === Conditional assignments ===
+
+
+def conditional_assign_1(flag):
+    """Conditional assignment."""
+    if flag:
+        result = "yes"
+    else:
+        result = "no"
+    return result
diff --git a/e2e_projects/benchmark_1k/src/benchmark/strings.py b/e2e_projects/benchmark_1k/src/benchmark/strings.py
new file mode 100644
index 00000000..158ca2c5
--- /dev/null
+++ b/e2e_projects/benchmark_1k/src/benchmark/strings.py
@@ -0,0 +1,199 @@
+"""String mutation targets."""
+
+
+# === Simple strings ===
+
+
+def messages_batch_1():
+    """Simple string literals."""
+    a = "hello"
+    b = "world"
+    return a, b
+
+
+def labels_batch_1():
+    """Label strings."""
+    a = "name"
+    b = "value"
+    return a, b
+
+
+def states():
+    """State strings."""
+    a = "pending"
+    b = "active"
+    return a, b
+
+
+# === f-strings ===
+
+
+def format_name(name):
+    """f-string with name."""
+    return f"Name: {name}"
+
+
+def format_count(count):
+    """f-string with count."""
+    return f"Count: {count}"
+
+
+def format_result(value, unit):
+    """f-string with multiple values."""
+    return f"Result: {value} {unit}"
+
+
+# === String method calls ===
+
+
+def case_methods_1(s):
+    """Case conversion."""
+    lower = s.lower()
+    upper = s.upper()
+    return lower, upper
+
+
+def strip_methods_1(s):
+    """Strip methods."""
+    left = s.lstrip()
+    right = s.rstrip()
+    return left, right
+
+
+def find_methods_1(s, sub):
+    """Find methods."""
+    pos1 = s.find(sub)
+    pos2 = s.rfind(sub)
+    return pos1, pos2
+
+
+def split_methods_1(s, sep):
+    """Split methods."""
+    parts1 = s.split(sep, 2)
+    parts2 = s.rsplit(sep, 2)
+    return parts1, parts2
+
+
+def partition_methods(s, sep):
+    """Partition methods."""
+    p1 = s.partition(sep)
+    p2 = s.rpartition(sep)
+    return p1, p2
+
+
+# === Additional simple strings ===
+
+
+def messages_batch_2():
+    """More string literals."""
+    a = "start"
+    b = "stop"
+    c = "pause"
+    return a, b, c
+
+
+def messages_batch_3():
+    """Even more string literals."""
+    a = "error"
+    b = "warning"
+    c = "info"
+    d = "debug"
+    return a, b, c, d
+
+
+def symbols():
+    """Symbol strings."""
+    a = "alpha"
+    b = "beta"
+    c = "gamma"
+    return a, b, c
+
+
+def keywords():
+    """Keyword strings."""
+    a = "true"
+    b = "false"
+    c = "null"
+    d = "undefined"
+    return a, b, c, d
+
+
+# === Additional f-strings ===
+
+
+def format_error(code, message):
+    """f-string for error."""
+    return f"Error {code}: {message}"
+
+
+def format_coords(x, y):
+    """f-string for coordinates."""
+    return f"({x}, {y})"
+
+
+def format_path(directory, filename):
+    """f-string for path."""
+    return f"{directory}/{filename}"
+
+
+def format_greeting(title, name):
+    """f-string for greeting."""
+    return f"Hello, {title} {name}!"
+
+
+# === Additional string methods ===
+
+
+def case_methods_2(s):
+    """More case conversion."""
+    title = s.title()
+    cap = s.capitalize()
+    swap = s.swapcase()
+    return title, cap, swap
+
+
+def strip_methods_2(s, chars):
+    """Strip with chars."""
+    left = s.lstrip(chars)
+    right = s.rstrip(chars)
+    both = s.strip(chars)
+    return left, right, both
+
+
+def find_methods_2(s, sub, start):
+    """Find with start position."""
+    pos1 = s.find(sub, start)
+    pos2 = s.rfind(sub, start)
+    return pos1, pos2
+
+
+def replace_methods(s, old, new):
+    """Replace methods."""
+    r1 = s.replace(old, new)
+    r2 = s.replace(old, new, 1)
+    return r1, r2
+
+
+def justify_methods(s, width):
+    """Justify methods."""
+    left = s.ljust(width)
+    right = s.rjust(width)
+    center = s.center(width)
+    return left, right, center
+
+
+def index_methods(s, sub):
+    """Index methods."""
+    try:
+        i1 = s.index(sub)
+        i2 = s.rindex(sub)
+        return i1, i2
+    except ValueError:
+        return -1, -1
+
+
+def prefix_suffix_methods(s):
+    """Prefix/suffix removal."""
+    r1 = s.removeprefix("pre_")
+    r2 = s.removesuffix("_suf")
+    return r1, r2
diff --git a/e2e_projects/benchmark_1k/tests/__init__.py b/e2e_projects/benchmark_1k/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/e2e_projects/benchmark_1k/tests/conftest.py b/e2e_projects/benchmark_1k/tests/conftest.py
new file mode 100644
index 00000000..acf81987
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/conftest.py
@@ -0,0 +1,31 @@
+"""
+Pytest configuration for benchmark_1k tests.
+
+Simulates real-world test infrastructure costs:
+- BENCHMARK_CONFTEST_DELAY: Time for fixture/plugin setup (default: 0.1s)
+- BENCHMARK_IMPORT_DELAY: In src/benchmark/__init__.py for library imports
+- BENCHMARK_TEST_DELAY: Per-test runtime with +/-10% gaussian jitter (default: 0.05s)
+"""
+
+import os
+import random
+import time
+
+import pytest
+
+# Simulate conftest.py costs (fixtures, plugins, pytest hooks) with a one-time sleep at import
+conftest_delay = float(os.environ.get("BENCHMARK_CONFTEST_DELAY", "0.1"))  # seconds
+if conftest_delay > 0:
+    time.sleep(conftest_delay)
+_test_delay = float(os.environ.get("BENCHMARK_TEST_DELAY", "0.05"))  # mean per-test sleep, seconds
+
+
+@pytest.fixture(autouse=True)
+def benchmark_test_delay():
+    """Sleep a jittered BENCHMARK_TEST_DELAY before each test, then run the test."""
+    if _test_delay > 0:
+        # Apply +/-10% gaussian jitter (std = 10% of mean)
+        jittered = random.gauss(_test_delay, _test_delay * 0.1)
+        # Never sleep less than 0.01s, even if the gaussian draw is tiny or negative
+        time.sleep(max(0.01, jittered))
+    yield  # always yield: a generator fixture that never yields errors every test
diff --git a/e2e_projects/benchmark_1k/tests/test_arguments.py b/e2e_projects/benchmark_1k/tests/test_arguments.py
new file mode 100644
index 00000000..6f35d105
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_arguments.py
@@ -0,0 +1,56 @@
+"""Tests for arguments.py module."""
+
+from benchmark import arguments
+
+
+class TestArguments:
+    """Test argument functions."""
+
+    def test_combiner(self):
+        """Test combiner function."""
+        # Normal case - both values provided
+        assert arguments.combiner("a", "b") == "a-b"
+        # First is None - should return None
+        assert arguments.combiner(None, "b") is None
+        # Second is None - should return None
+        assert arguments.combiner("a", None) is None
+        # Both None - should return None
+        assert arguments.combiner(None, None) is None
+
+    def test_helper_2(self):
+        """Test helper_2."""
+        assert arguments.helper_2(1, 2) == (1, 2)
+
+    def test_helper_3(self):
+        """Test helper_3."""
+        assert arguments.helper_3(1, 2, 3) == (1, 2, 3)
+
+    def test_call_2args_batch_1(self):
+        """Test 2-arg calls."""
+        result = arguments.call_2args_batch_1()
+        assert result[0] == (1, 2)
+
+    def test_call_3args_batch_1(self):
+        """Test 3-arg calls."""
+        result = arguments.call_3args_batch_1()
+        assert result[0] == (1, 2, 3)
+
+    def test_dict_2keys_batch_1(self):
+        """Test dict with 2 keys."""
+        result = arguments.dict_2keys_batch_1()
+        assert result[0] == {"a": 1, "b": 2}
+
+    def test_dict_3keys_batch_1(self):
+        """Test dict with 3 keys."""
+        result = arguments.dict_3keys_batch_1()
+        assert result[0] == {"x": 1, "y": 2, "z": 3}
+
+    def test_string_method_calls(self):
+        """Test string method calls."""
+        result = arguments.string_method_calls()
+        assert result[0] == ["a", "b", "c-d-e"]
+
+    def test_format_calls(self):
+        """Test format calls."""
+        result = arguments.format_calls()
+        assert result[0] == "hello world"
diff --git a/e2e_projects/benchmark_1k/tests/test_booleans.py b/e2e_projects/benchmark_1k/tests/test_booleans.py
new file mode 100644
index 00000000..3417d09d
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_booleans.py
@@ -0,0 +1,201 @@
+"""Tests for booleans.py module."""
+
+from benchmark import booleans
+
+
+class TestBooleans:
+    """Test boolean functions."""
+
+    def test_flags_batch_1(self):
+        """Strong test - checks all values."""
+        enabled, disabled, active, paused = booleans.flags_batch_1()
+        assert enabled is True
+        assert disabled is False
+        assert active is True
+        assert paused is False
+
+    def test_flags_batch_2(self):
+        """Strong test - checks all values."""
+        visible, hidden, selected, focused = booleans.flags_batch_2()
+        assert visible is True
+        assert hidden is False
+        assert selected is True
+        assert focused is False
+
+    def test_flags_batch_3(self):
+        """Strong test - checks all values."""
+        running, stopped, ready, waiting = booleans.flags_batch_3()
+        assert running is True
+        assert stopped is False
+        assert ready is True
+        assert waiting is False
+
+    def test_flags_batch_4(self):
+        """Strong test - checks all values."""
+        valid, invalid, complete, pending = booleans.flags_batch_4()
+        assert valid is True
+        assert invalid is False
+        assert complete is True
+        assert pending is False
+
+    def test_conditional_returns_1(self):
+        """Strong test."""
+        assert booleans.conditional_returns_1(5) is True
+        assert booleans.conditional_returns_1(-5) is False
+
+    def test_conditional_returns_2(self):
+        """Strong test - checks all paths."""
+        assert booleans.conditional_returns_2(5, 5) is True  # x == y
+        assert booleans.conditional_returns_2(10, 5) is False  # x > y
+        assert booleans.conditional_returns_2(3, 5) is True  # x < y
+
+    def test_default_values(self):
+        """Strong test - checks all values."""
+        debug, verbose, quiet, strict = booleans.default_values()
+        assert debug is False
+        assert verbose is False
+        assert quiet is True
+        assert strict is True
+
+    def test_config_flags(self):
+        """Strong test - checks all values."""
+        auto_save, auto_load, cache_enabled, logging_enabled = booleans.config_flags()
+        assert auto_save is True
+        assert auto_load is False
+        assert cache_enabled is True
+        assert logging_enabled is False
+
+    def test_feature_flags(self):
+        """Strong test - checks all values."""
+        a, b, c, d = booleans.feature_flags()
+        assert a is True
+        assert b is False
+        assert c is True
+        assert d is False
+
+    def test_logical_and_simple(self):
+        """Strong test."""
+        assert booleans.logical_and_simple(True, True) is True
+        assert booleans.logical_and_simple(True, False) is False
+
+    def test_logical_or_simple(self):
+        """Strong test."""
+        assert booleans.logical_or_simple(False, True) is True
+        assert booleans.logical_or_simple(False, False) is False
+
+    def test_logical_and_chain_1(self):
+        """Strong test - distinguishes and from or."""
+        assert booleans.logical_and_chain_1(True, True, True) is True
+        # This would be True if any 'and' became 'or'
+        assert booleans.logical_and_chain_1(False, True, True) is False
+        assert booleans.logical_and_chain_1(True, False, True) is False
+
+    def test_logical_and_chain_2(self):
+        """Weak test."""
+        result = booleans.logical_and_chain_2(True, True, True, False)
+        assert result is False
+
+    def test_logical_or_chain_1(self):
+        """Strong test - distinguishes or from and."""
+        assert booleans.logical_or_chain_1(False, False, True) is True
+        # This would be False if any 'or' became 'and'
+        assert booleans.logical_or_chain_1(True, False, False) is True
+        assert booleans.logical_or_chain_1(False, True, False) is True
+        assert booleans.logical_or_chain_1(False, False, False) is False
+
+    def test_logical_or_chain_2(self):
+        """Strong test - distinguishes or from and."""
+        assert booleans.logical_or_chain_2(False, False, False, False) is False
+        # These would fail if 'or' became 'and'
+        assert booleans.logical_or_chain_2(True, False, False, False) is True
+        assert booleans.logical_or_chain_2(False, True, False, False) is True
+        assert booleans.logical_or_chain_2(False, False, True, False) is True
+        assert booleans.logical_or_chain_2(False, False, False, True) is True
+
+    def test_mixed_logic_1(self):
+        """Strong test - (a and b) or (c and d)."""
+        # True when a and b are both True
+        assert booleans.mixed_logic_1(True, True, False, False) is True
+        # True when c and d are both True
+        assert booleans.mixed_logic_1(False, False, True, True) is True
+        # False when neither pair is both True
+        assert booleans.mixed_logic_1(True, False, True, False) is False
+        assert booleans.mixed_logic_1(False, True, False, True) is False
+
+    def test_mixed_logic_2(self):
+        """Strong test - (a or b) and (c or d)."""
+        # True when both pairs have at least one True
+        assert booleans.mixed_logic_2(True, False, True, False) is True
+        assert booleans.mixed_logic_2(False, True, False, True) is True
+        # False when first pair has no True
+        assert booleans.mixed_logic_2(False, False, True, True) is False
+        # False when second pair has no True
+        assert booleans.mixed_logic_2(True, True, False, False) is False
+
+    def test_mixed_logic_3(self):
+        """Strong test - a and b or c (precedence: (a and b) or c)."""
+        assert booleans.mixed_logic_3(True, True, False) is True  # (T and T) or F = T
+        assert booleans.mixed_logic_3(False, True, True) is True  # (F and T) or T = T
+        assert booleans.mixed_logic_3(True, False, False) is False  # (T and F) or F = F
+        # All-False input must stay False (the case above is what catches 'and' -> 'or')
+        assert booleans.mixed_logic_3(False, False, False) is False
+
+    def test_mixed_logic_4(self):
+        """Strong test - a or b and c (precedence: a or (b and c))."""
+        assert booleans.mixed_logic_4(False, True, True) is True  # F or (T and T) = T
+        assert booleans.mixed_logic_4(True, False, False) is True  # T or (F and F) = T
+        assert booleans.mixed_logic_4(False, True, False) is False  # F or (T and F) = F
+        assert booleans.mixed_logic_4(False, False, True) is False  # F or (F and T) = F
+
+    def test_condition_with_and(self):
+        """Strong test - detects and/or and comparison mutations."""
+        # All positive: first condition True, second condition True, result stays True
+        assert booleans.condition_with_and(1, 1, 1) is True
+        # x not > 0: first condition fails, second condition (y>0 and z>0) True, result = False and True = False
+        assert booleans.condition_with_and(0, 1, 1) is False
+        # y not > 0: both conditions fail
+        assert booleans.condition_with_and(1, 0, 1) is False
+        # y > 0, z not > 0: first True, second fails, result stays True
+        assert booleans.condition_with_and(1, 1, 0) is True
+        # All zero: both conditions fail
+        assert booleans.condition_with_and(0, 0, 0) is False
+
+    def test_condition_with_or(self):
+        """Strong test - detects and/or mutations."""
+        # x > 0: first or condition True
+        assert booleans.condition_with_or(1, 0, 0) is True
+        # y > 0: first or condition True
+        assert booleans.condition_with_or(0, 1, 0) is True
+        # Neither x nor y > 0: first or condition False, result stays True from init
+        assert booleans.condition_with_or(0, 0, 0) is True
+        # y < 0 or z < 0: second or condition (result or False stays same)
+        assert booleans.condition_with_or(-1, -1, 0) is True  # -1 < 0 is True
+
+    def test_complex_condition_1(self):
+        """Strong test - (a > 0 and b > 0) or (c > 0 and d > 0)."""
+        # First pair True
+        assert booleans.complex_condition_1(1, 1, 0, 0) is True
+        # Second pair True
+        assert booleans.complex_condition_1(0, 0, 1, 1) is True
+        # Neither pair True
+        assert booleans.complex_condition_1(1, 0, 1, 0) is False
+        assert booleans.complex_condition_1(0, 1, 0, 1) is False
+        # All zero
+        assert booleans.complex_condition_1(0, 0, 0, 0) is False
+
+    def test_guard_clauses(self):
+        """Strong test."""
+        assert booleans.guard_clauses(5, 0, 10, True) is True
+        assert booleans.guard_clauses(None, 0, 10, False) is True
+        assert booleans.guard_clauses(15, 0, 10, True) is False
+
+    def test_validation_flags(self):
+        """Test validation flags."""
+        has_contact, is_complete, is_valid, can_proceed, needs_review = booleans.validation_flags(
+            has_name=True, has_email=True, has_phone=False, is_verified=True, is_active=True
+        )
+        assert has_contact is True
+        assert is_complete is True
+        assert is_valid is True
+        assert can_proceed is True
+        assert needs_review is False
diff --git a/e2e_projects/benchmark_1k/tests/test_comparisons.py b/e2e_projects/benchmark_1k/tests/test_comparisons.py
new file mode 100644
index 00000000..57b71416
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_comparisons.py
@@ -0,0 +1,269 @@
+"""Tests for comparisons.py module."""
+
+from benchmark import comparisons
+
+
+class TestComparisons:
+    """Test comparison functions."""
+
+    def test_equality_simple(self):
+        """Strong test."""
+        eq, neq = comparisons.equality_simple(5, 5)
+        assert eq is True
+        assert neq is False
+
+    def test_equality_batch_1(self):
+        """Strong test - checks all return values."""
+        r1, r2, r3 = comparisons.equality_batch_1(1, 1, 2)
+        assert r1 is True  # 1 == 1
+        assert r2 is False  # 1 == 2
+        assert r3 is True  # 1 != 2
+
+    def test_equality_with_literals(self):
+        """Strong test - checks all values."""
+        result = comparisons.equality_with_literals(0)
+        assert result[0] is True  # is_zero
+        assert result[1] is False  # is_one
+        assert result[2] is False  # not_zero
+        assert result[3] is True  # not_one
+        # Also test with 1 to catch == 1 / != 1 mutations
+        result1 = comparisons.equality_with_literals(1)
+        assert result1[1] is True  # is_one
+
+    def test_equality_strings(self):
+        """Strong test - checks all values."""
+        result = comparisons.equality_strings("")
+        assert result[0] is True  # is_empty
+        assert result[1] is False  # is_hello
+        assert result[2] is False  # not_empty
+        # Test with "hello" to catch == "hello" mutation
+        result_hello = comparisons.equality_strings("hello")
+        assert result_hello[1] is True  # is_hello
+        assert result_hello[2] is True  # not_empty
+
+    def test_less_than_simple(self):
+        """Strong test - tests boundary to distinguish < from <=."""
+        lt, le = comparisons.less_than_simple(3, 5)
+        assert lt is True
+        assert le is True
+        # Test at boundary: 5, 5 - lt should be False, le should be True
+        lt_eq, le_eq = comparisons.less_than_simple(5, 5)
+        assert lt_eq is False  # 5 < 5 is False
+        assert le_eq is True  # 5 <= 5 is True
+
+    def test_less_than_batch_1(self):
+        """Strong test - checks all values and boundaries."""
+        result = comparisons.less_than_batch_1(1, 2, 3)
+        assert result[0] is True  # 1 < 2
+        assert result[1] is True  # 2 < 3
+        assert result[2] is True  # 1 <= 3
+        # Test boundary to distinguish < from <=
+        result_eq = comparisons.less_than_batch_1(2, 2, 2)
+        assert result_eq[0] is False  # 2 < 2 is False
+        assert result_eq[1] is False  # 2 < 2 is False
+        assert result_eq[2] is True  # 2 <= 2 is True
+
+    def test_less_than_batch_2(self):
+        """Strong test - checks boundary."""
+        below, at_or_below = comparisons.less_than_batch_2(5, 10)
+        assert below is True
+        assert at_or_below is True
+        # Test at boundary to distinguish < from <=
+        below_eq, at_eq = comparisons.less_than_batch_2(10, 10)
+        assert below_eq is False  # 10 < 10 is False
+        assert at_eq is True  # 10 <= 10 is True
+
+    def test_less_than_literals(self):
+        """Strong test - checks boundaries."""
+        result = comparisons.less_than_literals(-1)
+        assert result[0] is True  # lt_zero: -1 < 0
+        assert result[1] is True  # lt_ten: -1 < 10
+        assert result[2] is True  # le_zero: -1 <= 0
+        # Test at boundary 0 to distinguish < from <=
+        result_zero = comparisons.less_than_literals(0)
+        assert result_zero[0] is False  # 0 < 0 is False
+        assert result_zero[2] is True  # 0 <= 0 is True
+
+    def test_greater_than_simple(self):
+        """Strong test - tests boundary."""
+        gt, ge = comparisons.greater_than_simple(5, 3)
+        assert gt is True
+        assert ge is True
+        # Test at boundary to distinguish > from >=
+        gt_eq, ge_eq = comparisons.greater_than_simple(5, 5)
+        assert gt_eq is False  # 5 > 5 is False
+        assert ge_eq is True  # 5 >= 5 is True
+
+    def test_greater_than_batch_1(self):
+        """Strong test - checks all values and boundary."""
+        result = comparisons.greater_than_batch_1(3, 2, 1)
+        assert result[0] is True  # 3 > 2
+        assert result[1] is True  # 2 > 1
+        assert result[2] is True  # 3 >= 1
+        # Test boundary to distinguish > from >=
+        result_eq = comparisons.greater_than_batch_1(2, 2, 2)
+        assert result_eq[0] is False  # 2 > 2 is False
+        assert result_eq[1] is False  # 2 > 2 is False
+        assert result_eq[2] is True  # 2 >= 2 is True
+
+    def test_greater_than_batch_2(self):
+        """Strong test - checks boundary."""
+        above, at_or_above = comparisons.greater_than_batch_2(15, 10)
+        assert above is True
+        assert at_or_above is True
+        # Test at boundary to distinguish > from >=
+        above_eq, at_eq = comparisons.greater_than_batch_2(10, 10)
+        assert above_eq is False  # 10 > 10 is False
+        assert at_eq is True  # 10 >= 10 is True
+
+    def test_greater_than_literals(self):
+        """Strong test - checks boundaries."""
+        result = comparisons.greater_than_literals(5)
+        assert result[0] is True  # gt_zero: 5 > 0
+        assert result[1] is False  # gt_ten: 5 > 10 is False
+        assert result[2] is True  # ge_zero: 5 >= 0
+        # Test at boundary 0 to distinguish > from >=
+        result_zero = comparisons.greater_than_literals(0)
+        assert result_zero[0] is False  # 0 > 0 is False
+        assert result_zero[2] is True  # 0 >= 0 is True
+
+    def test_identity_none(self):
+        """Strong test."""
+        is_none, is_not_none = comparisons.identity_none(None)
+        assert is_none is True
+        assert is_not_none is False
+
+    def test_identity_batch_1(self):
+        """Strong test - checks both values."""
+        obj = object()
+        same, different = comparisons.identity_batch_1(obj, obj)
+        assert same is True
+        assert different is False
+        # Test with different objects
+        obj2 = object()
+        same2, different2 = comparisons.identity_batch_1(obj, obj2)
+        assert same2 is False
+        assert different2 is True
+
+    def test_identity_checks(self):
+        """Coverage test."""
+        result = comparisons.identity_checks(5, 10)
+        assert result == 5
+
+    def test_membership_simple(self):
+        """Strong test."""
+        present, absent = comparisons.membership_simple(2, [1, 2, 3])
+        assert present is True
+        assert absent is False
+
+    def test_membership_batch_1(self):
+        """Strong test - checks both values."""
+        r1, r2 = comparisons.membership_batch_1(1, [1, 2, 3])
+        assert r1 is True  # 1 in [1, 2, 3]
+        assert r2 is False  # 1 not in [1, 2, 3] is False
+        # Test with missing item
+        r1_missing, r2_missing = comparisons.membership_batch_1(99, [1, 2, 3])
+        assert r1_missing is False  # 99 in [1, 2, 3] is False
+        assert r2_missing is True  # 99 not in [1, 2, 3]
+
+    def test_membership_string(self):
+        """Strong test."""
+        found, not_found = comparisons.membership_string("a", "abc")
+        assert found is True
+        assert not_found is False
+
+    def test_membership_dict(self):
+        """Strong test."""
+        has_key, missing_key = comparisons.membership_dict("a", {"a": 1})
+        assert has_key is True
+        assert missing_key is False
+
+    def test_boundary_check_1(self):
+        """Strong test - tests all boundaries."""
+        assert comparisons.boundary_check_1(-1) == "negative"
+        assert comparisons.boundary_check_1(0) == "zero"
+        assert comparisons.boundary_check_1(5) == "small"
+        assert comparisons.boundary_check_1(10) == "small"  # boundary: <= 10
+        assert comparisons.boundary_check_1(11) == "medium"  # boundary: > 10, < 100
+        assert comparisons.boundary_check_1(99) == "medium"  # boundary: < 100
+        assert comparisons.boundary_check_1(100) == "large"  # boundary: >= 100
+
+    def test_boundary_check_2(self):
+        """Strong test - tests all cases."""
+        assert comparisons.boundary_check_2(-1, 0, 10) == "below"  # < low
+        assert comparisons.boundary_check_2(15, 0, 10) == "above"  # > high
+        assert comparisons.boundary_check_2(0, 0, 10) == "at_low"  # == low
+        assert comparisons.boundary_check_2(10, 0, 10) == "at_high"  # == high
+        assert comparisons.boundary_check_2(5, 0, 10) == "within"  # in range
+
+    def test_range_check(self):
+        """Strong test - tests boundaries."""
+        assert comparisons.range_check(5, 0, 10) is True  # within
+        assert comparisons.range_check(0, 0, 10) is True  # at min (>= min_val)
+        assert comparisons.range_check(10, 0, 10) is True  # at max (<= max_val)
+        assert comparisons.range_check(-1, 0, 10) is False  # below min
+        assert comparisons.range_check(11, 0, 10) is False  # above max
+
+    def test_compare_all(self):
+        """Strong test - checks all comparison results."""
+        result = comparisons.compare_all(5, 3)
+        assert result["eq"] is False  # 5 == 3
+        assert result["ne"] is True  # 5 != 3
+        assert result["lt"] is False  # 5 < 3
+        assert result["le"] is False  # 5 <= 3
+        assert result["gt"] is True  # 5 > 3
+        assert result["ge"] is True  # 5 >= 3
+        # Test boundary to distinguish < from <=, > from >=
+        result_eq = comparisons.compare_all(5, 5)
+        assert result_eq["eq"] is True
+        assert result_eq["lt"] is False  # 5 < 5
+        assert result_eq["le"] is True  # 5 <= 5
+        assert result_eq["gt"] is False  # 5 > 5
+        assert result_eq["ge"] is True  # 5 >= 5
+
+    def test_chained_comparisons(self):
+        """Strong test - tests boundaries."""
+        in_lower, in_upper, below, above = comparisons.chained_comparisons(5, 0, 10, 20)
+        assert in_lower is True  # 0 <= 5 < 10
+        assert in_upper is False  # 10 <= 5 <= 20 is False
+        assert below is False
+        assert above is False
+        # Test at boundaries
+        # x=0: 0 <= 0 < 10 is True
+        in_lower_0, _, _, _ = comparisons.chained_comparisons(0, 0, 10, 20)
+        assert in_lower_0 is True
+        # x=10: 0 <= 10 < 10 is False (< 10 fails), 10 <= 10 <= 20 is True
+        in_lower_10, in_upper_10, _, _ = comparisons.chained_comparisons(10, 0, 10, 20)
+        assert in_lower_10 is False  # boundary: < 10 fails
+        assert in_upper_10 is True  # 10 <= 10 <= 20
+        # Test below/above
+        _, _, below_neg, _ = comparisons.chained_comparisons(-5, 0, 10, 20)
+        assert below_neg is True
+        _, _, _, above_30 = comparisons.chained_comparisons(30, 0, 10, 20)
+        assert above_30 is True
+
+    def test_multi_condition_check(self):
+        """Strong test - tests boundaries and all paths."""
+        all_above, any_above, all_equal, none_below = comparisons.multi_condition_check(5, 10, 15, 3)
+        assert all_above is True  # all > 3
+        assert any_above is True
+        assert all_equal is False  # 5 != 10 != 15
+        assert none_below is True  # all >= 3
+        # Test at threshold boundary (>= vs >)
+        all_above_t, any_above_t, _, none_below_t = comparisons.multi_condition_check(3, 3, 3, 3)
+        assert all_above_t is False  # 3 > 3 is False
+        assert any_above_t is False  # none > 3
+        assert none_below_t is True  # all >= 3
+        # Test with one above threshold
+        all_above_one, any_above_one, _, _ = comparisons.multi_condition_check(2, 2, 5, 3)
+        assert all_above_one is False  # not all > 3
+        assert any_above_one is True  # 5 > 3
+        # Test all equal
+        _, _, all_eq, _ = comparisons.multi_condition_check(5, 5, 5, 0)
+        assert all_eq is True
+
+    def test_sorted_check(self):
+        """Test sorted checks."""
+        asc, desc = comparisons.sorted_check(1, 2, 3)
+        assert asc is True
+        assert desc is False
diff --git a/e2e_projects/benchmark_1k/tests/test_complex.py b/e2e_projects/benchmark_1k/tests/test_complex.py
new file mode 100644
index 00000000..a21d5fbe
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_complex.py
@@ -0,0 +1,121 @@
+"""Tests for complex.py module."""
+
+from benchmark import complex
+
+
+class TestComplex:
+    """Test complex call patterns."""
+
+    def test_chain1_entry(self):
+        """Strong test - exercises 10-level deep call chain."""
+        result = complex.chain1_entry(0)
+        assert result == 20  # (0 + 1*10) * 2
+
+    def test_factorial_tail(self):
+        """Strong test."""
+        assert complex.factorial_tail(5) == 120
+        assert complex.factorial_tail(0) == 1
+        assert complex.factorial_tail(1) == 1
+
+    def test_sum_tail(self):
+        """Strong test."""
+        assert complex.sum_tail(10) == 55
+
+    def test_power_tail(self):
+        """Strong test."""
+        assert complex.power_tail(2, 3) == 8
+        assert complex.power_tail(3, 2) == 9
+
+    def test_gcd_tail(self):
+        """Strong test."""
+        assert complex.gcd_tail(48, 18) == 6
+
+    def test_fibonacci(self):
+        """Strong test."""
+        assert complex.fibonacci(0) == 0
+        assert complex.fibonacci(1) == 1
+        assert complex.fibonacci(10) == 55
+
+    def test_flatten(self):
+        """Strong test."""
+        assert complex.flatten([1, [2, 3], [4, [5]]]) == [1, 2, 3, 4, 5]
+
+    def test_is_even(self):
+        """Strong test."""
+        assert complex.is_even(4) is True
+        assert complex.is_even(3) is False
+
+    def test_is_odd(self):
+        """Strong test."""
+        assert complex.is_odd(3) is True
+        assert complex.is_odd(4) is False
+
+    def test_descend_a(self):
+        """Strong test - checks exact value."""
+        # 5 -> b(4, 1) -> a(3, 3) -> b(2, 4) -> a(1, 6) -> b(0, 7) -> returns 7
+        assert complex.descend_a(5) == 7
+        # boundary: n=0 should return acc immediately
+        assert complex.descend_a(0) == 0
+
+    def test_apply_twice(self):
+        """Strong test."""
+        assert complex.apply_twice(lambda x: x + 1, 0) == 2
+
+    def test_apply_n_times(self):
+        """Strong test."""
+        assert complex.apply_n_times(lambda x: x * 2, 1, 3) == 8
+
+    def test_compose(self):
+        """Strong test."""
+        f = complex.compose(lambda x: x + 1, lambda x: x * 2)
+        assert f(3) == 7  # (3 * 2) + 1
+
+    def test_map_reduce(self):
+        """Strong test."""
+        result = complex.map_reduce([1, 2, 3], lambda x: x * 2, lambda acc, x: acc + x, 0)
+        assert result == 12  # (1*2) + (2*2) + (3*2)
+
+    def test_with_callback(self):
+        """Strong test."""
+        result = complex.with_callback("data", lambda d: f"success: {d}", lambda e: f"error: {e}")
+        assert result == "success: data"
+
+    def test_nested_loops(self):
+        """Strong test - checks exact values."""
+        # [[1, 2], [3, 4]] -> 1*2 + 2*2 + 3*2 + 4*2 = 20
+        assert complex.nested_loops([[1, 2], [3, 4]]) == 20
+        # Test with negative values: -1+1 + -2+1 = 0 + -1 = -1
+        assert complex.nested_loops([[-1, -2]]) == -1
+        # Test boundary: 0 is not > 0, so uses else branch: 0+1 = 1
+        assert complex.nested_loops([[0]]) == 1
+
+    def test_nested_conditions(self):
+        """Strong test - tests all paths."""
+        # x>0, y>0, z>0: x+y+z
+        assert complex.nested_conditions(1, 1, 1) == 3
+        # x>0, y>0, z<=0: x+y-z
+        assert complex.nested_conditions(1, 1, -1) == 3  # 1+1-(-1)=3
+        # x>0, y<=0, z>0: x-y+z
+        assert complex.nested_conditions(1, -1, 1) == 3  # 1-(-1)+1=3
+        # x>0, y<=0, z<=0: x-y-z
+        assert complex.nested_conditions(1, -1, -1) == 3  # 1-(-1)-(-1)=3
+        # x<=0, y>0: y+z
+        assert complex.nested_conditions(-1, 1, 1) == 2
+        # x<=0, y<=0: z
+        assert complex.nested_conditions(-1, -1, 5) == 5
+        # Test boundary: x=0 takes else branch
+        assert complex.nested_conditions(0, 1, 1) == 2
+
+    def test_accumulate_with_filter(self):
+        """Strong test."""
+        result = complex.accumulate_with_filter([1, 2, 3, 4, 5], lambda x: x % 2 == 0, lambda x: x * 10)
+        assert result == 60  # (2*10) + (4*10)
+
+    def test_calculate_backoff(self):
+        """Strong test - exponential backoff calculation."""
+        assert complex.calculate_backoff(0) == 0.0
+        assert complex.calculate_backoff(1) == 1.0
+        assert complex.calculate_backoff(2) == 2.0
+        assert complex.calculate_backoff(3) == 4.0
+        # Test max_delay cap
+        assert complex.calculate_backoff(10, max_delay=10.0) == 10.0
diff --git a/e2e_projects/benchmark_1k/tests/test_numbers.py b/e2e_projects/benchmark_1k/tests/test_numbers.py
new file mode 100644
index 00000000..2883a43e
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_numbers.py
@@ -0,0 +1,56 @@
+"""Tests for numbers.py module."""
+
+from benchmark import numbers
+
+
+class TestNumbers:
+    """Test number-heavy functions."""
+
+    def test_constants_batch_1(self):
+        """Test constants."""
+        result = numbers.constants_batch_1()
+        assert result == 3  # 0+1+2
+
+    def test_float_constants_1(self):
+        """Test float constants."""
+        result = numbers.float_constants_1()
+        assert 1.5 < result < 2.5
+
+    def test_negative_constants(self):
+        """Test negative constants."""
+        result = numbers.negative_constants()
+        assert result < 0
+
+    def test_arithmetic_simple(self):
+        """Test arithmetic."""
+        assert numbers.arithmetic_simple(0) == 1  # 0+1
+
+    def test_loop_range_1(self):
+        """Test loop range."""
+        result = numbers.loop_range_1()
+        assert result == 15  # sum(i+1 for i in range(5))
+
+    def test_threshold_check_1(self):
+        """Test threshold check."""
+        assert numbers.threshold_check_1(-1) == 0
+        assert numbers.threshold_check_1(5) == 1
+
+    def test_array_indices(self):
+        """Test array indices."""
+        assert numbers.array_indices([1, 2, 3, 4]) == 3  # items[0]+items[1]
+
+    def test_multipliers(self):
+        """Test multipliers."""
+        result = numbers.multipliers(10)
+        assert result == 50  # 10*2 + 10*3 = 50
+
+    def test_offsets(self):
+        """Test offsets."""
+        result = numbers.offsets(100)
+        assert len(result) == 1
+        assert result[0] == 101
+
+    def test_dimensions(self):
+        """Test dimensions."""
+        result = numbers.dimensions()
+        assert result == (100, 200)
diff --git a/e2e_projects/benchmark_1k/tests/test_operators.py b/e2e_projects/benchmark_1k/tests/test_operators.py
new file mode 100644
index 00000000..28965f2e
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_operators.py
@@ -0,0 +1,94 @@
+"""Tests for operators.py module."""
+
+from benchmark import operators
+
+
+class TestOperators:
+    """Test operator functions."""
+
+    def test_add_sub_1(self):
+        """Test add/sub."""
+        add, sub = operators.add_sub_1(10, 3)
+        assert add == 13
+        assert sub == 7
+
+    def test_mul_div_1(self):
+        """Test mul/div."""
+        mul, div = operators.mul_div_1(10, 2)
+        assert mul == 20
+        assert div == 5
+
+    def test_integer_ops_1(self):
+        """Test integer ops."""
+        floordiv, mod = operators.integer_ops_1(10, 3)
+        assert floordiv == 3
+        assert mod == 1
+
+    def test_mixed_arithmetic_1(self):
+        """Test mixed arithmetic."""
+        assert operators.mixed_arithmetic_1(2, 3, 4) == 14  # 2 + 3*4
+
+    def test_bitwise_shift_1(self):
+        """Test bitwise shift."""
+        lshift, rshift = operators.bitwise_shift_1(4)
+        assert lshift == 8
+        assert rshift == 2
+
+    def test_bitwise_and_or_1(self):
+        """Test bitwise and/or."""
+        band, bor = operators.bitwise_and_or_1(0b1100, 0b1010)
+        assert band == 0b1000
+        assert bor == 0b1110
+
+    def test_augmented_add_sub(self):
+        """Test augmented add/sub."""
+        result = operators.augmented_add_sub(10)
+        assert result == 10  # 10 + 1 - 1
+
+    def test_augmented_in_loop(self):
+        """Test augmented in loop."""
+        result = operators.augmented_in_loop()
+        assert result == 10  # sum(range(5))
+
+    def test_unary_not_1(self):
+        """Test unary not."""
+        assert operators.unary_not_1(True) is False
+        assert operators.unary_not_1(False) is True
+
+    def test_unary_invert_1(self):
+        """Test unary invert."""
+        assert operators.unary_invert_1(0) == -1
+
+    def test_unary_minus(self):
+        """Test unary minus."""
+        assert operators.unary_minus(5) == -5
+
+    def test_add_sub_2(self):
+        """Test more add/sub."""
+        r1, r2, r3 = operators.add_sub_2(10, 5, 3)
+        assert r1 == 18  # 10+5+3
+        assert r2 == 2  # 10-5-3
+        assert r3 == 12  # 10+5-3
+
+    def test_mul_div_2(self):
+        """Test more mul/div."""
+        r1, r2, r3 = operators.mul_div_2(2, 3, 4)
+        assert r1 == 24  # 2*3*4
+        assert r3 == 1.5  # 2*3/4
+
+    def test_integer_ops_2(self):
+        """Test more integer ops."""
+        r1, r2, r3, r4, r5 = operators.integer_ops_2(10, 11)
+        assert r1 == 5  # 10 // 2
+        assert r2 == 0  # 10 % 2
+        assert r3 == 100  # 10 ** 2
+
+    def test_augmented_batch(self):
+        """Test augmented batch."""
+        result = operators.augmented_batch(10)
+        assert result == 10  # (10+10-5)*2//3 = 30//3 = 10
+
+    def test_bitwise_xor_ops(self):
+        """Test bitwise XOR."""
+        r1, r2, r3 = operators.bitwise_xor_ops(0b1010, 0b1100)
+        assert r1 == 0b0110  # 1010 ^ 1100
diff --git a/e2e_projects/benchmark_1k/tests/test_returns.py b/e2e_projects/benchmark_1k/tests/test_returns.py
new file mode 100644
index 00000000..84b98a2c
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_returns.py
@@ -0,0 +1,72 @@
+"""Tests for returns.py module."""
+
+from benchmark import returns
+
+
+class TestReturns:
+    """Test return/assignment functions."""
+
+    def test_simple_return_integers(self):
+        """Test simple integer return."""
+        assert returns.simple_return_integers() == 42
+
+    def test_assign_integers(self):
+        """Test integer assignments."""
+        result = returns.assign_integers()
+        assert result == (1, 2)
+
+    def test_assign_strings(self):
+        """Test string assignments."""
+        result = returns.assign_strings()
+        assert result[0] == "hello"
+
+    def test_assign_lists(self):
+        """Test list assignments."""
+        result = returns.assign_lists()
+        assert result[0] == [1, 2, 3]
+
+    def test_assign_mixed(self):
+        """Test mixed assignments."""
+        result = returns.assign_mixed()
+        assert result == (42, "answer")
+
+    def test_assign_none_batch_1(self):
+        """Test None assignments."""
+        result = returns.assign_none_batch_1()
+        assert all(r is None for r in result)
+
+    def test_typed_int(self):
+        """Test typed int."""
+        result = returns.typed_int()
+        assert result[0] == 42
+
+    def test_typed_str(self):
+        """Test typed str."""
+        result = returns.typed_str()
+        assert result[0] == "test"
+
+    def test_lambda_integers(self):
+        """Test lambda integers."""
+        f1, f2 = returns.lambda_integers()
+        assert f1() == 1
+        assert f2() == 2
+
+    def test_lambda_strings(self):
+        """Test lambda strings."""
+        result = returns.lambda_strings()
+        assert result[0]() == "hello"
+
+    def test_lambda_with_args(self):
+        """Test lambda with args."""
+        result = returns.lambda_with_args()
+        assert result[0](5) == 6
+
+    def test_lambda_none_batch_1(self):
+        """Test lambda None."""
+        f1, f2 = returns.lambda_none_batch_1()
+        assert f1() is None
+
+    def test_conditional_assign_1(self):
+        """Test conditional assignment."""
+        assert returns.conditional_assign_1(True) == "yes"
+        assert returns.conditional_assign_1(False) == "no"
diff --git a/e2e_projects/benchmark_1k/tests/test_strings.py b/e2e_projects/benchmark_1k/tests/test_strings.py
new file mode 100644
index 00000000..3071646a
--- /dev/null
+++ b/e2e_projects/benchmark_1k/tests/test_strings.py
@@ -0,0 +1,142 @@
+"""Tests for strings.py module."""
+
+from benchmark import strings
+
+
+class TestStrings:
+    """Test string-heavy functions."""
+
+    def test_messages_batch_1(self):
+        """Test message strings."""
+        result = strings.messages_batch_1()
+        assert result == ("hello", "world")
+
+    def test_labels_batch_1(self):
+        """Test label strings."""
+        result = strings.labels_batch_1()
+        assert result[0] == "name"
+
+    def test_states(self):
+        """Test state strings."""
+        result = strings.states()
+        assert result == ("pending", "active")
+
+    def test_format_name(self):
+        """Test f-string with name."""
+        assert strings.format_name("Alice") == "Name: Alice"
+
+    def test_format_count(self):
+        """Test f-string with count."""
+        assert strings.format_count(42) == "Count: 42"
+
+    def test_format_result(self):
+        """Test f-string with result."""
+        assert strings.format_result(10, "kg") == "Result: 10 kg"
+
+    def test_case_methods_1(self):
+        """Test case methods."""
+        lower, upper = strings.case_methods_1("HeLLo")
+        assert lower == "hello"
+        assert upper == "HELLO"
+
+    def test_strip_methods_1(self):
+        """Test strip methods."""
+        left, right = strings.strip_methods_1("  hello  ")
+        assert left == "hello  "
+        assert right == "  hello"
+
+    def test_find_methods_1(self):
+        """Test find methods."""
+        pos1, pos2 = strings.find_methods_1("hello world hello", "hello")
+        assert pos1 == 0
+        assert pos2 == 12
+
+    def test_split_methods_1(self):
+        """Test split methods."""
+        parts1, parts2 = strings.split_methods_1("a-b-c-d", "-")
+        assert parts1 == ["a", "b", "c-d"]
+        assert parts2 == ["a-b", "c", "d"]
+
+    def test_partition_methods(self):
+        """Test partition methods."""
+        p1, p2 = strings.partition_methods("hello-world", "-")
+        assert p1 == ("hello", "-", "world")
+        assert p2 == ("hello", "-", "world")
+
+    def test_messages_batch_2(self):
+        """Test batch 2 strings."""
+        result = strings.messages_batch_2()
+        assert result == ("start", "stop", "pause")
+
+    def test_messages_batch_3(self):
+        """Test batch 3 strings."""
+        result = strings.messages_batch_3()
+        assert result[0] == "error"
+
+    def test_symbols(self):
+        """Test symbol strings."""
+        result = strings.symbols()
+        assert result == ("alpha", "beta", "gamma")
+
+    def test_keywords(self):
+        """Test keyword strings."""
+        result = strings.keywords()
+        assert "true" in result
+
+    def test_format_error(self):
+        """Test error f-string."""
+        assert strings.format_error(404, "Not Found") == "Error 404: Not Found"
+
+    def test_format_coords(self):
+        """Test coords f-string."""
+        assert strings.format_coords(1, 2) == "(1, 2)"
+
+    def test_format_path(self):
+        """Test path f-string."""
+        assert strings.format_path("/home", "file.txt") == "/home/file.txt"
+
+    def test_format_greeting(self):
+        """Test greeting f-string."""
+        assert strings.format_greeting("Dr", "Smith") == "Hello, Dr Smith!"
+
+    def test_case_methods_2(self):
+        """Test more case methods."""
+        title, cap, swap = strings.case_methods_2("hELLO")
+        assert title == "Hello"
+        assert cap == "Hello"
+
+    def test_strip_methods_2(self):
+        """Test strip with chars."""
+        left, right, both = strings.strip_methods_2("xxhelloxx", "x")
+        assert left == "helloxx"
+        assert right == "xxhello"
+        assert both == "hello"
+
+    def test_find_methods_2(self):
+        """Test find with start."""
+        pos1, pos2 = strings.find_methods_2("hello world hello", "hello", 1)
+        assert pos1 == 12
+
+    def test_replace_methods(self):
+        """Test replace methods."""
+        r1, r2 = strings.replace_methods("a-b-c", "-", "_")
+        assert r1 == "a_b_c"
+        assert r2 == "a_b-c"
+
+    def test_justify_methods(self):
+        """Test justify methods."""
+        left, right, center = strings.justify_methods("hi", 5)
+        assert len(left) == 5
+        assert len(right) == 5
+
+    def test_index_methods(self):
+        """Test index methods."""
+        i1, i2 = strings.index_methods("hello world hello", "hello")
+        assert i1 == 0
+        assert i2 == 12
+
+    def test_prefix_suffix_methods(self):
+        """Test prefix/suffix removal."""
+        r1, r2 = strings.prefix_suffix_methods("pre_test_suf")
+        assert r1 == "test_suf"
+        assert r2 == "pre_test"

From 8e4a9e0736d38d8cee95aa26faa83f2729f361bb Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Fri, 5 Jun 2026 23:22:07 -0400
Subject: [PATCH 4/8] feat: invalidate cache on config and dependency changes

Cached verdicts were only invalidated when a function body changed, so
changes to config or dependency files silently produced stale results.

- Config.config_fingerprint() hashes result-affecting config, grouped so
  we reset only what each change can affect:
  - timeout change -> reset only timeout verdicts
  - type_check_command change -> reset mutants whose type-check status
    flips (symmetric difference of old exit-37 and newly-caught)
  - pytest_add_cli_args / test-selection change -> reset all results and
    force full stats recollection
  - set-affecting config (source_paths, only_mutate, ...) is ignored:
    new mutants are uncached and dropped ones stop being walked
- compute_watched_file_hashes() hashes dependency/build files
  (pyproject.toml, setup.cfg/py, requirements*.txt, lockfiles) plus user
  globs from the new cache_invalidation_files config. The
  on_dependency_change config ("warn" | "rerun" | "ignore", default
  "warn") controls whether a change warns or resets all results.
- Fingerprints persist in mutmut-stats.json with pop-with-default, so
  old caches load and a missing fingerprint triggers no invalidation.
---
 README.rst                      |  43 +++++++
 src/mutmut/__main__.py          | 139 +++++++++++++++++++++-
 src/mutmut/configuration.py     |  28 +++++
 src/mutmut/state.py             |   5 +
 tests/mutation/test_mutation.py | 202 ++++++++++++++++++++++++++++++++
 tests/test_configuration.py     |   2 +
 6 files changed, 414 insertions(+), 5 deletions(-)

diff --git a/README.rst b/README.rst
index 42896a19..7f9c4f03 100644
--- a/README.rst
+++ b/README.rst
@@ -401,6 +401,49 @@ You can add and override pytest arguments:
     also_copy = ["mutmut_pytest.ini"]
 
 
+Detecting dependency and config changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Between runs, mutmut only re-tests mutants in functions whose source changed.
+Changes outside your Python source — a dependency upgrade, a data file, a
+config file — cannot be tied to a function, so they would otherwise be missed
+and you would get cached results that no longer reflect reality.
+
+To catch this, mutmut hashes a set of build and dependency files and warns you
+when any of them has changed since the last run. By default it watches:
+
+- `pyproject.toml`
+- `setup.cfg`
+- `setup.py`
+- `requirements*.txt`
+- `poetry.lock`
+- `uv.lock`
+- `Pipfile`
+- `Pipfile.lock`
+
+You can watch additional files (for example data files your tests depend on)
+with the `cache_invalidation_files` config, which accepts glob patterns
+resolved against the project root:
+
+.. code-block:: toml
+
+    cache_invalidation_files = [ "queries/*.sql", "config/*.yaml" ]
+
+When a watched file changes, `on_dependency_change` controls what happens:
+
+- `warn` (default): list the changed files and keep the cache.
+- `rerun`: re-test all mutants.
+- `ignore`: do nothing.
+
+.. code-block:: toml
+
+    on_dependency_change = "warn"
+
+Changes to mutmut's own result-affecting config (such as `pytest_add_cli_args`,
+`type_check_command`, or the timeout settings) are always detected and
+invalidate the affected cached results automatically.
+
+
 Unstable configs
 ~~~~~~~~~~~~~~~~
 
diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py
index 862acef9..8f98480b 100644
--- a/src/mutmut/__main__.py
+++ b/src/mutmut/__main__.py
@@ -22,6 +22,7 @@
 import ast
 import fnmatch
 import gc
+import hashlib
 import inspect
 import itertools
 import json
@@ -62,6 +63,7 @@
 from mutmut.code_coverage import get_covered_lines_for_file
 from mutmut.configuration import Config
 from mutmut.mutation.data import SourceFileMutationData
+from mutmut.mutation.file_mutation import FailedTypeCheckMutant
 from mutmut.mutation.file_mutation import filter_mutants_with_type_checker
 from mutmut.mutation.file_mutation import mutate_file_contents
 from mutmut.mutation.trampoline_templates import CLASS_NAME_SEPARATOR
@@ -819,10 +821,130 @@ def _invalidate_stale_dependency_edges() -> set[str]:
     return changed_functions
 
 
-def collect_or_load_stats(runner: TestRunner, invalidate_stale_callers: bool = True) -> None:
+# Dependency / build files whose changes the per-function source hashes cannot see.
+# Globs are resolved against the project root; missing files are skipped. Users can
+# extend this via the ``cache_invalidation_files`` config.
+_DEFAULT_WATCHED_FILES = (
+    "pyproject.toml",
+    "setup.cfg",
+    "setup.py",
+    "requirements*.txt",
+    "poetry.lock",
+    "uv.lock",
+    "Pipfile",
+    "Pipfile.lock",
+)
+
+
+def compute_watched_file_hashes() -> dict[str, str]:
+    """Map watched-file path -> content hash for the default set plus user globs."""
+    patterns = list(_DEFAULT_WATCHED_FILES) + list(Config.get().cache_invalidation_files)
+    hashes: dict[str, str] = {}
+    for pattern in patterns:
+        for path in sorted(Path(".").glob(pattern)):
+            if path.is_file():
+                hashes[str(path)] = hashlib.sha256(path.read_bytes()).hexdigest()[:12]
+    return hashes
+
+
+def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int:
+    """Reset cached verdicts to ``None`` (forcing a re-test) where ``should_reset`` holds.
+
+    ``should_reset`` only sees already-decided mutants (``exit_code`` is not ``None``).
+    """
+    count = 0
+    for path in walk_mutatable_files():
+        meta_path = Path("mutants") / (str(path) + ".meta")
+        if not meta_path.exists():
+            continue
+        m = SourceFileMutationData(path=path)
+        m.load()
+        dirty = False
+        for key, exit_code in list(m.exit_code_by_key.items()):
+            if exit_code is not None and should_reset(key, exit_code):
+                m.exit_code_by_key[key] = None
+                dirty = True
+                count += 1
+        if dirty:
+            m.save()
+    return count
+
+
+def _report_watched_file_changes() -> bool:
+    """Surface changes to watched config/dependency files.
+
+    Returns True only when the configured policy is ``rerun`` and something changed,
+    asking the caller to reset all results. Silent when no prior hashes exist.
+    """
+    old = state().old_watched_file_hashes
+    if not old:
+        return False
+    new = compute_watched_file_hashes()
+    changed = sorted(p for p in old.keys() | new.keys() if old.get(p) != new.get(p))
+    if not changed:
+        return False
+
+    policy = Config.get().on_dependency_change
+    if policy == "ignore":
+        return False
+    if policy == "rerun":
+        print(f"    {len(changed)} watched file(s) changed; rerunning all mutants: {', '.join(changed)}")
+        return True
+    # default: warn but keep the cache
+    print(f"    Warning: {len(changed)} watched file(s) changed since the last run: {', '.join(changed)}")
+    print("    These cannot be tracked for behavioral changes, so cached results were kept.")
+    print('    If the changes affect your tests, delete the mutants/ directory or set on_dependency_change = "rerun".')
+    return False
+
+
+def _apply_config_change_invalidation(mutants_caught_by_type_checker: dict[str, object]) -> bool:
+    """Reset only the cached verdicts a config / dependency change could have invalidated.
+
+    Returns True if a full stats recollection is required (a global pytest config change
+    or an opt-in dependency rerun), in which case all results have already been reset.
+    """
+    old_fp = state().old_config_fingerprint
+    new_fp = Config.get().config_fingerprint()
+    changed_groups = {g for g, h in new_fp.items() if g in old_fp and old_fp[g] != h}  # absent group => unchanged
+
+    dependency_rerun = _report_watched_file_changes()
+
+    # Global groups change how *every* test runs / which tests map to a function, so no
+    # subset of results is safe to keep -> full reset and full stats recollection.
+    if changed_groups & {"test_execution", "test_selection"} or dependency_rerun:
+        _reset_mutant_results(lambda key, exit_code: True)
+        mutmut.duration_by_test.clear()
+        mutmut.tests_by_mangled_function_name.clear()
+        state().function_dependencies.clear()
+        return True
+
+    # Timeout config only reclassifies timeouts; keep every other verdict.
+    if "timeout" in changed_groups:
+        _reset_mutant_results(lambda key, exit_code: status_by_exit_code[exit_code] == "timeout")
+
+    # The type-check pre-filter runs fresh every run; only verdicts whose type-check
+    # status flips are stale -> reset the symmetric difference of old (==37) and new.
+    if "type_check" in changed_groups:
+        caught = set(mutants_caught_by_type_checker)
+        _reset_mutant_results(lambda key, exit_code: (exit_code == 37) != (key in caught))
+
+    return False
+
+
+def collect_or_load_stats(
+    runner: TestRunner,
+    *,
+    mutants_caught_by_type_checker: dict[str, Any] | None = None,
+    apply_config_invalidation: bool = False,
+    invalidate_stale_callers: bool = True,
+) -> None:
     did_load = load_stats()
 
-    if not did_load:
+    force_full = False
+    if did_load and apply_config_invalidation:
+        force_full = _apply_config_change_invalidation(mutants_caught_by_type_checker or {})
+
+    if not did_load or force_full:
         # Run full stats
         run_stats_collection(runner)
     else:
@@ -862,6 +984,8 @@ def load_stats() -> bool:
             state().old_function_hashes = data.pop("function_hashes", {})
             for k, v in data.pop("function_dependencies", {}).items():
                 state().function_dependencies[k] = set(v)
+            state().old_config_fingerprint = data.pop("config_fingerprint", {})
+            state().old_watched_file_hashes = data.pop("watched_file_hashes", {})
             assert not data, data
             did_load = True
     except (FileNotFoundError, JSONDecodeError):
@@ -878,6 +1002,8 @@ def save_stats() -> None:
                 stats_time=mutmut.stats_time,
                 function_hashes=state().current_function_hashes,
                 function_dependencies={k: list(v) for k, v in state().function_dependencies.items()},
+                config_fingerprint=Config.get().config_fingerprint(),
+                watched_file_hashes=compute_watched_file_hashes(),
             ),
             f,
             indent=4,
@@ -1101,11 +1227,10 @@ def _run(mutant_names: tuple[str, ...] | list[str], max_children: int | None) ->
         f"    done in {round(time.total_seconds() * 1000)}ms ({stats.mutated} files mutated, {stats.ignored} ignored, {stats.unmodified} unmodified)",
     )
 
+    mutants_caught_by_type_checker: dict[str, FailedTypeCheckMutant] = {}
     if Config.get().type_check_command:
         with CatchOutput(spinner_title="Filtering mutations with type checker"):
             mutants_caught_by_type_checker = filter_mutants_with_type_checker()
-    else:
-        mutants_caught_by_type_checker = {}
 
     # TODO: config/option for runner
     # runner = HammettRunner()
@@ -1114,7 +1239,11 @@ def _run(mutant_names: tuple[str, ...] | list[str], max_children: int | None) ->
 
     # TODO: run these steps only if we have mutants to test
 
-    collect_or_load_stats(runner)
+    collect_or_load_stats(
+        runner,
+        mutants_caught_by_type_checker=mutants_caught_by_type_checker,
+        apply_config_invalidation=True,
+    )
 
     mutants, source_file_mutation_data_by_path = collect_source_file_mutation_data(mutant_names=mutant_names)
 
diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py
index 9145d4f8..98fce390 100644
--- a/src/mutmut/configuration.py
+++ b/src/mutmut/configuration.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import fnmatch
+import hashlib
 import os
 import platform
 import sys
@@ -144,6 +145,8 @@ def _load_config() -> Config:
         ),  # False on Mac, true otherwise as default (https://github.com/boxed/mutmut/pull/450#issuecomment-4002571055)
         track_dependencies=s("track_dependencies", True),
         dependency_tracking_depth=s("dependency_tracking_depth", None),
+        cache_invalidation_files=s("cache_invalidation_files", []),
+        on_dependency_change=s("on_dependency_change", "warn"),
     )
 
 
@@ -168,6 +171,31 @@ class Config:
     use_setproctitle: bool
     track_dependencies: bool
     dependency_tracking_depth: int | None
+    cache_invalidation_files: list[str]
+    on_dependency_change: str
+
+    def config_fingerprint(self) -> dict[str, str]:
+        """Hash the config fields that can change cached mutant *results*, grouped so the
+        caller can invalidate only the verdict classes each group can affect.
+
+        Fields that only change *which* mutants exist (source_paths, only_mutate, etc.)
+        are deliberately excluded: new mutants are born uncached and dropped ones simply
+        stop being walked, so they need no result invalidation.
+        """
+
+        def _hash(value: object) -> str:
+            return hashlib.sha256(repr(value).encode()).hexdigest()[:12]
+
+        return {
+            # global pytest behaviour: a change can flip any verdict
+            "test_execution": _hash(tuple(self.pytest_add_cli_args)),
+            # which tests cover which function: a change reshapes the stats mapping
+            "test_selection": _hash(tuple(self.pytest_add_cli_args_test_selection)),
+            # only reclassifies timeouts
+            "timeout": _hash((self.timeout_multiplier, self.timeout_constant)),
+            # only changes the type-check pre-filter
+            "type_check": _hash(tuple(self.type_check_command)),
+        }
 
     def should_mutate(self, path: Path | str) -> bool:
         return self._should_include_for_mutation(path) and not self._should_ignore_for_mutation(path)
diff --git a/src/mutmut/state.py b/src/mutmut/state.py
index 6a774e31..c53a3ec4 100644
--- a/src/mutmut/state.py
+++ b/src/mutmut/state.py
@@ -8,6 +8,11 @@ class MutmutState:
     old_function_hashes: dict[str, str] = field(default_factory=dict)
     current_function_hashes: dict[str, str] = field(default_factory=dict)
     function_dependencies: defaultdict[str, set[str]] = field(default_factory=lambda: defaultdict(set))
+    # Fingerprints loaded from the previous run, used to detect config / dependency
+    # changes the per-function source hashes cannot see. Empty when absent (pre-upgrade
+    # cache or first run), in which case no invalidation is triggered.
+    old_config_fingerprint: dict[str, str] = field(default_factory=dict)
+    old_watched_file_hashes: dict[str, str] = field(default_factory=dict)
 
 
 _state: MutmutState | None = None
diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py
index 7aa18ce1..9be27484 100644
--- a/tests/mutation/test_mutation.py
+++ b/tests/mutation/test_mutation.py
@@ -11,8 +11,12 @@
 import mutmut
 from mutmut.__main__ import CatchOutput
 from mutmut.__main__ import MutmutProgrammaticFailException
+from mutmut.__main__ import _apply_config_change_invalidation
 from mutmut.__main__ import _cleanup_stale_stats
 from mutmut.__main__ import _invalidate_stale_dependency_edges
+from mutmut.__main__ import _report_watched_file_changes
+from mutmut.__main__ import _reset_mutant_results
+from mutmut.__main__ import compute_watched_file_hashes
 from mutmut.__main__ import get_diff_for_mutant
 from mutmut.__main__ import mangled_name_from_mutant_name
 from mutmut.__main__ import orig_function_and_class_names_from_key
@@ -1293,3 +1297,201 @@ def test_invalidate_stale_dependency_edges_no_old_hashes_returns_empty():
 
     assert changed == set()
     reset_state()
+
+
+# --- config / dependency change invalidation tests (Tier 1 & 2) ---
+
+
+def _config_for_invalidation(**overrides):
+    base = dict(
+        also_copy=[],
+        only_mutate=[],
+        do_not_mutate=[],
+        do_not_mutate_patterns=[],
+        max_stack_depth=-1,
+        debug=False,
+        source_paths=[pathlib.Path("src")],
+        pytest_add_cli_args=[],
+        pytest_add_cli_args_test_selection=[],
+        mutate_only_covered_lines=False,
+        timeout_multiplier=15.0,
+        timeout_constant=1.0,
+        type_check_command=[],
+        use_setproctitle=False,
+        track_dependencies=True,
+        dependency_tracking_depth=None,
+        cache_invalidation_files=[],
+        on_dependency_change="warn",
+    )
+    base.update(overrides)
+    return Config(**base)
+
+
+def _write_meta(exit_code_by_key, src_rel="src/mymod.py"):
+    """Create a source file under a mutatable source dir plus its .meta, return the path."""
+    src = pathlib.Path(src_rel)
+    src.parent.mkdir(parents=True, exist_ok=True)
+    src.write_text("def foo():\n    return 1\n")
+    m = SourceFileMutationData(path=src)
+    m.exit_code_by_key = dict(exit_code_by_key)
+    m.meta_path.parent.mkdir(parents=True, exist_ok=True)
+    m.save()
+    return src
+
+
+def _load_results(src_rel="src/mymod.py"):
+    m = SourceFileMutationData(path=pathlib.Path(src_rel))
+    m.load()
+    return m.exit_code_by_key
+
+
+def test_reset_mutant_results_resets_only_matching(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation())
+    _write_meta({"a": 36, "b": 0, "c": None})  # timeout, survived, uncached
+
+    reset = _reset_mutant_results(lambda key, exit_code: exit_code == 36)
+
+    assert reset == 1
+    results = _load_results()
+    assert results == {"a": None, "b": 0, "c": None}
+
+
+def test_timeout_config_change_resets_only_timeouts(tmp_path, monkeypatch):
+    """Changing timeout config invalidates timeout verdicts but keeps killed/survived."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    old_cfg = _config_for_invalidation()
+    state().old_config_fingerprint = old_cfg.config_fingerprint()
+
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(timeout_multiplier=30.0))
+    _write_meta({"timed_out": 36, "killed": 1, "survived": 0})
+
+    force_full = _apply_config_change_invalidation({})
+
+    assert force_full is False
+    assert _load_results() == {"timed_out": None, "killed": 1, "survived": 0}
+    reset_state()
+
+
+def test_type_check_config_change_resets_symmetric_difference(tmp_path, monkeypatch):
+    """A type-check command change re-tests verdicts whose type-check status flips."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    old_cfg = _config_for_invalidation(type_check_command=["old"])
+    state().old_config_fingerprint = old_cfg.config_fingerprint()
+
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(type_check_command=["new"]))
+    # was_caught: cached 37 but no longer caught -> reset; now_caught: survived but newly caught -> reset;
+    # still_caught: 37 and still caught -> keep; untouched: survived and not caught -> keep
+    _write_meta({"was_caught": 37, "now_caught": 0, "still_caught": 37, "untouched": 0})
+
+    force_full = _apply_config_change_invalidation({"now_caught": object(), "still_caught": object()})
+
+    assert force_full is False
+    assert _load_results() == {
+        "was_caught": None,
+        "now_caught": None,
+        "still_caught": 37,
+        "untouched": 0,
+    }
+    reset_state()
+
+
+def test_global_pytest_change_forces_full_rerun(tmp_path, monkeypatch):
+    """A pytest-arg change resets all results and requests full stats recollection."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    state().old_config_fingerprint = _config_for_invalidation().config_fingerprint()
+    mutmut.duration_by_test["test_x"] = 1.0
+    mutmut.tests_by_mangled_function_name["mod.x_foo"] = {"test_x"}
+
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(pytest_add_cli_args=["-x"]))
+    _write_meta({"a": 1, "b": 0, "c": 36})
+
+    force_full = _apply_config_change_invalidation({})
+
+    assert force_full is True
+    assert all(v is None for v in _load_results().values())
+    assert not mutmut.duration_by_test
+    assert not mutmut.tests_by_mangled_function_name
+    reset_state()
+
+
+def test_no_config_change_keeps_all_results(tmp_path, monkeypatch):
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    cfg = _config_for_invalidation()
+    state().old_config_fingerprint = cfg.config_fingerprint()
+    monkeypatch.setattr(Config, "get", lambda: cfg)
+    _write_meta({"a": 1, "b": 0, "c": 36})
+
+    force_full = _apply_config_change_invalidation({})
+
+    assert force_full is False
+    assert _load_results() == {"a": 1, "b": 0, "c": 36}
+    reset_state()
+
+
+def test_absent_fingerprint_is_silent(tmp_path, monkeypatch):
+    """A pre-upgrade cache (no stored fingerprint) triggers no invalidation."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    # old_config_fingerprint left empty
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(pytest_add_cli_args=["-x"]))
+    _write_meta({"a": 1, "b": 0})
+
+    force_full = _apply_config_change_invalidation({})
+
+    assert force_full is False
+    assert _load_results() == {"a": 1, "b": 0}
+    reset_state()
+
+
+def test_watched_file_change_warn_keeps_cache(tmp_path, monkeypatch, capsys):
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation())
+    pathlib.Path("pyproject.toml").write_text("[project]\nname='x'\n")
+    state().old_watched_file_hashes = {"pyproject.toml": "deadbeef0000"}
+
+    rerun = _report_watched_file_changes()
+
+    assert rerun is False
+    assert "pyproject.toml" in capsys.readouterr().out
+    reset_state()
+
+
+def test_watched_file_change_rerun_policy(tmp_path, monkeypatch):
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(on_dependency_change="rerun"))
+    pathlib.Path("uv.lock").write_text("changed")
+    state().old_watched_file_hashes = {"uv.lock": "deadbeef0000"}
+
+    assert _report_watched_file_changes() is True
+    reset_state()
+
+
+def test_watched_file_absent_old_hashes_is_silent(tmp_path, monkeypatch, capsys):
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation())
+    pathlib.Path("pyproject.toml").write_text("[project]\nname='x'\n")
+    # old_watched_file_hashes left empty
+
+    assert _report_watched_file_changes() is False
+    assert capsys.readouterr().out == ""
+    reset_state()
+
+
+def test_compute_watched_file_hashes_includes_user_globs(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(cache_invalidation_files=["*.sql"]))
+    pathlib.Path("pyproject.toml").write_text("x")
+    pathlib.Path("query.sql").write_text("select 1")
+
+    hashes = compute_watched_file_hashes()
+
+    assert "pyproject.toml" in hashes
+    assert "query.sql" in hashes
diff --git a/tests/test_configuration.py b/tests/test_configuration.py
index bb9feb5c..469d2f47 100644
--- a/tests/test_configuration.py
+++ b/tests/test_configuration.py
@@ -73,6 +73,8 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config:
             use_setproctitle=False,
             track_dependencies=True,
             dependency_tracking_depth=None,
+            cache_invalidation_files=[],
+            on_dependency_change="warn",
         )
 
     def test_ignores_non_python_files(self):

From d81c127cfb2c950749c92500eb5c57b173302b2b Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Sat, 6 Jun 2026 10:49:35 -0400
Subject: [PATCH 5/8] feat: use git to detect non-Python dependency file
 changes

Replace the fixed watched-file list with git-based change detection.
mutmut now uses `git diff`/`git ls-files` to find every non-.py file
changed since the last full run, falling back to the curated list when
git is unavailable. A default exclude set (*.md, *.rst, docs/, LICENSE,
etc.) drops files that never affect tests; users can extend it with
`cache_invalidation_exclude`. The git commit and file hashes are
persisted together as a baseline so a later git-less environment (e.g.
a separate CI stage) can still detect changes to previously-tracked
files by re-hashing them. New options: `use_git_change_detection`
(default true) and `cache_invalidation_exclude`.
---
 README.rst                      |  34 +++++-
 src/mutmut/__main__.py          | 182 +++++++++++++++++++++++++---
 src/mutmut/configuration.py     |   4 +
 src/mutmut/state.py             |   6 +
 tests/mutation/test_mutation.py | 205 ++++++++++++++++++++++++++++++++
 tests/test_configuration.py     |   2 +
 6 files changed, 415 insertions(+), 18 deletions(-)

diff --git a/README.rst b/README.rst
index 7f9c4f03..c0f76865 100644
--- a/README.rst
+++ b/README.rst
@@ -409,8 +409,19 @@ Changes outside your Python source — a dependency upgrade, a data file, a
 config file — cannot be tied to a function, so they would otherwise be missed
 and you would get cached results that no longer reflect reality.
 
-To catch this, mutmut hashes a set of build and dependency files and warns you
-when any of them change since the last run. By default it watches:
+To catch this, mutmut detects non-Python files that changed since the last full
+run and warns you about them. If your project is a git repository and git is
+installed, mutmut uses git (a soft dependency; no extra package is required) to
+find every changed non-Python file, respecting your `.gitignore`. Python files
+are excluded because their changes are already tracked per function.
+
+On a full run with git available, mutmut also records the content hashes of the
+tracked non-Python files. This means a later run in an environment without git
+(for example a different CI stage) can still detect changes to that known set of
+files, even though it cannot discover brand-new ones.
+
+When git is unavailable, mutmut falls back to hashing a curated set of build and
+dependency files:
 
 - `pyproject.toml`
 - `setup.cfg`
@@ -423,12 +434,22 @@ when any of them change since the last run. By default it watches:
 
 You can watch additional files (for example data files your tests depend on)
 with the `cache_invalidation_files` config, which accepts glob patterns
-resolved against the project root:
+resolved against the project root. These are checked even when git ignores them,
+and are never dropped by the exclusions below:
 
 .. code-block:: toml
 
     cache_invalidation_files = [ "queries/*.sql", "config/*.yaml" ]
 
+Git detection reports every changed non-Python file, so mutmut drops files that
+practically never affect tests (markdown, `LICENSE`, `CHANGELOG`, `docs/`, git
+and editor metadata, ...). Exclude additional noisy files with
+`cache_invalidation_exclude` (glob patterns, `*` spans directories):
+
+.. code-block:: toml
+
+    cache_invalidation_exclude = [ "*.json", "fixtures/snapshots/*" ]
+
 When a watched file changes, `on_dependency_change` controls what happens:
 
 - `warn` (default): list the changed files and keep the cache.
@@ -439,6 +460,13 @@ When a watched file changes, `on_dependency_change` controls what happens:
 
     on_dependency_change = "warn"
 
+Git detection is on by default; disable it (forcing the curated-list fallback)
+with:
+
+.. code-block:: toml
+
+    use_git_change_detection = false
+
 Changes to mutmut's own result-affecting config (such as `pytest_add_cli_args`,
 `type_check_command`, or the timeout settings) are always detected and
 invalidate the affected cached results automatically.
diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py
index 8f98480b..01afe7fe 100644
--- a/src/mutmut/__main__.py
+++ b/src/mutmut/__main__.py
@@ -835,18 +835,166 @@ def _invalidate_stale_dependency_edges() -> set[str]:
     "Pipfile.lock",
 )
 
+# Files that practically never affect test behavior. Git change detection otherwise
+# surfaces every non-.py file in the repo, so these are dropped to cut the noise.
+# Users extend this via the ``cache_invalidation_exclude`` config; anything they
+# explicitly register in ``cache_invalidation_files`` is never excluded. Patterns are
+# matched with fnmatch (``*`` spans path separators).
+_DEFAULT_INVALIDATION_EXCLUDE = (
+    "*.md",
+    "*.rst",
+    "LICENSE*",
+    "COPYING*",
+    "NOTICE*",
+    "AUTHORS*",
+    "CHANGELOG*",
+    "CHANGES*",
+    ".gitignore",
+    ".gitattributes",
+    ".editorconfig",
+    ".pre-commit-config.yaml",
+    "docs/*",
+    "doc/*",
+)
+
+
+def _hash_files(paths: Iterable[str]) -> dict[str, str]:
+    """Content hash each existing path; missing files are simply omitted."""
+    hashes: dict[str, str] = {}
+    for p in paths:
+        path = Path(p)
+        if path.is_file():
+            hashes[p] = hashlib.sha256(path.read_bytes()).hexdigest()[:12]
+    return hashes
+
 
 def compute_watched_file_hashes() -> dict[str, str]:
     """Map watched-file path -> content hash for the default set plus user globs."""
     patterns = list(_DEFAULT_WATCHED_FILES) + list(Config.get().cache_invalidation_files)
-    hashes: dict[str, str] = {}
-    for pattern in patterns:
-        for path in sorted(Path(".").glob(pattern)):
-            if path.is_file():
-                hashes[str(path)] = hashlib.sha256(path.read_bytes()).hexdigest()[:12]
+    paths = [str(path) for pattern in patterns for path in sorted(Path(".").glob(pattern))]
+    return _hash_files(paths)
+
+
+def _run_git(args: list[str]) -> str | None:
+    """Run a git command in the current directory. Returns stdout, or None on any failure
+    (git not installed, not a repo, unknown ref, undecodable output, ...). Git is a soft
+    dependency: this never raises so callers can silently fall back to content hashing.
+    """
+    # quotepath=false: print non-ASCII paths verbatim (not C-quoted) so they match files on disk
+    cmd = ["git", "-c", "core.quotepath=false", *args]
+    try:
+        result = subprocess.run(cmd, capture_output=True, encoding="utf-8", check=False)
+    except (OSError, UnicodeDecodeError):
+        return None
+    return result.stdout if result.returncode == 0 else None
+
+
+def git_head() -> str | None:
+    """The current HEAD commit, or None when git / a repo / a commit is unavailable."""
+    out = _run_git(["rev-parse", "HEAD"])
+    return out.strip() if out else None
+
+
+def git_changed_non_py_files(since_ref: str) -> set[str] | None:
+    """Non-.py files changed since ``since_ref`` (tracked diffs against the working tree,
+    including uncommitted edits, plus new untracked files). ``.py`` files are excluded
+    because the per-function hashes already track them. Returns None if git cannot answer.
+    """
+    # --relative: report paths relative to the cwd (as ls-files does), skipping changes outside it
+    diff = _run_git(["diff", "--name-only", "--relative", since_ref, "--"])
+    if diff is None:
+        return None
+    untracked = _run_git(["ls-files", "--others", "--exclude-standard"]) or ""
+    return {f for f in (diff + "\n" + untracked).splitlines() if f and not f.endswith(".py")}
+
+
+def git_tracked_non_py_files() -> set[str] | None:
+    """Every non-.py file git knows about (tracked + untracked-not-ignored), or None if
+    git cannot answer. Recorded on a full run so a later git-less run can still detect
+    changes to these files by re-hashing them.
+    """
+    out = _run_git(["ls-files", "--cached", "--others", "--exclude-standard"])
+    if out is None:
+        return None
+    return {line for line in out.splitlines() if line and not line.endswith(".py")}
+
+
+def _changed_hashed_files(restrict_to: list[str] | None = None) -> set[str]:
+    """Baseline files whose content changed, by re-hashing them now.
+
+    Re-hashes every path in the stored baseline (which, after a full run with git, is
+    the comprehensive set of non-.py files) plus any newly-appearing curated/user-glob
+    files. This is how a git-less run still detects changes to files git discovered.
+    ``restrict_to`` limits the result to paths matching those glob patterns.
+    """
+    old = state().old_watched_file_hashes
+    if not old:
+        return set()
+    new = _hash_files(old.keys())
+    new.update(compute_watched_file_hashes())  # pick up newly-added curated/user files
+    changed = {p for p in old.keys() | new.keys() if old.get(p) != new.get(p)}
+    if restrict_to is not None:
+        changed = {p for p in changed if any(fnmatch.fnmatch(p, pat) for pat in restrict_to)}
+    return changed
+
+
+def _is_excluded(path: str, config: Config) -> bool:
+    """Whether ``path`` should be dropped from change reporting as noise.
+
+    Files explicitly registered in ``cache_invalidation_files`` are never excluded.
+    """
+    if any(fnmatch.fnmatch(path, pat) for pat in config.cache_invalidation_files):
+        return False
+    patterns = list(_DEFAULT_INVALIDATION_EXCLUDE) + list(config.cache_invalidation_exclude)
+    return any(fnmatch.fnmatch(path, pat) for pat in patterns)
+
+
+def _changed_dependency_files() -> set[str]:
+    """Non-.py files changed since the last full run, minus noise (see ``_is_excluded``).
+    Prefers git, skipping files whose content still matches the baseline hash; falls
+    back to re-hashing the baseline when git is unavailable. Empty on the first run.
+    """
+    config = Config.get()
+    old_commit = state().old_git_commit
+    git_changed: set[str] | None = None
+    if config.use_git_change_detection and old_commit is not None:
+        git_changed = git_changed_non_py_files(old_commit)
+    if git_changed is None:
+        changed = _changed_hashed_files()
+    else:
+        old = state().old_watched_file_hashes
+        current = _hash_files(sorted(old.keys() & git_changed))
+        # drop files still matching the baseline hash (already dirty/untracked at the last full run)
+        changed = {p for p in git_changed if p not in old or old[p] != current.get(p)}
+        # also catch explicitly-registered files that git ignores
+        changed |= _changed_hashed_files(restrict_to=config.cache_invalidation_files)
+    return {p for p in changed if not _is_excluded(p, config)}
+
+
+def _compute_baseline_file_hashes() -> dict[str, str]:
+    """The set of non-.py files to track, hashed. Always includes the curated/user-glob
+    files; when git is available it also records every tracked non-.py file (minus noise)
+    so a later git-less run can still detect changes to them.
+    """
+    config = Config.get()
+    hashes = compute_watched_file_hashes()
+    if config.use_git_change_detection:
+        tracked = git_tracked_non_py_files()
+        if tracked is not None:
+            hashes.update(_hash_files(sorted(p for p in tracked if not _is_excluded(p, config))))
     return hashes
 
 
+def _refresh_change_detection_baseline() -> None:
+    """Snapshot the current git commit and tracked-file hashes as the new baseline.
+
+    Only called on a full run; cached runs keep the previous baseline so a ``warn``
+    keeps firing until the cache is actually rebuilt.
+    """
+    state().git_commit = git_head()
+    state().watched_file_hashes = _compute_baseline_file_hashes()
+
+
 def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int:
     """Reset cached verdicts to ``None`` (forcing a re-test) where ``should_reset`` holds.
 
@@ -871,27 +1019,24 @@ def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int:
 
 
 def _report_watched_file_changes() -> bool:
-    """Surface changes to watched config/dependency files.
+    """Surface non-Python files that changed since the last full run.
 
     Returns True only when the configured policy is ``rerun`` and something changed,
-    asking the caller to reset all results. Silent when no prior hashes exist.
+    asking the caller to reset all results. Silent when there is no baseline yet.
     """
-    old = state().old_watched_file_hashes
-    if not old:
-        return False
-    new = compute_watched_file_hashes()
-    changed = sorted(p for p in old.keys() | new.keys() if old.get(p) != new.get(p))
+    changed = _changed_dependency_files()
     if not changed:
         return False
 
     policy = Config.get().on_dependency_change
     if policy == "ignore":
         return False
+    listed = sorted(changed)
     if policy == "rerun":
-        print(f"    {len(changed)} watched file(s) changed; rerunning all mutants: {', '.join(changed)}")
+        print(f"    {len(listed)} non-Python file(s) changed; rerunning all mutants: {', '.join(listed)}")
         return True
     # default: warn but keep the cache
-    print(f"    Warning: {len(changed)} watched file(s) changed since the last run: {', '.join(changed)}")
+    print(f"    Warning: {len(listed)} non-Python file(s) changed since the last full run: {', '.join(listed)}")
     print("    These cannot be tracked for behavioral changes, so cached results were kept.")
     print('    If the changes affect your tests, delete the mutants/ directory or set on_dependency_change = "rerun".')
     return False
@@ -945,6 +1090,8 @@ def collect_or_load_stats(
         force_full = _apply_config_change_invalidation(mutants_caught_by_type_checker or {})
 
     if not did_load or force_full:
+        # A full run rebuilds the cache, so reset the change-detection baseline to "now".
+        _refresh_change_detection_baseline()
         # Run full stats
         run_stats_collection(runner)
     else:
@@ -986,6 +1133,10 @@ def load_stats() -> bool:
                 state().function_dependencies[k] = set(v)
             state().old_config_fingerprint = data.pop("config_fingerprint", {})
             state().old_watched_file_hashes = data.pop("watched_file_hashes", {})
+            state().old_git_commit = data.pop("git_commit", None)
+            # Preserve the loaded baseline; only a full run refreshes it.
+            state().watched_file_hashes = state().old_watched_file_hashes
+            state().git_commit = state().old_git_commit
             assert not data, data
             did_load = True
     except (FileNotFoundError, JSONDecodeError):
@@ -1003,7 +1154,8 @@ def save_stats() -> None:
                 function_hashes=state().current_function_hashes,
                 function_dependencies={k: list(v) for k, v in state().function_dependencies.items()},
                 config_fingerprint=Config.get().config_fingerprint(),
-                watched_file_hashes=compute_watched_file_hashes(),
+                watched_file_hashes=state().watched_file_hashes,
+                git_commit=state().git_commit,
             ),
             f,
             indent=4,
diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py
index 98fce390..24b275a6 100644
--- a/src/mutmut/configuration.py
+++ b/src/mutmut/configuration.py
@@ -146,7 +146,9 @@ def _load_config() -> Config:
         track_dependencies=s("track_dependencies", True),
         dependency_tracking_depth=s("dependency_tracking_depth", None),
         cache_invalidation_files=s("cache_invalidation_files", []),
+        cache_invalidation_exclude=s("cache_invalidation_exclude", []),
         on_dependency_change=s("on_dependency_change", "warn"),
+        use_git_change_detection=s("use_git_change_detection", True),
     )
 
 
@@ -172,7 +174,9 @@ class Config:
     track_dependencies: bool
     dependency_tracking_depth: int | None
     cache_invalidation_files: list[str]
+    cache_invalidation_exclude: list[str]
     on_dependency_change: str
+    use_git_change_detection: bool
 
     def config_fingerprint(self) -> dict[str, str]:
         """Hash the config fields that can change cached mutant *results*, grouped so the
diff --git a/src/mutmut/state.py b/src/mutmut/state.py
index c53a3ec4..c1020898 100644
--- a/src/mutmut/state.py
+++ b/src/mutmut/state.py
@@ -12,7 +12,13 @@ class MutmutState:
     # changes the per-function source hashes cannot see. Empty when absent (pre-upgrade
     # cache or first run), in which case no invalidation is triggered.
     old_config_fingerprint: dict[str, str] = field(default_factory=dict)
+    # Change-detection baselines describe the state at the *last full run*. The ``old_``
+    # values are what we compare against; the others are what gets persisted (only
+    # refreshed on a full run, so a ``warn`` keeps firing until the cache is rebuilt).
     old_watched_file_hashes: dict[str, str] = field(default_factory=dict)
+    watched_file_hashes: dict[str, str] = field(default_factory=dict)
+    old_git_commit: str | None = None
+    git_commit: str | None = None
 
 
 _state: MutmutState | None = None
diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py
index 9be27484..d7a4d3ac 100644
--- a/tests/mutation/test_mutation.py
+++ b/tests/mutation/test_mutation.py
@@ -1,5 +1,7 @@
 import os
 import pathlib
+import shutil
+import subprocess
 import tempfile
 from collections import defaultdict
 from unittest.mock import Mock
@@ -12,12 +14,17 @@
 from mutmut.__main__ import CatchOutput
 from mutmut.__main__ import MutmutProgrammaticFailException
 from mutmut.__main__ import _apply_config_change_invalidation
+from mutmut.__main__ import _changed_dependency_files
 from mutmut.__main__ import _cleanup_stale_stats
 from mutmut.__main__ import _invalidate_stale_dependency_edges
+from mutmut.__main__ import _refresh_change_detection_baseline
 from mutmut.__main__ import _report_watched_file_changes
 from mutmut.__main__ import _reset_mutant_results
 from mutmut.__main__ import compute_watched_file_hashes
 from mutmut.__main__ import get_diff_for_mutant
+from mutmut.__main__ import git_changed_non_py_files
+from mutmut.__main__ import git_head
+from mutmut.__main__ import git_tracked_non_py_files
 from mutmut.__main__ import mangled_name_from_mutant_name
 from mutmut.__main__ import orig_function_and_class_names_from_key
 from mutmut.__main__ import record_trampoline_hit
@@ -1321,7 +1328,9 @@ def _config_for_invalidation(**overrides):
         track_dependencies=True,
         dependency_tracking_depth=None,
         cache_invalidation_files=[],
+        cache_invalidation_exclude=[],
         on_dependency_change="warn",
+        use_git_change_detection=True,
     )
     base.update(overrides)
     return Config(**base)
@@ -1495,3 +1504,199 @@ def test_compute_watched_file_hashes_includes_user_globs(tmp_path, monkeypatch):
 
     assert "pyproject.toml" in hashes
     assert "query.sql" in hashes
+
+
+# --- git-based change detection (soft dependency) ---
+
+_GIT = shutil.which("git")
+requires_git = pytest.mark.skipif(_GIT is None, reason="git not installed")
+
+
+def _git(args, cwd):
+    subprocess.run([_GIT, *args], cwd=cwd, check=True, capture_output=True, text=True)
+
+
+def _init_repo(path):
+    _git(["init"], path)
+    _git(["config", "user.email", "t@example.com"], path)
+    _git(["config", "user.name", "Test"], path)
+    _git(["config", "commit.gpgsign", "false"], path)
+
+
+def _commit_all(path, message="commit"):
+    _git(["add", "-A"], path)
+    _git(["commit", "-m", message], path)
+
+
+@requires_git
+def test_git_head_returns_commit(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "a.txt").write_text("1")
+    _commit_all(tmp_path)
+
+    head = git_head()
+
+    assert head and len(head) == 40
+
+
+def test_git_head_none_outside_repo(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    assert git_head() is None
+
+
+@requires_git
+def test_git_changed_non_py_files_detects_and_excludes_python(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "conf.yml").write_text("a: 1")
+    (tmp_path / "mod.py").write_text("x = 1")
+    _commit_all(tmp_path)
+    base = git_head()
+
+    (tmp_path / "conf.yml").write_text("a: 2")  # tracked non-.py modified
+    (tmp_path / "mod.py").write_text("x = 2")  # tracked .py modified (excluded)
+    (tmp_path / "data.sql").write_text("select 1")  # new untracked non-.py
+
+    assert git_changed_non_py_files(base) == {"conf.yml", "data.sql"}
+
+
+@requires_git
+def test_git_changed_non_py_files_bad_ref_returns_none(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "a.txt").write_text("1")
+    _commit_all(tmp_path)
+
+    assert git_changed_non_py_files("deadbeef" * 5) is None
+
+
+@requires_git
+def test_changed_dependency_files_prefers_git_over_curated_list(tmp_path, monkeypatch):
+    """Git catches a non-.py file that is not in the curated watched list."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "config.yaml").write_text("a: 1")
+    _commit_all(tmp_path)
+    state().old_git_commit = git_head()
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation())
+
+    (tmp_path / "config.yaml").write_text("a: 2")
+
+    assert "config.yaml" in _changed_dependency_files()
+    reset_state()
+
+
+@requires_git
+def test_use_git_change_detection_false_falls_back_to_curated(tmp_path, monkeypatch):
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "config.yaml").write_text("a: 1")
+    _commit_all(tmp_path)
+    state().old_git_commit = git_head()
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(use_git_change_detection=False))
+
+    (tmp_path / "config.yaml").write_text("a: 2")
+
+    # config.yaml is not in the curated list and git is disabled -> not reported
+    assert "config.yaml" not in _changed_dependency_files()
+    reset_state()
+
+
+@requires_git
+def test_default_exclude_drops_noisy_files(tmp_path, monkeypatch):
+    """Docs / markdown changes are dropped by the default exclude list."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "README.md").write_text("hi")
+    (tmp_path / "config.yaml").write_text("a: 1")
+    _commit_all(tmp_path)
+    state().old_git_commit = git_head()
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation())
+
+    (tmp_path / "README.md").write_text("changed")
+    (tmp_path / "config.yaml").write_text("a: 2")
+
+    changed = _changed_dependency_files()
+    assert "config.yaml" in changed
+    assert "README.md" not in changed
+    reset_state()
+
+
+@requires_git
+def test_user_exclude_pattern_drops_file(tmp_path, monkeypatch):
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "noisy.json").write_text("1")
+    _commit_all(tmp_path)
+    state().old_git_commit = git_head()
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(cache_invalidation_exclude=["*.json"]))
+
+    (tmp_path / "noisy.json").write_text("2")
+
+    assert "noisy.json" not in _changed_dependency_files()
+    reset_state()
+
+
+@requires_git
+def test_registered_file_is_immune_to_exclusion(tmp_path, monkeypatch):
+    """A file explicitly registered in cache_invalidation_files is never excluded."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "notes.md").write_text("a")  # *.md is excluded by default
+    _commit_all(tmp_path)
+    state().old_git_commit = git_head()
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(cache_invalidation_files=["notes.md"]))
+
+    (tmp_path / "notes.md").write_text("b")
+
+    assert "notes.md" in _changed_dependency_files()
+    reset_state()
+
+
+@requires_git
+def test_git_tracked_non_py_files_lists_tracked_and_excludes_python(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "config.yaml").write_text("a: 1")
+    (tmp_path / "mod.py").write_text("x = 1")
+    _commit_all(tmp_path)
+    (tmp_path / "data.sql").write_text("select 1")  # untracked but not ignored
+
+    tracked = git_tracked_non_py_files()
+
+    assert "config.yaml" in tracked
+    assert "data.sql" in tracked
+    assert "mod.py" not in tracked
+
+
+@requires_git
+def test_baseline_records_git_files_for_gitless_fallback(tmp_path, monkeypatch):
+    """A full run with git records all tracked non-.py files, so a later run without
+    git can still detect changes to them by re-hashing."""
+    reset_state()
+    monkeypatch.chdir(tmp_path)
+    _init_repo(tmp_path)
+    (tmp_path / "config.yaml").write_text("a: 1")
+    (tmp_path / "README.md").write_text("hi")  # excluded by default
+    _commit_all(tmp_path)
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation())
+
+    _refresh_change_detection_baseline()
+    baseline = state().watched_file_hashes
+    assert "config.yaml" in baseline  # recorded for the gitless fallback
+    assert "README.md" not in baseline  # noise stays out of the baseline
+
+    # simulate a later run in an environment without git
+    state().old_watched_file_hashes = baseline
+    state().old_git_commit = None
+    monkeypatch.setattr(Config, "get", lambda: _config_for_invalidation(use_git_change_detection=False))
+    (tmp_path / "config.yaml").write_text("a: 2")
+
+    assert "config.yaml" in _changed_dependency_files()
+    reset_state()
diff --git a/tests/test_configuration.py b/tests/test_configuration.py
index 469d2f47..7a240911 100644
--- a/tests/test_configuration.py
+++ b/tests/test_configuration.py
@@ -74,7 +74,9 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config:
             track_dependencies=True,
             dependency_tracking_depth=None,
             cache_invalidation_files=[],
+            cache_invalidation_exclude=[],
             on_dependency_change="warn",
+            use_git_change_detection=True,
         )
 
     def test_ignores_non_python_files(self):

From 19f9dcd15820e24978cb5a6ea70c6e01edb01529 Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Sat, 6 Jun 2026 11:05:12 -0400
Subject: [PATCH 6/8] HISTORY

---
 HISTORY.rst | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/HISTORY.rst b/HISTORY.rst
index 0c471f3d..d1a91a2f 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -1,6 +1,22 @@
 Changelog
 ---------
 
+Unreleased
+~~~~~~~~~~
+
+* Per-function source hashing for incremental cache invalidation — only re-test mutants in functions that changed
+
+* Cross-call dependency tracking — invalidate mutants in callers when a called function changes
+
+* Use git to detect changes to non-Python dependency files; fall back to a curated file list when git is unavailable
+
+* Add ``cache_invalidation_exclude`` config to suppress noisy files from change detection
+
+* Add ``use_git_change_detection`` config (default: true) to opt out of git-based detection
+
+* Invalidate cached results automatically when result-affecting config fields change
+
+
 3.6.0
 ~~~~~
 

From e18ec31486be069fad6d8b4c9240840711224cb2 Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Sat, 6 Jun 2026 11:07:30 -0400
Subject: [PATCH 7/8] lock

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index b956dad4..d17d7fe5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -375,7 +375,7 @@ wheels = [
 
 [[package]]
 name = "mutmut"
-version = "3.5.0"
+version = "3.6.0"
 source = { editable = "." }
 dependencies = [
     { name = "click" },

From f35583e17e9bade258f584add548c993325d9410 Mon Sep 17 00:00:00 2001
From: nicklafleur <55208706+nicklafleur@users.noreply.github.com>
Date: Tue, 9 Jun 2026 15:56:43 -0400
Subject: [PATCH 8/8] fix: address three cache-correctness bugs from Copilot
 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Return cached function hashes for mtime-skipped files so
  _cleanup_stale_stats and _invalidate_stale_dependency_edges don't
  treat unchanged files as deleted; use get_mutant_name at both call
  sites instead of inlining the path→module conversion
- Change dependency_tracking_depth default from None to -1 so
  setup.cfg values are correctly coerced to int by the config loader;
  narrow type from int | None to int and drop the conditional in
  run_stats_collection
- Fix benchmark fixture to yield unconditionally so it doesn't fail
  when BENCHMARK_TEST_DELAY=0
---
 e2e_projects/benchmark_1k/tests/conftest.py |  2 +-
 src/mutmut/__main__.py                      | 16 +++++++++-------
 src/mutmut/configuration.py                 |  4 ++--
 tests/mutation/test_mutation.py             |  2 +-
 tests/test_configuration.py                 |  4 ++--
 5 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/e2e_projects/benchmark_1k/tests/conftest.py b/e2e_projects/benchmark_1k/tests/conftest.py
index acf81987..2b7a61f9 100644
--- a/e2e_projects/benchmark_1k/tests/conftest.py
+++ b/e2e_projects/benchmark_1k/tests/conftest.py
@@ -28,4 +28,4 @@ def benchmark_test_delay():
         jittered = random.gauss(_test_delay, _test_delay * 0.1)
         # Clamp to 0.01s
         time.sleep(max(0.01, jittered))
-        yield
+    yield
diff --git a/src/mutmut/__main__.py b/src/mutmut/__main__.py
index 01afe7fe..9ade02e8 100644
--- a/src/mutmut/__main__.py
+++ b/src/mutmut/__main__.py
@@ -305,7 +305,12 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe
         # source_mtime == mutant_mtime: only copied, otherwise the mutant file is untouched
         # source_mtime < mutant_mtime: the mutations have been saved after copying; source file untouched
         if source_mtime < mutant_mtime:
-            return FileMutationResult(unmodified=True)
+            data = SourceFileMutationData(path=filename)
+            data.load()
+            return FileMutationResult(
+                unmodified=True,
+                current_hashes={get_mutant_name(filename, func): h for func, h in data.hash_by_function_name.items()},
+            )
     except OSError:
         pass
 
@@ -347,11 +352,8 @@ def create_mutants_for_file(filename: Path, output_path: Path) -> FileMutationRe
     data.hash_by_function_name = hash_by_function_name
     data.save()
 
-    module_name = strip_prefix(str(filename)[: -len(filename.suffix)].replace(os.sep, "."), prefix="src.")
-    current_hashes_qualified = {
-        f"{module_name}.{func}".replace(".__init__.", "."): h for func, h in hash_by_function_name.items()
-    }
-    changed_functions_qualified = {f"{module_name}.{func}".replace(".__init__.", ".") for func in changed}
+    current_hashes_qualified = {get_mutant_name(filename, func): h for func, h in hash_by_function_name.items()}
+    changed_functions_qualified = {get_mutant_name(filename, func) for func in changed}
 
     return FileMutationResult(
         warnings=warnings,
@@ -744,7 +746,7 @@ def run_stats_collection(runner: TestRunner, tests: Iterable[str] | None = None)
     os.environ["MUTANT_UNDER_TEST"] = "stats"
     os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1"
     depth = Config.get().dependency_tracking_depth
-    os.environ["MUTMUT_DEPENDENCY_DEPTH"] = str(depth) if depth is not None else "-1"
+    os.environ["MUTMUT_DEPENDENCY_DEPTH"] = str(depth)
     start_cpu_time = process_time()
 
     with CatchOutput(spinner_title="Running stats") as output_catcher:
diff --git a/src/mutmut/configuration.py b/src/mutmut/configuration.py
index 24b275a6..ec9ef8b7 100644
--- a/src/mutmut/configuration.py
+++ b/src/mutmut/configuration.py
@@ -144,7 +144,7 @@ def _load_config() -> Config:
             "use_setproctitle", not platform.system() == "Darwin"
         ),  # False on Mac, true otherwise as default (https://github.com/boxed/mutmut/pull/450#issuecomment-4002571055)
         track_dependencies=s("track_dependencies", True),
-        dependency_tracking_depth=s("dependency_tracking_depth", None),
+        dependency_tracking_depth=s("dependency_tracking_depth", -1),
         cache_invalidation_files=s("cache_invalidation_files", []),
         cache_invalidation_exclude=s("cache_invalidation_exclude", []),
         on_dependency_change=s("on_dependency_change", "warn"),
@@ -172,7 +172,7 @@ class Config:
     type_check_command: list[str]
     use_setproctitle: bool
     track_dependencies: bool
-    dependency_tracking_depth: int | None
+    dependency_tracking_depth: int
     cache_invalidation_files: list[str]
     cache_invalidation_exclude: list[str]
     on_dependency_change: str
diff --git a/tests/mutation/test_mutation.py b/tests/mutation/test_mutation.py
index d7a4d3ac..579a381b 100644
--- a/tests/mutation/test_mutation.py
+++ b/tests/mutation/test_mutation.py
@@ -1326,7 +1326,7 @@ def _config_for_invalidation(**overrides):
         type_check_command=[],
         use_setproctitle=False,
         track_dependencies=True,
-        dependency_tracking_depth=None,
+        dependency_tracking_depth=-1,
         cache_invalidation_files=[],
         cache_invalidation_exclude=[],
         on_dependency_change="warn",
diff --git a/tests/test_configuration.py b/tests/test_configuration.py
index 7a240911..bb834783 100644
--- a/tests/test_configuration.py
+++ b/tests/test_configuration.py
@@ -72,7 +72,7 @@ def _get_config(only_mutate: list[str], do_not_mutate: list[str]) -> Config:
             type_check_command=[],
             use_setproctitle=False,
             track_dependencies=True,
-            dependency_tracking_depth=None,
+            dependency_tracking_depth=-1,
             cache_invalidation_files=[],
             cache_invalidation_exclude=[],
             on_dependency_change="warn",
@@ -349,7 +349,7 @@ def test_uses_defaults_when_no_config(self, in_tmp_dir: Path):
         assert config.timeout_constant == 1.0
         assert config.type_check_command == []
         assert config.track_dependencies is True
-        assert config.dependency_tracking_depth is None
+        assert config.dependency_tracking_depth == -1
 
     def test_also_copy_includes_defaults(self, in_tmp_dir: Path):
         (in_tmp_dir / "src").mkdir()