@@ -835,18 +835,166 @@ def _invalidate_stale_dependency_edges() -> set[str]:
835835 "Pipfile.lock" ,
836836)
837837
# Glob patterns for files that practically never influence test behavior. Git-based
# change detection would otherwise report every non-.py file in the repository, so
# anything matching these patterns is dropped to keep the report readable.
#
# Users add their own patterns through the ``cache_invalidation_exclude`` config
# option; a file explicitly registered in ``cache_invalidation_files`` is never
# excluded. Matching is done with fnmatch, whose ``*`` also spans path separators
# (so ``docs/*`` covers the whole subtree).
_DEFAULT_INVALIDATION_EXCLUDE = (
    # prose documents
    "*.md",
    "*.rst",
    # legal / project-history files
    "LICENSE*",
    "COPYING*",
    "NOTICE*",
    "AUTHORS*",
    "CHANGELOG*",
    "CHANGES*",
    # repository and editor tooling
    ".gitignore",
    ".gitattributes",
    ".editorconfig",
    ".pre-commit-config.yaml",
    # documentation trees
    "docs/*",
    "doc/*",
)
859+
860+
def _hash_files(paths: Iterable[str]) -> dict[str, str]:
    """Content-hash each existing regular file in ``paths``.

    Returns a mapping of path (exactly as given) to the first 12 hex digits of the
    file's SHA-256. Paths that are not regular files are omitted, and so are files
    that disappear or become unreadable between the existence check and the read.
    Files are hashed in fixed-size chunks so that large assets are never loaded into
    memory whole.
    """
    hashes: dict[str, str] = {}
    for p in paths:
        path = Path(p)
        if not path.is_file():
            continue
        digest = hashlib.sha256()
        try:
            with path.open("rb") as fh:
                while chunk := fh.read(1 << 20):
                    digest.update(chunk)
        except OSError:
            # Deleted or made unreadable after the is_file() check: treat as missing
            # rather than aborting the whole run.
            continue
        hashes[p] = digest.hexdigest()[:12]
    return hashes
869+
838870
def compute_watched_file_hashes() -> dict[str, str]:
    """Hash every file matched by the default watch list or the user's extra globs.

    Patterns from ``_DEFAULT_WATCHED_FILES`` and the ``cache_invalidation_files``
    config option are globbed relative to the current directory; the result maps
    each matched path to its content hash.
    """
    patterns = [*_DEFAULT_WATCHED_FILES, *Config.get().cache_invalidation_files]
    matched: list[str] = []
    for pattern in patterns:
        matched.extend(str(hit) for hit in sorted(Path(".").glob(pattern)))
    return _hash_files(matched)
876+
877+
def _run_git(args: list[str]) -> str | None:
    """Run ``git <args>`` in the current working directory and return its stdout.

    Git is a soft dependency, so this never raises; callers can silently fall back
    to content hashing whenever ``None`` comes back. ``None`` is returned when:

    * git cannot be launched (not installed, not executable, ...);
    * an argument cannot be handed to a subprocess (e.g. an embedded NUL byte in a
      ref) or git's output cannot be decoded as text;
    * git exits with a non-zero status (not a repo, unknown ref, ...).
    """
    try:
        result = subprocess.run(["git", *args], capture_output=True, text=True, check=False)
    except (OSError, ValueError):
        # OSError: git is missing or cannot be executed.
        # ValueError: an un-passable argument, or UnicodeDecodeError (a ValueError
        # subclass) raised while decoding git's output.
        return None
    if result.returncode != 0:
        return None
    return result.stdout
890+
891+
def git_head() -> str | None:
    """Return the commit that ``HEAD`` resolves to, with surrounding whitespace removed.

    ``None`` when ``git rev-parse HEAD`` fails (no git, no repo, no commit yet) or
    prints nothing.
    """
    raw = _run_git(["rev-parse", "HEAD"])
    if not raw:
        return None
    return raw.strip()
896+
897+
def git_changed_non_py_files(since_ref: str) -> set[str] | None:
    """Non-.py files that git reports as differing from ``since_ref``.

    Combines two listings:

    * ``git diff --name-only <since_ref> --``: tracked files whose working-tree
      content differs from ``since_ref`` (uncommitted edits included);
    * ``git ls-files --others --exclude-standard``: every untracked file that is
      not ignored. If this second command fails, it contributes nothing.

    ``.py`` files are left out because the per-function hashes already track them.
    Returns ``None`` when the diff itself cannot be obtained.
    """
    diff_out = _run_git(["diff", "--name-only", since_ref, "--"])
    if diff_out is None:
        return None
    untracked_out = _run_git(["ls-files", "--others", "--exclude-standard"]) or ""
    names = [*diff_out.splitlines(), *untracked_out.splitlines()]
    result: set[str] = set()
    for name in names:
        if name and not name.endswith(".py"):
            result.add(name)
    return result
909+
910+
def git_tracked_non_py_files() -> set[str] | None:
    """All non-.py files git lists: tracked ones plus untracked ones that are not ignored.

    Returns ``None`` if git cannot answer. The result is recorded on a full run so
    that a later run without git can still notice changes by re-hashing these files.
    """
    listing = _run_git(["ls-files", "--cached", "--others", "--exclude-standard"])
    if listing is None:
        return None
    tracked: set[str] = set()
    for name in listing.splitlines():
        if name and not name.endswith(".py"):
            tracked.add(name)
    return tracked
920+
921+
def _changed_hashed_files(restrict_to: list[str] | None = None) -> set[str]:
    """Paths whose content hash no longer matches the stored baseline.

    Every path in the stored baseline is re-hashed, and the current curated /
    user-glob files are hashed on top so that newly appearing ones are noticed too.
    A path counts as changed when its baseline and current hashes differ, which
    includes files that were added or removed. With no baseline the result is empty.

    ``restrict_to``, when given, keeps only the changed paths that match at least
    one of those fnmatch patterns.
    """
    baseline = state().old_watched_file_hashes
    if not baseline:
        return set()

    current = _hash_files(baseline.keys())
    # Overlay the curated/user files so ones added since the baseline show up.
    current.update(compute_watched_file_hashes())

    differing: set[str] = set()
    for candidate in baseline.keys() | current.keys():
        if baseline.get(candidate) != current.get(candidate):
            differing.add(candidate)

    if restrict_to is None:
        return differing
    return {p for p in differing if any(fnmatch.fnmatch(p, pat) for pat in restrict_to)}
939+
940+
def _is_excluded(path: str, config: Config) -> bool:
    """Decide whether ``path`` is noise that change reporting should drop.

    A path matching any ``cache_invalidation_files`` pattern is never excluded.
    Otherwise it is excluded when it matches one of the built-in
    ``_DEFAULT_INVALIDATION_EXCLUDE`` patterns or a user-supplied
    ``cache_invalidation_exclude`` pattern. All matching uses fnmatch.
    """

    def matches_any(patterns) -> bool:
        return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)

    # Explicit registration wins over every exclude rule.
    if matches_any(config.cache_invalidation_files):
        return False
    return matches_any([*_DEFAULT_INVALIDATION_EXCLUDE, *config.cache_invalidation_exclude])
950+
951+
def _changed_dependency_files() -> set[str]:
    """Files changed since the last full run that the per-function hashes cannot track.

    Prefers git, which sees every non-.py file in the repo and respects .gitignore.
    Falls back to re-hashing the stored baseline when git change detection is
    disabled, no baseline commit was recorded, or git cannot answer. Silent on the
    first run, because there is no baseline to compare against. Noisy files (see
    ``_DEFAULT_INVALIDATION_EXCLUDE`` and ``cache_invalidation_exclude``) are dropped.

    Git's answer is relative to the baseline *commit*, so it also lists edits that
    were already uncommitted, and untracked files that already existed, when the
    baseline was taken. Any git-reported path whose current content hash still
    equals its baseline hash is therefore discarded. Without this, such files would
    be reported on every run and, under ``on_dependency_change = "rerun"``, would
    invalidate the cache every time.
    """
    config = Config.get()
    baseline_commit = state().old_git_commit

    git_changed = None
    if config.use_git_change_detection and baseline_commit is not None:
        git_changed = git_changed_non_py_files(baseline_commit)

    if git_changed is None:
        changed = _changed_hashed_files()
    else:
        baseline_hashes = state().old_watched_file_hashes or {}
        # Only paths present in the baseline can be proven unchanged; anything else
        # (new files, files not recorded at baseline time) stays reported.
        known = sorted(p for p in git_changed if p in baseline_hashes)
        current = _hash_files(known)
        unchanged = {p for p in known if current.get(p) == baseline_hashes[p]}
        # Also catch explicitly-registered files that git ignores.
        changed = (git_changed - unchanged) | _changed_hashed_files(
            restrict_to=config.cache_invalidation_files
        )
    return {p for p in changed if not _is_excluded(p, config)}
972+
973+
def _compute_baseline_file_hashes() -> dict[str, str]:
    """Hash the set of non-.py files that change detection should track.

    The curated / user-glob files are always included. When git change detection is
    enabled and git can answer, every non-.py file git lists (minus excluded noise)
    is hashed as well, so a later run without git can still detect changes to them.
    """
    config = Config.get()
    baseline = compute_watched_file_hashes()
    if not config.use_git_change_detection:
        return baseline

    tracked = git_tracked_non_py_files()
    if tracked is None:
        return baseline

    kept = sorted(name for name in tracked if not _is_excluded(name, config))
    baseline.update(_hash_files(kept))
    return baseline
848986
849987
def _refresh_change_detection_baseline() -> None:
    """Record the current git commit and file hashes as the new comparison baseline.

    Meant for full runs only: cached runs keep the previous baseline, so a ``warn``
    keeps firing until the cache is actually rebuilt.
    """
    head = git_head()
    state().git_commit = head
    file_hashes = _compute_baseline_file_hashes()
    state().watched_file_hashes = file_hashes
996+
997+
850998def _reset_mutant_results (should_reset : Callable [[str , int ], bool ]) -> int :
851999 """Reset cached verdicts to ``None`` (forcing a re-test) where ``should_reset`` holds.
8521000
@@ -871,27 +1019,24 @@ def _reset_mutant_results(should_reset: Callable[[str, int], bool]) -> int:
8711019
8721020
def _report_watched_file_changes() -> bool:
    """Surface non-Python files that changed since the last full run.

    Returns True only when the configured ``on_dependency_change`` policy is
    ``rerun`` and something changed, asking the caller to reset all results.
    Returns False, printing nothing, when the policy is ``ignore`` or when nothing
    changed (including when there is no baseline yet). Any other policy prints a
    warning and keeps the cache.
    """
    policy = Config.get().on_dependency_change
    if policy == "ignore":
        # Nothing would be reported, so skip the git subprocesses and file hashing.
        return False

    changed = _changed_dependency_files()
    if not changed:
        return False

    listed = sorted(changed)
    if policy == "rerun":
        print(f" {len(listed)} non-Python file(s) changed; rerunning all mutants: {', '.join(listed)}")
        return True
    # default: warn but keep the cache
    print(f" Warning: {len(listed)} non-Python file(s) changed since the last full run: {', '.join(listed)}")
    print(" These cannot be tracked for behavioral changes, so cached results were kept.")
    print(' If the changes affect your tests, delete the mutants/ directory or set on_dependency_change = "rerun".')
    return False
@@ -945,6 +1090,8 @@ def collect_or_load_stats(
9451090 force_full = _apply_config_change_invalidation (mutants_caught_by_type_checker or {})
9461091
9471092 if not did_load or force_full :
1093+ # A full run rebuilds the cache, so reset the change-detection baseline to "now".
1094+ _refresh_change_detection_baseline ()
9481095 # Run full stats
9491096 run_stats_collection (runner )
9501097 else :
@@ -986,6 +1133,10 @@ def load_stats() -> bool:
9861133 state ().function_dependencies [k ] = set (v )
9871134 state ().old_config_fingerprint = data .pop ("config_fingerprint" , {})
9881135 state ().old_watched_file_hashes = data .pop ("watched_file_hashes" , {})
1136+ state ().old_git_commit = data .pop ("git_commit" , None )
1137+ # Preserve the loaded baseline; only a full run refreshes it.
1138+ state ().watched_file_hashes = state ().old_watched_file_hashes
1139+ state ().git_commit = state ().old_git_commit
9891140 assert not data , data
9901141 did_load = True
9911142 except (FileNotFoundError , JSONDecodeError ):
@@ -1003,7 +1154,8 @@ def save_stats() -> None:
10031154 function_hashes = state ().current_function_hashes ,
10041155 function_dependencies = {k : list (v ) for k , v in state ().function_dependencies .items ()},
10051156 config_fingerprint = Config .get ().config_fingerprint (),
1006- watched_file_hashes = compute_watched_file_hashes (),
1157+ watched_file_hashes = state ().watched_file_hashes ,
1158+ git_commit = state ().git_commit ,
10071159 ),
10081160 f ,
10091161 indent = 4 ,
0 commit comments