Skip to content

Commit 7852b22

Browse files
committed
perf: use git ls-files to find .gitignore files instead of os.walk
os.walk traversed 22K+ directories (including gitignored test-repos with linux/pytorch/react-native) to find .gitignore files. Now uses git ls-files which respects gitignore and returns only tracked files. Falls back to os.walk for non-git directories. Measured: 8.2s → 2.1s on treemapper repo (4x speedup).
1 parent 8c85941 commit 7852b22

1 file changed

Lines changed: 40 additions & 12 deletions

File tree

src/treemapper/ignore.py

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -112,26 +112,54 @@ def _aggregate_all_ignore_patterns(root: Path, ignore_filenames: list[str]) -> l
112112
out: list[str] = []
113113
filenames_set = set(ignore_filenames)
114114

115-
for dirpath, dirnames, filenames in os.walk(root, topdown=True):
116-
dirnames[:] = sorted(d for d in dirnames if d not in PRUNE_DIRS and not _is_cache_dir(d))
115+
ignore_files = _find_ignore_files_via_git(root, ignore_filenames)
116+
if ignore_files is not None:
117+
for ignore_path in ignore_files:
118+
ignore_dir = ignore_path.parent
119+
rel = "" if ignore_dir == root else ignore_dir.relative_to(root).as_posix()
120+
for line in read_ignore_file(ignore_path):
121+
out.append(_process_ignore_line(line, rel))
122+
else:
123+
for dirpath, dirnames, filenames in os.walk(root, topdown=True):
124+
dirnames[:] = sorted(d for d in dirnames if d not in PRUNE_DIRS and not _is_cache_dir(d))
117125

118-
ignore_dir = Path(dirpath)
119-
rel = "" if ignore_dir == root else ignore_dir.relative_to(root).as_posix()
126+
ignore_dir = Path(dirpath)
127+
rel = "" if ignore_dir == root else ignore_dir.relative_to(root).as_posix()
120128

121-
found_files = filenames_set & set(filenames)
122-
for ignore_filename in sorted(found_files):
123-
for line in read_ignore_file(ignore_dir / ignore_filename):
124-
out.append(_process_ignore_line(line, rel))
129+
found_files = filenames_set & set(filenames)
130+
for ignore_filename in sorted(found_files):
131+
for line in read_ignore_file(ignore_dir / ignore_filename):
132+
out.append(_process_ignore_line(line, rel))
125133

126-
config_ignore = ignore_dir / TREEMAPPER_CONFIG_DIR / TREEMAPPER_DIR_IGNORE
127-
if config_ignore.is_file():
128-
for line in read_ignore_file(config_ignore):
129-
out.append(_process_ignore_line(line, rel))
134+
config_ignore = ignore_dir / TREEMAPPER_CONFIG_DIR / TREEMAPPER_DIR_IGNORE
135+
if config_ignore.is_file():
136+
for line in read_ignore_file(config_ignore):
137+
out.append(_process_ignore_line(line, rel))
130138

131139
logger.debug("Aggregated %d ignore patterns from %s", len(out), root)
132140
return out
133141

134142

143+
def _find_ignore_files_via_git(root: Path, ignore_filenames: list[str]) -> list[Path] | None:
144+
import subprocess
145+
146+
try:
147+
result = subprocess.run(
148+
["git", "ls-files", "-z", "--cached", "--others", "--exclude-standard", "--", *ignore_filenames],
149+
cwd=root,
150+
capture_output=True,
151+
text=False,
152+
timeout=10,
153+
)
154+
if result.returncode != 0:
155+
return None
156+
out = result.stdout.decode("utf-8", errors="surrogateescape")
157+
paths = [root / f for f in out.split("\0") if f and any(f.endswith(name) for name in ignore_filenames)]
158+
return sorted(paths)
159+
except (subprocess.SubprocessError, OSError):
160+
return None
161+
162+
135163
def _transform_pattern(pat: str, rel_to_root: str) -> str | None:
136164
prefix = rel_to_root + "/"
137165
if pat.startswith(prefix):

0 commit comments

Comments
 (0)