Skip to content

Commit 35091cd

Browse files
committed
feat: replace rglob with os.walk and per-language directory pruning
File discovery used Path.rglob per extension, traversing excluded dirs (e.g. .venv, node_modules) before filtering. Switch to a single os.walk pass with in-place dirs[:] pruning. Each language now declares its own dir_excludes patterns (exact, prefix*, *suffix) on the LanguageSupport protocol, parsed by parse_dir_excludes() at walk time.
1 parent efd215d commit 35091cd

4 files changed

Lines changed: 77 additions & 26 deletions

File tree

codeflash/discovery/functions_to_optimize.py

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,29 @@ def generic_visit(self, node: ast.AST) -> None:
146146
# Multi-language support helpers
147147
# =============================================================================
148148

149+
# Version-control metadata directory names; unioned into the exclude patterns
# for every language during file discovery.
_VCS_EXCLUDES = frozenset({".git", ".hg", ".svn"})
150+
151+
152+
def parse_dir_excludes(
    patterns: frozenset[str],
) -> tuple[frozenset[str], tuple[str, ...], tuple[str, ...]]:
    """Split directory-name glob patterns into exact names, prefixes, and suffixes.

    Only a single leading or trailing ``*`` is understood:

    * ``"name"``    -> exact match, returned in the first element.
    * ``"prefix*"`` -> ``"prefix"`` is returned in the second element.
    * ``"*suffix"`` -> ``"suffix"`` is returned in the third element.

    The trailing-``*`` check runs first, so a pattern with a ``*`` at both ends
    (e.g. ``"*cache*"``) is stored as the prefix ``"*cache"`` with the leading
    ``*`` kept literally. A bare ``"*"`` yields the empty prefix ``""``, which
    ``str.startswith`` reports as a match for every name.

    Args:
        patterns: Directory-name patterns in the forms described above.

    Returns:
        A ``(exact, prefixes, suffixes)`` triple. The two tuples are sorted so
        the result does not depend on set iteration order, and they can be
        passed directly to ``str.startswith`` / ``str.endswith``.
    """
    exact: set[str] = set()
    prefixes: set[str] = set()
    suffixes: set[str] = set()
    for pattern in patterns:
        if pattern.endswith("*"):
            prefixes.add(pattern[:-1])
        elif pattern.startswith("*"):
            suffixes.add(pattern[1:])
        else:
            exact.add(pattern)
    # Sort so the output is reproducible across runs (string hashing is randomised).
    return frozenset(exact), tuple(sorted(prefixes)), tuple(sorted(suffixes))
171+
149172

150173
def get_files_for_language(
151174
module_root_path: Path, ignore_paths: list[Path] | None = None, language: Language | None = None
@@ -167,34 +190,39 @@ def get_files_for_language(
167190
if language is not None:
168191
support = get_language_support(language)
169192
extensions = support.file_extensions
193+
all_patterns = support.dir_excludes | _VCS_EXCLUDES
170194
else:
171195
extensions = tuple(get_supported_extensions())
172-
173-
# Default directory patterns to always exclude for JS/TS
174-
js_ts_default_excludes = {
175-
"node_modules",
176-
"dist",
177-
"build",
178-
".next",
179-
".nuxt",
180-
"coverage",
181-
".cache",
182-
".turbo",
183-
".vercel",
184-
"__pycache__",
185-
}
186-
187-
files = []
188-
for ext in extensions:
189-
pattern = f"*{ext}"
190-
for file_path in module_root_path.rglob(pattern):
191-
# Check explicit ignore paths
192-
if any(file_path.is_relative_to(ignore_path) for ignore_path in ignore_paths):
193-
continue
194-
# Check default JS/TS excludes in path parts
195-
if any(part in js_ts_default_excludes for part in file_path.parts):
196-
continue
197-
files.append(file_path)
196+
all_patterns: frozenset[str] = _VCS_EXCLUDES
197+
for lang in Language:
198+
if is_language_supported(lang):
199+
all_patterns = all_patterns | get_language_support(lang).dir_excludes
200+
201+
dir_excludes, prefixes, suffixes = parse_dir_excludes(all_patterns)
202+
203+
ignore_dirs: set[str] = set()
204+
ignore_files: set[Path] = set()
205+
for p in ignore_paths:
206+
if p.is_file():
207+
ignore_files.add(p)
208+
else:
209+
ignore_dirs.add(str(p))
210+
211+
files: list[Path] = []
212+
for dirpath, dirnames, filenames in os.walk(module_root_path):
213+
dirnames[:] = [
214+
d
215+
for d in dirnames
216+
if d not in dir_excludes
217+
and not (prefixes and d.startswith(prefixes))
218+
and not (suffixes and d.endswith(suffixes))
219+
and str(Path(dirpath) / d) not in ignore_dirs
220+
]
221+
for fname in filenames:
222+
if fname.endswith(extensions):
223+
fpath = Path(dirpath, fname)
224+
if fpath not in ignore_files:
225+
files.append(fpath)
198226
return files
199227

200228

codeflash/languages/base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,14 @@ def comment_prefix(self) -> str:
254254
"""Like # or //."""
255255
...
256256

257+
@property
def dir_excludes(self) -> frozenset[str]:
    """Return the directory-name patterns that file discovery should skip.

    Each entry takes one of three forms: ``name`` (exact match), ``prefix*``
    (directory name starts with ``prefix``), or ``*suffix`` (directory name
    ends with ``suffix``).
    """
    ...
264+
257265
# === Discovery ===
258266

259267
def discover_functions(

codeflash/languages/javascript/support.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ def test_framework(self) -> str:
6363
def comment_prefix(self) -> str:
6464
return "//"
6565

66+
@property
def dir_excludes(self) -> frozenset[str]:
    """Return the directory names to skip during file discovery (all exact matches)."""
    plain_names = ("node_modules", "dist", "build", "coverage")
    dotted_names = (".next", ".nuxt", ".cache", ".turbo", ".vercel")
    return frozenset(plain_names + dotted_names)
69+
6670
# === Discovery ===
6771

6872
def discover_functions(

codeflash/languages/python/support.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,17 @@ def test_framework(self) -> str:
7575
def comment_prefix(self) -> str:
7676
return "#"
7777

78+
@property
def dir_excludes(self) -> frozenset[str]:
    """Return the directory-name patterns to skip during file discovery.

    Plain entries are exact names; entries with a trailing or leading ``*``
    are prefix or suffix patterns respectively.
    """
    exact_names = (
        "__pycache__",
        ".venv",
        "venv",
        ".tox",
        ".nox",
        ".eggs",
        ".mypy_cache",
        ".ruff_cache",
        ".pytest_cache",
        ".hypothesis",
        "htmlcov",
        ".pytype",
        ".pyre",
        ".pybuilder",
        ".ipynb_checkpoints",
        ".codeflash",
        ".cache",
        ".complexipy_cache",
        "build",
        "dist",
        "sdist",
    )
    wildcard_patterns = (".coverage*", ".pyright*", "*.egg-info")
    return frozenset(exact_names + wildcard_patterns)
88+
7889
# === Discovery ===
7990

8091
def discover_functions(

0 commit comments

Comments
 (0)