Skip to content

Commit 47f5887

Browse files
authored
Merge pull request #1539 from codeflash-ai/faster-file-discovery
feat: replace rglob with os.walk and per-language directory pruning
2 parents efd215d + d91ee0a commit 47f5887

10 files changed

Lines changed: 540 additions & 421 deletions

File tree

.codex/config.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[mcp_servers.tessl]
2+
type = "stdio"
3+
command = "tessl"
4+
args = [ "mcp", "start" ]

.codex/skills/.gitignore

Lines changed: 0 additions & 2 deletions
This file was deleted.

.gemini/settings.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"mcpServers": {
3+
"tessl": {
4+
"type": "stdio",
5+
"command": "tessl",
6+
"args": [
7+
"mcp",
8+
"start"
9+
]
10+
}
11+
}
12+
}

.gemini/skills/.gitignore

Lines changed: 0 additions & 2 deletions
This file was deleted.

codeflash/discovery/functions_to_optimize.py

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,27 @@ def generic_visit(self, node: ast.AST) -> None:
146146
# Multi-language support helpers
147147
# =============================================================================
148148

149+
_VCS_EXCLUDES = frozenset({".git", ".hg", ".svn"})
150+
151+
152+
def parse_dir_excludes(patterns: frozenset[str]) -> tuple[frozenset[str], tuple[str, ...], tuple[str, ...]]:
    """Split directory-name patterns into exact names, prefixes, and suffixes.

    Only two wildcard forms are recognised:

    * a trailing ``*`` (``"name*"``) becomes a prefix match on ``"name"``;
    * a leading ``*`` (``"*name"``) becomes a suffix match on ``"name"``.

    Any other string is treated as an exact name. The trailing-star rule is
    checked first, so a pattern with a star at both ends such as ``"*x*"``
    yields the literal prefix ``"*x"`` rather than a "contains" match, and a
    lone ``"*"`` yields the empty prefix (which matches every name).

    Args:
        patterns: Directory-name patterns to classify.

    Returns:
        A ``(exact, prefixes, suffixes)`` triple. ``prefixes`` and
        ``suffixes`` are tuples so they can be handed directly to
        ``str.startswith`` / ``str.endswith``. Both are sorted so the result
        is deterministic regardless of set iteration order.
    """
    exact: set[str] = set()
    prefixes: set[str] = set()
    suffixes: set[str] = set()
    for pattern in patterns:
        if pattern.endswith("*"):
            prefixes.add(pattern[:-1])
        elif pattern.startswith("*"):
            suffixes.add(pattern[1:])
        else:
            exact.add(pattern)
    # Sorting removes the dependence on (hash-randomised) frozenset iteration
    # order; the matching semantics of the tuples are order-independent.
    return frozenset(exact), tuple(sorted(prefixes)), tuple(sorted(suffixes))
169+
149170

150171
def get_files_for_language(
151172
module_root_path: Path, ignore_paths: list[Path] | None = None, language: Language | None = None
@@ -164,37 +185,44 @@ def get_files_for_language(
164185
if ignore_paths is None:
165186
ignore_paths = []
166187

188+
all_patterns: frozenset[str]
167189
if language is not None:
168190
support = get_language_support(language)
169191
extensions = support.file_extensions
192+
all_patterns = support.dir_excludes | _VCS_EXCLUDES
170193
else:
171194
extensions = tuple(get_supported_extensions())
172-
173-
# Default directory patterns to always exclude for JS/TS
174-
js_ts_default_excludes = {
175-
"node_modules",
176-
"dist",
177-
"build",
178-
".next",
179-
".nuxt",
180-
"coverage",
181-
".cache",
182-
".turbo",
183-
".vercel",
184-
"__pycache__",
185-
}
186-
187-
files = []
188-
for ext in extensions:
189-
pattern = f"*{ext}"
190-
for file_path in module_root_path.rglob(pattern):
191-
# Check explicit ignore paths
192-
if any(file_path.is_relative_to(ignore_path) for ignore_path in ignore_paths):
193-
continue
194-
# Check default JS/TS excludes in path parts
195-
if any(part in js_ts_default_excludes for part in file_path.parts):
196-
continue
197-
files.append(file_path)
195+
all_patterns = _VCS_EXCLUDES
196+
for lang in Language:
197+
if is_language_supported(lang):
198+
all_patterns = all_patterns | get_language_support(lang).dir_excludes
199+
200+
dir_excludes, prefixes, suffixes = parse_dir_excludes(all_patterns)
201+
202+
ignore_dirs: set[str] = set()
203+
ignore_files: set[Path] = set()
204+
for p in ignore_paths:
205+
p = Path(p) if not isinstance(p, Path) else p
206+
if p.is_file():
207+
ignore_files.add(p)
208+
else:
209+
ignore_dirs.add(str(p))
210+
211+
files: list[Path] = []
212+
for dirpath, dirnames, filenames in os.walk(module_root_path):
213+
dirnames[:] = [
214+
d
215+
for d in dirnames
216+
if d not in dir_excludes
217+
and not (prefixes and d.startswith(prefixes))
218+
and not (suffixes and d.endswith(suffixes))
219+
and str(Path(dirpath) / d) not in ignore_dirs
220+
]
221+
for fname in filenames:
222+
if fname.endswith(extensions):
223+
fpath = Path(dirpath, fname)
224+
if fpath not in ignore_files:
225+
files.append(fpath)
198226
return files
199227

200228

codeflash/languages/base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,14 @@ def comment_prefix(self) -> str:
254254
"""Like # or //."""
255255
...
256256

257+
@property
258+
def dir_excludes(self) -> frozenset[str]:
    """Return the directory-name patterns that file discovery should skip.

    Each entry is one of three forms: a plain ``"name"`` (exact match),
    ``"prefix*"`` (name starts with ``prefix``), or ``"*suffix"`` (name ends
    with ``suffix``).
    """
    ...
264+
257265
# === Discovery ===
258266

259267
def discover_functions(

codeflash/languages/javascript/support.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ def test_framework(self) -> str:
6363
def comment_prefix(self) -> str:
6464
return "//"
6565

66+
@property
67+
def dir_excludes(self) -> frozenset[str]:
    """Return the directory names to skip during file discovery."""
    skipped_names = (
        "node_modules",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "coverage",
        ".cache",
        ".turbo",
        ".vercel",
    )
    return frozenset(skipped_names)
69+
6670
# === Discovery ===
6771

6872
def discover_functions(

codeflash/languages/python/support.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,37 @@ def test_framework(self) -> str:
7575
def comment_prefix(self) -> str:
7676
return "#"
7777

78+
@property
79+
def dir_excludes(self) -> frozenset[str]:
    """Return the directory-name patterns to skip during file discovery.

    Entries are either exact directory names or simple wildcard patterns
    with a single leading or trailing ``*``.
    """
    exact_names = frozenset(
        (
            "__pycache__",
            ".venv",
            "venv",
            ".tox",
            ".nox",
            ".eggs",
            ".mypy_cache",
            ".ruff_cache",
            ".pytest_cache",
            ".hypothesis",
            "htmlcov",
            ".pytype",
            ".pyre",
            ".pybuilder",
            ".ipynb_checkpoints",
            ".codeflash",
            ".cache",
            ".complexipy_cache",
            "build",
            "dist",
            "sdist",
        )
    )
    wildcard_patterns = frozenset((".coverage*", ".pyright*", "*.egg-info"))
    return exact_names | wildcard_patterns
108+
78109
# === Discovery ===
79110

80111
def discover_functions(

tessl.json

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"version": "0.13.0"
2121
},
2222
"tessl/pypi-pydantic": {
23-
"version": "1.10.0"
23+
"version": "2.11.0"
2424
},
2525
"tessl/pypi-humanize": {
2626
"version": "4.13.0"
@@ -35,7 +35,7 @@
3535
"version": "3.4.0"
3636
},
3737
"tessl/pypi-sentry-sdk": {
38-
"version": "1.45.0"
38+
"version": "2.36.0"
3939
},
4040
"tessl/pypi-parameterized": {
4141
"version": "0.9.0"
@@ -44,10 +44,10 @@
4444
"version": "0.4.0"
4545
},
4646
"tessl/pypi-rich": {
47-
"version": "13.9.0"
47+
"version": "14.1.0"
4848
},
4949
"tessl/pypi-lxml": {
50-
"version": "5.4.0"
50+
"version": "6.0.0"
5151
},
5252
"tessl/pypi-crosshair-tool": {
5353
"version": "0.0.0"
@@ -64,17 +64,20 @@
6464
"tessl/pypi-filelock": {
6565
"version": "3.19.0"
6666
},
67-
"codeflash/codeflash-rules": {
68-
"version": "0.1.0"
67+
"tessl/pypi-ipython": {
68+
"version": "9.5.0"
6969
},
70-
"codeflash/codeflash-docs": {
71-
"version": "0.1.0"
70+
"tessl/pypi-mypy": {
71+
"version": "1.17.0"
7272
},
73-
"codeflash/codeflash-skills": {
74-
"version": "0.2.0"
73+
"tessl/pypi-ty": {
74+
"version": "0.0.0"
75+
},
76+
"tessl/pypi-types-jsonschema": {
77+
"version": "3.2.0"
7578
},
76-
"tessl-labs/tessl-skill-eval-scenarios": {
77-
"version": "0.0.5"
79+
"tessl/pypi-uv": {
80+
"version": "0.8.0"
7881
}
7982
}
8083
}

0 commit comments

Comments
 (0)