Skip to content

Commit 1cfcc3a

Browse files
fix: cap wildcard import expansion to avoid token explosion and 5-minute stalls
Wildcard imports like `import org.jooq.*` expand to 870+ types, causing about 5 minutes of disk I/O per function before the token budget check kicks in; 89% of jOOQ functions were skipped because of this. When a wildcard expands to more than 50 types, the expansion is now filtered to only the types referenced in the target method's code; if no referenced types are available to filter by, it is capped at the first 50 types found. This turns a 5-minute failure into a <1 second resolution with only the relevant types included. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3c38a80 commit 1cfcc3a

3 files changed

Lines changed: 78 additions & 16 deletions

File tree

codeflash/languages/java/context.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,10 @@ def extract_class_context(file_path: Path, class_name: str, analyzer: JavaAnalyz
863863

864864
# Maximum token budget for imported type skeletons to avoid bloating testgen context
865865
IMPORTED_SKELETON_TOKEN_BUDGET = 4000
866+
# Maximum types to expand from a single wildcard import before filtering to referenced types only.
867+
# Packages with more types than this (e.g. org.jooq with 870+) would waste minutes of disk I/O
868+
# and almost always exceed the token budget.
869+
MAX_WILDCARD_TYPES_UNFILTERED = 50
866870

867871

868872
def _extract_type_names_from_code(code: str, analyzer: JavaAnalyzer) -> set[str]:
@@ -932,11 +936,29 @@ def get_java_imported_type_skeletons(
932936
resolved_imports: list = []
933937
for imp in imports:
934938
if imp.is_wildcard:
935-
# Expand wildcard imports (e.g., com.aerospike.client.policy.*) into individual types
936-
expanded = resolver.expand_wildcard_import(imp.import_path)
939+
# First try unfiltered expansion with a cap. If the package is small enough, take all types.
940+
# If it's huge (e.g. org.jooq.* with 870+ types), filter to only types referenced in the target code.
941+
expanded = resolver.expand_wildcard_import(imp.import_path, max_types=MAX_WILDCARD_TYPES_UNFILTERED + 1)
942+
if len(expanded) > MAX_WILDCARD_TYPES_UNFILTERED:
943+
if priority_types:
944+
expanded = resolver.expand_wildcard_import(imp.import_path, filter_names=priority_types)
945+
logger.debug(
946+
"Wildcard %s.* exceeds %d types, filtered to %d referenced types",
947+
imp.import_path,
948+
MAX_WILDCARD_TYPES_UNFILTERED,
949+
len(expanded),
950+
)
951+
else:
952+
expanded = expanded[:MAX_WILDCARD_TYPES_UNFILTERED]
953+
logger.debug(
954+
"Wildcard %s.* exceeds %d types, capped (no target types to filter by)",
955+
imp.import_path,
956+
MAX_WILDCARD_TYPES_UNFILTERED,
957+
)
958+
elif expanded:
959+
logger.debug("Expanded wildcard import %s.* into %d types", imp.import_path, len(expanded))
937960
if expanded:
938961
resolved_imports.extend(expanded)
939-
logger.debug("Expanded wildcard import %s.* into %d types", imp.import_path, len(expanded))
940962
continue
941963

942964
resolved = resolver.resolve_import(imp)

codeflash/languages/java/import_resolver.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -220,14 +220,20 @@ def _extract_class_name(self, import_path: str) -> str | None:
220220
return last_part
221221
return None
222222

223-
def expand_wildcard_import(self, import_path: str) -> list[ResolvedImport]:
223+
def expand_wildcard_import(
224+
self, import_path: str, max_types: int = 0, filter_names: set[str] | None = None
225+
) -> list[ResolvedImport]:
224226
"""Expand a wildcard import (e.g., com.example.utils.*) to individual class imports.
225227
226228
Resolves the package path to a directory and returns a ResolvedImport for each
227229
.java file found in that directory.
230+
231+
Args:
232+
import_path: The package path (without the trailing .*).
233+
max_types: Maximum number of types to return. 0 means no limit.
234+
filter_names: If provided, only include types whose class name is in this set.
235+
228236
"""
229-
# Convert package path to directory path
230-
# e.g., "com.example.utils" -> "com/example/utils"
231237
relative_dir = import_path.replace(".", "/")
232238

233239
resolved: list[ResolvedImport] = []
@@ -237,17 +243,21 @@ def expand_wildcard_import(self, import_path: str) -> list[ResolvedImport]:
237243
if candidate_dir.is_dir():
238244
for java_file in candidate_dir.glob("*.java"):
239245
class_name = java_file.stem
240-
# Only include files that look like class names (start with uppercase)
241-
if class_name and class_name[0].isupper():
242-
resolved.append(
243-
ResolvedImport(
244-
import_path=f"{import_path}.{class_name}",
245-
file_path=java_file,
246-
is_external=False,
247-
is_wildcard=False,
248-
class_name=class_name,
249-
)
246+
if not class_name or not class_name[0].isupper():
247+
continue
248+
if filter_names is not None and class_name not in filter_names:
249+
continue
250+
resolved.append(
251+
ResolvedImport(
252+
import_path=f"{import_path}.{class_name}",
253+
file_path=java_file,
254+
is_external=False,
255+
is_wildcard=False,
256+
class_name=class_name,
250257
)
258+
)
259+
if max_types and len(resolved) >= max_types:
260+
return resolved
251261

252262
return resolved
253263

tests/test_languages/test_java/test_context.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2530,6 +2530,36 @@ def test_wildcard_imports_are_expanded(self):
25302530
# Wildcard imports should now be expanded to individual classes found in the package directory
25312531
assert "MathHelper" in result
25322532

2533+
def test_large_wildcard_is_filtered_to_referenced_types(self, tmp_path: Path):
2534+
"""When wildcard expands to >50 types, only types referenced in target code are included."""
2535+
from codeflash.languages.java.context import MAX_WILDCARD_TYPES_UNFILTERED
2536+
2537+
# Create a minimal Maven project structure so the resolver finds source roots
2538+
(tmp_path / "pom.xml").write_text("<project/>", encoding="utf-8")
2539+
pkg_dir = tmp_path / "src" / "main" / "java" / "com" / "bigpkg"
2540+
pkg_dir.mkdir(parents=True)
2541+
for i in range(MAX_WILDCARD_TYPES_UNFILTERED + 20):
2542+
(pkg_dir / f"Type{i:03d}.java").write_text(
2543+
f"package com.bigpkg;\npublic class Type{i:03d} {{ public int val() {{ return {i}; }} }}\n",
2544+
encoding="utf-8",
2545+
)
2546+
2547+
analyzer = get_java_analyzer()
2548+
# Target code references Type000 and Type001 only
2549+
target_code = "Type000 a = new Type000(); Type001 b = a.transform();"
2550+
source = "package com.example;\nimport com.bigpkg.*;\npublic class Foo { void bar() {} }"
2551+
imports = analyzer.find_imports(source)
2552+
2553+
result = get_java_imported_type_skeletons(
2554+
imports, tmp_path, tmp_path / "src" / "main" / "java", analyzer, target_code=target_code
2555+
)
2556+
2557+
# Only referenced types should appear, not all 70
2558+
assert "Type000" in result
2559+
assert "Type001" in result
2560+
# Types not referenced in target code should be excluded
2561+
assert "Type050" not in result
2562+
25332563
def test_import_to_nonexistent_class_in_file(self):
25342564
"""When an import resolves to a file but the class doesn't exist in it, skeleton extraction returns None."""
25352565
analyzer = get_java_analyzer()

0 commit comments

Comments
 (0)