Skip to content

Commit f54f818

Browse files
DvirDukhan and Copilot committed
fix(analyzer): only index Python files for IMPORTS resolution
build_import_index received every analyzed file (all languages) via self.files, indexing them by dotted module name regardless of extension. A Python `import pkg.mod` could then resolve to a same-named non-Python file (e.g. pkg/mod.java) and create spurious IMPORTS edges.

Restrict the index to .py files. Add a regression test asserting a .java sibling at the same dotted path is never indexed.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 0967507 commit f54f818

2 files changed

Lines changed: 29 additions & 0 deletions

File tree

api/analyzers/python/analyzer.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,16 @@ def build_import_index(self, files: dict[Path, File], root: Path) -> object:
155155
suffix map tolerates ``src/``/``lib/`` layouts where the import name
156156
(``matplotlib.axes``) differs from the path-from-root
157157
(``lib.matplotlib.axes``).
158+
159+
Only Python files are indexed; ``files`` carries every analyzed
160+
source file, and a Python ``import pkg.mod`` must not resolve to a
161+
same-named non-Python file such as ``pkg/mod.java``.
158162
"""
159163
exact: dict[str, File] = {}
160164
suffix: dict[str, File] = {}
161165
for fpath, file in files.items():
166+
if fpath.suffix != '.py':
167+
continue
162168
if self.is_dependency(str(fpath)):
163169
continue
164170
parts = self._module_parts(fpath, root)

tests/analyzers/test_tree_sitter_base.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,26 @@ def test_tree_sitter_multilanguage_fixture_graph_counts():
7575
{"Class": 3, "Function": 4, "Method": 2}
7676
)
7777
assert Counter(edge[0] for edge in graph.edges) == Counter({"DEFINES": 9})
78+
79+
80+
def test_build_import_index_skips_non_python_files():
81+
"""A Python ``import pkg.mod`` must not resolve to ``pkg/mod.java``.
82+
83+
``build_import_index`` receives every analyzed file (all languages), so it
84+
must only index ``.py`` files; otherwise a same-named non-Python file with
85+
the same dotted path would create spurious ``IMPORTS`` edges.
86+
"""
87+
analyzer = PythonAnalyzer()
88+
root = Path("/repo")
89+
py_file = File(root / "pkg" / "mod.py", None)
90+
java_file = File(root / "pkg" / "mod.java", None)
91+
files = {py_file.path: py_file, java_file.path: java_file}
92+
93+
index = analyzer.build_import_index(files, root)
94+
95+
assert index["exact"]["pkg.mod"] is py_file
96+
assert index["suffix"]["pkg.mod"] is py_file
97+
assert index["suffix"]["mod"] is py_file
98+
# The .java file must not have been indexed under any dotted name.
99+
assert java_file not in index["exact"].values()
100+
assert java_file not in index["suffix"].values()

0 commit comments

Comments
 (0)