Skip to content

Commit 23e382c

Browse files
DvirDukhan and Copilot committed
feat(analyzers): tree-sitter Python symbol resolver (T18 #689)
Replace jedi-based resolution with a pure tree-sitter static resolver behind CODE_GRAPH_PY_RESOLVER=tree_sitter. The default remains jedi for backwards compatibility.

Benchmark on pytest-dev/pytest-6202 (204 files):
- jedi: 247.1s wall, CALLS=1976, EXTENDS=71
- tree-sitter: 6.9s wall, CALLS=4833, EXTENDS=83

That is a ~36x speedup with broader call recall (jedi returns None ~80% of the time).

Mechanism:
- TreeSitterPythonResolver builds a project-wide symbol table (top-level funcs/classes/assigns, class methods, import maps), keyed by id(files) so it is constructed lazily.
- Resolution: head lookup (local module -> import map -> cross-project bare-name fallback), followed by a tail walk through attributes and class methods.
- Handles relative imports, aliased imports, import-of-package, and Optional[T]/generic_type subscript unwrapping.
- An AbstractAnalyzer.needs_lsp() hook plus a PythonAnalyzer override let source_analyzer skip LSP startup and venv setup entirely when the static resolver is active. This is where the wall-time win actually lives (jedi warm-up was ~240s of the 247s baseline).

Closes #689.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent bf339b6 commit 23e382c

5 files changed

Lines changed: 824 additions & 3 deletions

File tree

api/analyzers/analyzer.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: P
5858
return [(files[Path(self.resolve_path(location['absolutePath'], path))], files[Path(self.resolve_path(location['absolutePath'], path))].tree.root_node.descendant_for_point_range(Point(location['range']['start']['line'], location['range']['start']['character']), Point(location['range']['end']['line'], location['range']['end']['character']))) for location in locations if location and Path(self.resolve_path(location['absolutePath'], path)) in files]
5959
except Exception:
6060
return []
61+
62+
def needs_lsp(self) -> bool:
    """Report whether second_pass must start an LSP server for this analyzer.

    The base implementation answers ``True``, which keeps the original
    jedi/multilspy-backed analyzers working unchanged. An analyzer that
    resolves symbols statically (for example the tree-sitter resolver
    from #689) overrides this to answer ``False`` so the orchestrator
    can skip the expensive LSP warm-up.
    """
    lsp_required = True
    return lsp_required
6172

6273
@abstractmethod
6374
def add_dependencies(self, path: Path, files: list[Path]):

api/analyzers/python/analyzer.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,24 @@
55
import tomllib
66
from typing import Optional
77

8+
from multilspy import SyncLanguageServer
9+
810
from ...entities.entity import Entity
11+
from ...entities.file import File
912
from ..tree_sitter_base import TreeSitterAnalyzer
13+
from .ts_resolver import TreeSitterPythonResolver
1014

1115
import tree_sitter_python as tspython
1216
from tree_sitter import Language, Node
1317

1418
import logging
1519
logger = logging.getLogger('code_graph')
1620

21+
22+
# Name of the environment variable that selects the Python symbol resolver.
_RESOLVER_ENV = "CODE_GRAPH_PY_RESOLVER"
23+
# Resolver-choice value that opts into the static tree-sitter resolver.
_RESOLVER_TREE_SITTER = "tree_sitter"
24+
25+
1726
class PythonAnalyzer(TreeSitterAnalyzer):
1827
entity_node_types = {
1928
'class_definition': "Class",
@@ -26,8 +35,48 @@ class PythonAnalyzer(TreeSitterAnalyzer):
2635

2736
def __init__(self) -> None:
    """Load the Python grammar and pick the symbol resolver.

    The resolver is chosen from the environment variable named by
    ``_RESOLVER_ENV``; the value is compared case-insensitively with
    surrounding whitespace ignored. ``tree_sitter`` opts into the static
    project-wide resolver (issue #689). Any other value leaves
    ``self._ts_resolver`` as ``None``, i.e. the historical jedi/LSP
    path, so behaviour is unchanged until explicitly enabled.

    A non-empty value that is not recognised is logged as a warning
    rather than ignored silently, so a typo such as ``tree-sitter`` does
    not quietly select the slow path.
    """
    super().__init__(Language(tspython.language()))

    resolver_choice = os.environ.get(_RESOLVER_ENV, "").strip().lower()

    # Declare the attribute once so it exists (and is typed) on every path.
    self._ts_resolver: Optional[TreeSitterPythonResolver] = None

    if resolver_choice == _RESOLVER_TREE_SITTER:
        self._ts_resolver = TreeSitterPythonResolver(self.language)
        logger.info("PythonAnalyzer: tree-sitter static resolver enabled")
    elif resolver_choice not in ("", "jedi"):
        # NOTE(review): "jedi" is accepted here as the explicit spelling of
        # the default resolver — confirm this matches the documented values.
        logger.warning(
            "PythonAnalyzer: unrecognised %s=%r; using the default jedi/LSP resolver",
            _RESOLVER_ENV,
            resolver_choice,
        )
49+
50+
def resolve(
    self,
    files: dict[Path, File],
    lsp: SyncLanguageServer,
    file_path: Path,
    path: Path,
    node: Node,
) -> list[tuple[File, Node]]:
    """Map a name node to its ``(File, def_node)`` definition pairs.

    With ``CODE_GRAPH_PY_RESOLVER=tree_sitter`` the lookup is delegated
    to the project-wide static resolver and ``lsp`` is not used. In
    every other case the call is forwarded unchanged to the parent
    class's LSP-backed ``resolve``.
    """
    static_resolver = self._ts_resolver
    if static_resolver is None:
        # No static resolver configured: defer to the inherited LSP path.
        return super().resolve(files, lsp, file_path, path, node)
    return static_resolver.resolve(files, file_path, path, node)
67+
68+
def needs_lsp(self) -> bool:
    """Return ``True`` only when no tree-sitter resolver is configured."""
    # An active static resolver never touches the LSP, so the
    # orchestrator is free to skip starting one.
    static_resolver_active = self._ts_resolver is not None
    return not static_resolver_active
2972

3073
def add_dependencies(self, path: Path, files: list[Path]):
74+
# When the tree-sitter resolver is active, we resolve statically
75+
# against the in-project files only — installing the project's
76+
# transitive Python deps just to feed jedi adds 10s–10min of
77+
# zero-value pip work. Short-circuit it.
78+
if self._ts_resolver is not None:
79+
return
3180
if Path(f"{path}/venv").is_dir():
3281
return
3382
subprocess.run(["python3", "-m", "venv", "venv"], cwd=str(path))

0 commit comments

Comments
 (0)