Skip to content

Commit ac93bbe

Browse files
DvirDukhan and Copilot committed
refactor(analyzers): extract TreeSitterAnalyzer base class (T15 #663)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 6681f7e commit ac93bbe

9 files changed

Lines changed: 267 additions & 143 deletions

File tree

api/analyzers/javascript/analyzer.py

Lines changed: 20 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,8 @@
33
from pathlib import Path
44
from typing import Optional
55

6-
from multilspy import SyncLanguageServer
76
from ...entities.entity import Entity
8-
from ...entities.file import File
9-
from ..analyzer import AbstractAnalyzer
7+
from ..tree_sitter_base import TreeSitterAnalyzer
108

119
import tree_sitter_javascript as tsjs
1210
from tree_sitter import Language, Node
@@ -15,13 +13,28 @@
1513
logger = logging.getLogger('code_graph')
1614

1715

18-
class JavaScriptAnalyzer(AbstractAnalyzer):
16+
class JavaScriptAnalyzer(TreeSitterAnalyzer):
1917
"""Analyzer for JavaScript source files using tree-sitter.
2018
2119
Extracts functions, classes, and methods from JavaScript code.
2220
Resolves class inheritance (extends) and function/method call references.
2321
"""
2422

23+
entity_node_types = {
24+
'function_declaration': "Function",
25+
'class_declaration': "Class",
26+
'method_definition': "Method",
27+
}
28+
type_definition_node_types = ('class_declaration',)
29+
callable_definition_node_types = (
30+
'function_declaration',
31+
'method_definition',
32+
'class_declaration',
33+
)
34+
callable_exclude_node_types = ('class_declaration',)
35+
type_resolution_keys = ("base_class",)
36+
method_resolution_keys = ("call",)
37+
2538
def __init__(self) -> None:
2639
"""Initialize the JavaScript analyzer with the tree-sitter JS grammar."""
2740
super().__init__(Language(tsjs.language()))
@@ -33,26 +46,6 @@ def add_dependencies(self, path: Path, files: list[Path]) -> None:
3346
"""
3447
pass
3548

36-
def get_entity_label(self, node: Node) -> str:
37-
"""Return the graph label for a given AST node type.
38-
39-
Args:
40-
node: A tree-sitter AST node representing a JavaScript entity.
41-
42-
Returns:
43-
One of 'Function', 'Class', or 'Method'.
44-
45-
Raises:
46-
ValueError: If the node type is not a recognised entity.
47-
"""
48-
if node.type == 'function_declaration':
49-
return "Function"
50-
elif node.type == 'class_declaration':
51-
return "Class"
52-
elif node.type == 'method_definition':
53-
return "Method"
54-
raise ValueError(f"Unknown entity type: {node.type}")
55-
5649
def get_entity_name(self, node: Node) -> str:
5750
"""Extract the declared name from a JavaScript entity node.
5851
@@ -92,10 +85,6 @@ def get_entity_docstring(self, node: Node) -> Optional[str]:
9285
return None
9386
raise ValueError(f"Unknown entity type: {node.type}")
9487

95-
def get_entity_types(self) -> list[str]:
96-
"""Return the tree-sitter node types recognised as JavaScript entities."""
97-
return ['function_declaration', 'class_declaration', 'method_definition']
98-
9988
def add_symbols(self, entity: Entity) -> None:
10089
"""Extract symbols (references) from a JavaScript entity.
10190
@@ -128,45 +117,12 @@ def is_dependency(self, file_path: str) -> bool:
128117
"""
129118
return "node_modules" in Path(file_path).parts
130119

131-
def resolve_path(self, file_path: str, path: Path) -> str:
132-
"""Resolve an import path relative to the project root."""
133-
return file_path
134-
135-
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
136-
"""Resolve a type reference to its class declaration entity."""
137-
res = []
138-
for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
139-
type_dec = self.find_parent(resolved_node, ['class_declaration'])
140-
if type_dec in file.entities:
141-
res.append(file.entities[type_dec])
142-
return res
143-
144-
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
145-
"""Resolve a call expression to the target function or method entity."""
146-
res = []
120+
def _extract_call_target(self, node: Node) -> Optional[Node]:
121+
"""Extract the callable target from a JavaScript call expression."""
147122
if node.type == 'call_expression':
148123
func_node = node.child_by_field_name('function')
149124
if func_node and func_node.type == 'member_expression':
150125
func_node = func_node.child_by_field_name('property')
151126
if func_node:
152127
node = func_node
153-
for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
154-
method_dec = self.find_parent(resolved_node, ['function_declaration', 'method_definition', 'class_declaration'])
155-
if method_dec and method_dec.type == 'class_declaration':
156-
continue
157-
if method_dec in file.entities:
158-
res.append(file.entities[method_dec])
159-
return res
160-
161-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
162-
"""Dispatch symbol resolution based on the symbol category.
163-
164-
Routes ``base_class`` symbols to type resolution and ``call`` symbols
165-
to method resolution.
166-
"""
167-
if key == "base_class":
168-
return self.resolve_type(files, lsp, file_path, path, symbol)
169-
elif key == "call":
170-
return self.resolve_method(files, lsp, file_path, path, symbol)
171-
else:
172-
raise ValueError(f"Unknown key {key}")
128+
return node

api/analyzers/kotlin/analyzer.py

Lines changed: 29 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from ...entities.entity import Entity
33
from ...entities.file import File
44
from typing import Optional
5-
from ..analyzer import AbstractAnalyzer
5+
from ..tree_sitter_base import TreeSitterAnalyzer
66

77
from multilspy import SyncLanguageServer
88

@@ -12,7 +12,27 @@
1212
import logging
1313
logger = logging.getLogger('code_graph')
1414

15-
class KotlinAnalyzer(AbstractAnalyzer):
15+
class KotlinAnalyzer(TreeSitterAnalyzer):
16+
entity_node_types = {
17+
'class_declaration': "Class",
18+
'object_declaration': "Object",
19+
'function_declaration': "Function",
20+
}
21+
type_definition_node_types = ('class_declaration', 'object_declaration')
22+
callable_definition_node_types = (
23+
'function_declaration',
24+
'class_declaration',
25+
'object_declaration',
26+
)
27+
callable_exclude_node_types = ('class_declaration', 'object_declaration')
28+
type_resolution_keys = (
29+
"implement_interface",
30+
"base_class",
31+
"parameters",
32+
"return_type",
33+
)
34+
method_resolution_keys = ("call",)
35+
1636
def __init__(self) -> None:
1737
super().__init__(Language(tskotlin.language()))
1838

@@ -44,7 +64,7 @@ def get_entity_name(self, node: Node) -> str:
4464
if child.type == 'identifier':
4565
return child.text.decode('utf-8')
4666
raise ValueError(f"Cannot extract name from entity type: {node.type}")
47-
67+
4868
def get_entity_docstring(self, node: Node) -> Optional[str]:
4969
if node.type in ['class_declaration', 'object_declaration', 'function_declaration']:
5070
# Check for KDoc comment (/** ... */) before the node
@@ -54,14 +74,11 @@ def get_entity_docstring(self, node: Node) -> Optional[str]:
5474
if comment_text.startswith('/**'):
5575
return comment_text
5676
return None
57-
raise ValueError(f"Unknown entity type: {node.type}")
77+
raise ValueError(f"Unknown entity type: {node.type}")
5878

59-
def get_entity_types(self) -> list[str]:
60-
return ['class_declaration', 'object_declaration', 'function_declaration']
61-
6279
def _get_delegation_types(self, entity: Entity) -> list[tuple]:
6380
"""Extract type identifiers from delegation specifiers in order.
64-
81+
6582
Returns list of (node, is_constructor_invocation) tuples.
6683
constructor_invocation indicates a superclass; plain user_type indicates an interface.
6784
"""
@@ -91,25 +108,25 @@ def add_symbols(self, entity: Entity) -> None:
91108
entity.add_symbol("base_class", node)
92109
else:
93110
entity.add_symbol("implement_interface", node)
94-
111+
95112
elif entity.node.type == 'object_declaration':
96113
types = self._get_delegation_types(entity)
97114
for node, _ in types:
98115
entity.add_symbol("implement_interface", node)
99-
116+
100117
elif entity.node.type == 'function_declaration':
101118
# Find function calls
102119
captures = self._captures("(call_expression) @reference.call", entity.node)
103120
if 'reference.call' in captures:
104121
for caller in captures['reference.call']:
105122
entity.add_symbol("call", caller)
106-
123+
107124
# Find parameters with types
108125
captures = self._captures("(parameter (user_type (identifier) @parameter))", entity.node)
109126
if 'parameter' in captures:
110127
for parameter in captures['parameter']:
111128
entity.add_symbol("parameters", parameter)
112-
129+
113130
# Find return type
114131
captures = self._captures("(function_declaration (user_type (identifier) @return_type))", entity.node)
115132
if 'return_type' in captures:
@@ -120,18 +137,6 @@ def is_dependency(self, file_path: str) -> bool:
120137
# Check if file is in a dependency directory (e.g., build, .gradle cache)
121138
return "build/" in file_path or ".gradle/" in file_path or "/cache/" in file_path
122139

123-
def resolve_path(self, file_path: str, path: Path) -> str:
124-
# For Kotlin, just return the file path as-is for now
125-
return file_path
126-
127-
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
128-
res = []
129-
for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
130-
type_dec = self.find_parent(resolved_node, ['class_declaration', 'object_declaration'])
131-
if type_dec in file.entities:
132-
res.append(file.entities[type_dec])
133-
return res
134-
135140
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
136141
res = []
137142
# For call expressions, we need to extract the function name
@@ -147,11 +152,3 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
147152
res.append(file.entities[method_dec])
148153
break
149154
return res
150-
151-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
152-
if key in ["implement_interface", "base_class", "parameters", "return_type"]:
153-
return self.resolve_type(files, lsp, file_path, path, symbol)
154-
elif key in ["call"]:
155-
return self.resolve_method(files, lsp, file_path, path, symbol)
156-
else:
157-
raise ValueError(f"Unknown key {key}")

api/analyzers/python/analyzer.py

Lines changed: 23 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,32 @@
11
import os
22
import subprocess
3-
from multilspy import SyncLanguageServer
43
from pathlib import Path
54

65
import tomllib
7-
from ...entities import *
86
from typing import Optional
9-
from ..analyzer import AbstractAnalyzer
7+
8+
from ...entities.entity import Entity
9+
from ..tree_sitter_base import TreeSitterAnalyzer
1010

1111
import tree_sitter_python as tspython
1212
from tree_sitter import Language, Node
1313

1414
import logging
1515
logger = logging.getLogger('code_graph')
1616

17-
class PythonAnalyzer(AbstractAnalyzer):
17+
class PythonAnalyzer(TreeSitterAnalyzer):
18+
entity_node_types = {
19+
'class_definition': "Class",
20+
'function_definition': "Function",
21+
}
22+
type_definition_node_types = ('class_definition',)
23+
callable_definition_node_types = ('function_definition', 'class_definition')
24+
type_resolution_keys = ("base_class", "parameters", "return_type")
25+
method_resolution_keys = ("call",)
26+
1827
def __init__(self) -> None:
1928
super().__init__(Language(tspython.language()))
20-
29+
2130
def add_dependencies(self, path: Path, files: list[Path]):
2231
if Path(f"{path}/venv").is_dir():
2332
return
@@ -40,30 +49,20 @@ def add_dependencies(self, path: Path, files: list[Path]):
4049
for requirement in requirements:
4150
files.extend(Path(f"{path}/venv/lib/").rglob(f"**/site-packages/{requirement}/*.py"))
4251

43-
def get_entity_label(self, node: Node) -> str:
44-
if node.type == 'class_definition':
45-
return "Class"
46-
elif node.type == 'function_definition':
47-
return "Function"
48-
raise ValueError(f"Unknown entity type: {node.type}")
49-
5052
def get_entity_name(self, node: Node) -> str:
5153
if node.type in ['class_definition', 'function_definition']:
5254
return node.child_by_field_name('name').text.decode('utf-8')
5355
raise ValueError(f"Unknown entity type: {node.type}")
54-
56+
5557
def get_entity_docstring(self, node: Node) -> Optional[str]:
5658
if node.type in ['class_definition', 'function_definition']:
5759
body = node.child_by_field_name('body')
5860
if body.child_count > 0 and body.children[0].type == 'expression_statement':
5961
docstring_node = body.children[0].child(0)
6062
return docstring_node.text.decode('utf-8')
6163
return None
62-
raise ValueError(f"Unknown entity type: {node.type}")
63-
64-
def get_entity_types(self) -> list[str]:
65-
return ['class_definition', 'function_definition']
66-
64+
raise ValueError(f"Unknown entity type: {node.type}")
65+
6766
def add_symbols(self, entity: Entity) -> None:
6867
if entity.node.type == 'class_definition':
6968
superclasses = entity.node.child_by_field_name("superclasses")
@@ -88,37 +87,14 @@ def add_symbols(self, entity: Entity) -> None:
8887
def is_dependency(self, file_path: str) -> bool:
8988
return "venv" in file_path
9089

91-
def resolve_path(self, file_path: str, path: Path) -> str:
92-
return file_path
93-
94-
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path, node: Node) -> list[Entity]:
95-
res = []
90+
def _extract_type_target(self, node: Node) -> Optional[Node]:
9691
if node.type == 'attribute':
97-
node = node.child_by_field_name('attribute')
98-
for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
99-
type_dec = self.find_parent(resolved_node, ['class_definition'])
100-
if type_dec in file.entities:
101-
res.append(file.entities[type_dec])
102-
return res
92+
return node.child_by_field_name('attribute')
93+
return node
10394

104-
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
105-
res = []
95+
def _extract_call_target(self, node: Node) -> Optional[Node]:
10696
if node.type == 'call':
10797
node = node.child_by_field_name('function')
108-
if node.type == 'attribute':
98+
if node and node.type == 'attribute':
10999
node = node.child_by_field_name('attribute')
110-
for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
111-
method_dec = self.find_parent(resolved_node, ['function_definition', 'class_definition'])
112-
if not method_dec:
113-
continue
114-
if method_dec in file.entities:
115-
res.append(file.entities[method_dec])
116-
return res
117-
118-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
119-
if key in ["base_class", "parameters", "return_type"]:
120-
return self.resolve_type(files, lsp, file_path, path, symbol)
121-
elif key in ["call"]:
122-
return self.resolve_method(files, lsp, file_path, path, symbol)
123-
else:
124-
raise ValueError(f"Unknown key {key}")
100+
return node

0 commit comments

Comments
 (0)