Skip to content

Commit 3619924

Browse files
gkorland and Copilot committed
Add Kotlin language support
Migrated from FalkorDB/code-graph-backend PR #95. Original issue: FalkorDB/code-graph-backend#93 Resolves #531 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent c372a5e commit 3619924

8 files changed

Lines changed: 186 additions & 33 deletions

File tree

api/analyzers/analyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def add_symbols(self, entity: Entity) -> None:
133133
pass
134134

135135
@abstractmethod
136-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
136+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
137137
"""
138138
Resolve a symbol to an entity.
139139
@@ -144,7 +144,7 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
144144
symbol (Node): The symbol node.
145145
146146
Returns:
147-
list[Entity]: The resolved entities.
147+
Entity: The entity.
148148
"""
149149

150150
pass

api/analyzers/java/analyzer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
120120
res.append(file.entities[method_dec])
121121
return res
122122

123-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
123+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
124124
if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]:
125125
return self.resolve_type(files, lsp, file_path, path, symbol)
126126
elif key in ["call"]:

api/analyzers/kotlin/__init__.py

Whitespace-only changes.

api/analyzers/kotlin/analyzer.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
from pathlib import Path
2+
from ...entities import *
3+
from typing import Optional
4+
from ..analyzer import AbstractAnalyzer
5+
6+
from multilspy import SyncLanguageServer
7+
8+
import tree_sitter_kotlin as tskotlin
9+
from tree_sitter import Language, Node
10+
11+
import logging
12+
logger = logging.getLogger('code_graph')
13+
14+
class KotlinAnalyzer(AbstractAnalyzer):
    """Tree-sitter based analyzer for Kotlin sources.

    Classes (including interfaces), objects and functions are exposed as
    entities.  For each entity the analyzer records the syntax nodes that
    refer to other entities (supertypes, calls, parameter types and return
    types) so that they can later be resolved through ``resolve_symbol``.
    """

    # Node types that declare a type-like entity.
    _TYPE_DECLARATIONS = ['class_declaration', 'object_declaration']

    # Path components that mark a file as build output / dependency cache.
    _DEPENDENCY_DIRS = frozenset({'build', '.gradle', 'cache'})

    def __init__(self) -> None:
        super().__init__(Language(tskotlin.language()))
        # Compiled tree-sitter queries keyed by their pattern text, filled
        # lazily so every pattern is compiled at most once per analyzer.
        self._queries: dict = {}

    def _captures(self, pattern: str, node: Node, capture: str) -> list:
        """Run ``pattern`` on ``node`` and return the nodes bound to ``capture``.

        The compiled query is cached on the instance; an empty list is
        returned when the capture name is absent from the result.
        """
        query = self._queries.get(pattern)
        if query is None:
            query = self._queries[pattern] = self.language.query(pattern)
        captures = query.captures(node)
        return captures[capture] if capture in captures else []

    def add_dependencies(self, path: Path, files: list[Path]) -> None:
        """Intentionally a no-op: dependency resolution is not implemented.

        A future version could inspect build.gradle or pom.xml here.
        """
        pass

    def get_entity_label(self, node: Node) -> str:
        """Return the graph label for an entity node.

        Returns:
            "Interface", "Class", "Object", "Method" or "Function".

        Raises:
            ValueError: If ``node`` is not one of the supported entity types.
        """
        if node.type == 'class_declaration':
            # Interfaces are class_declaration nodes too; they are told apart
            # by the presence of an `interface` keyword child.
            if any(child.type == 'interface' for child in node.children):
                return "Interface"
            return "Class"
        if node.type == 'object_declaration':
            return "Object"
        if node.type == 'function_declaration':
            # A function whose direct parent is a class body is a method;
            # anything else is reported as a plain function.
            parent = node.parent
            if parent is not None and parent.type == 'class_body':
                return "Method"
            return "Function"
        raise ValueError(f"Unknown entity type: {node.type}")

    def get_entity_name(self, node: Node) -> str:
        """Return the declared name of an entity node.

        Raises:
            ValueError: If the node type is unsupported or no identifier
                child is present.
        """
        if node.type in self._TYPE_DECLARATIONS:
            identifier_type = 'type_identifier'
        elif node.type == 'function_declaration':
            identifier_type = 'simple_identifier'
        else:
            identifier_type = None

        if identifier_type is not None:
            # The first matching direct child carries the name.
            for child in node.children:
                if child.type == identifier_type:
                    return child.text.decode('utf-8')
        raise ValueError(f"Cannot extract name from entity type: {node.type}")

    def get_entity_docstring(self, node: Node) -> Optional[str]:
        """Return the KDoc comment directly preceding ``node``, if any.

        Only a ``multiline_comment`` previous sibling whose text starts with
        ``/**`` counts as documentation.

        Raises:
            ValueError: If ``node`` is not one of the supported entity types.
        """
        if node.type not in self.get_entity_types():
            raise ValueError(f"Unknown entity type: {node.type}")

        previous = node.prev_sibling
        if previous and previous.type == "multiline_comment":
            comment_text = previous.text.decode('utf-8')
            if comment_text.startswith('/**'):
                return comment_text
        return None

    def get_entity_types(self) -> list[str]:
        """Return the tree-sitter node types that are treated as entities."""
        return ['class_declaration', 'object_declaration', 'function_declaration']

    def _add_supertype_symbols(self, entity: Entity, interface_key: str) -> None:
        """Record the supertypes listed in the entity's delegation specifiers.

        A specifier holding a ``constructor_invocation`` (``Base(...)``) is
        recorded as "base_class"; a specifier holding a bare ``user_type`` is
        recorded under ``interface_key``.  Node types are matched by walking
        children, so a specifier shape that does not occur is simply skipped.
        """
        specifiers = self._captures("(delegation_specifier) @specifier", entity.node, 'specifier')
        for specifier in specifiers:
            for child in specifier.children:
                if child.type == 'constructor_invocation':
                    key = "base_class"
                    type_nodes = [sub for sub in child.children if sub.type == 'user_type']
                elif child.type == 'user_type':
                    key = interface_key
                    type_nodes = [child]
                else:
                    continue
                for type_node in type_nodes:
                    for identifier in type_node.children:
                        if identifier.type == 'type_identifier':
                            entity.add_symbol(key, identifier)

    def add_symbols(self, entity: Entity) -> None:
        """Attach the unresolved references found in ``entity`` to it.

        Classes and objects record their supertypes ("base_class",
        "implement_interface", or "extend_interface" for interfaces);
        functions record "call", "parameters" and "return_type" symbols.
        """
        node = entity.node
        if node.type == 'class_declaration':
            # An interface listing another interface extends it rather than
            # implementing it.
            if self.get_entity_label(node) == "Interface":
                self._add_supertype_symbols(entity, "extend_interface")
            else:
                self._add_supertype_symbols(entity, "implement_interface")

        elif node.type == 'object_declaration':
            self._add_supertype_symbols(entity, "implement_interface")

        elif node.type == 'function_declaration':
            # Function calls made anywhere inside the declaration.
            for call in self._captures("(call_expression) @reference.call", node, 'reference.call'):
                entity.add_symbol("call", call)

            # Parameter types.
            for parameter in self._captures(
                    "(parameter type: (user_type (type_identifier) @parameter))", node, 'parameter'):
                entity.add_symbol("parameters", parameter)

            # Return type.
            for return_type in self._captures(
                    "(function_declaration type: (user_type (type_identifier) @return_type))",
                    node, 'return_type'):
                entity.add_symbol("return_type", return_type)

    def is_dependency(self, file_path: str) -> bool:
        """Tell whether ``file_path`` lies in a build or dependency-cache directory.

        Whole path components are compared, so ``rebuild/Foo.kt`` is not
        mistaken for something under ``build/``.
        """
        return not self._DEPENDENCY_DIRS.isdisjoint(Path(file_path).parts)

    def resolve_path(self, file_path: str, path: Path) -> str:
        """Return ``file_path`` unchanged; no path rewriting is done for Kotlin."""
        return file_path

    def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a type reference to the class/object entities declaring it.

        Each location produced by ``self.resolve`` is mapped to its enclosing
        class or object declaration; declarations that are not known entities
        of the resolved file are dropped.
        """
        res = []
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            type_dec = self.find_parent(resolved_node, ['class_declaration', 'object_declaration'])
            if type_dec in file.entities:
                res.append(file.entities[type_dec])
        return res

    def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a call expression to the function entities it may target.

        Only ``call_expression`` nodes are handled; any other node yields an
        empty list.
        """
        res = []
        if node.type != 'call_expression':
            return res

        for child in node.children:
            if child.type not in ['simple_identifier', 'navigation_expression']:
                continue
            for file, resolved_node in self.resolve(files, lsp, file_path, path, child):
                method_dec = self.find_parent(resolved_node, ['function_declaration', 'class_declaration', 'object_declaration'])
                # A class/object hit means the call targets a type, not a
                # function, so it is not reported as a method.
                if method_dec and method_dec.type in ['class_declaration', 'object_declaration']:
                    continue
                if method_dec in file.entities:
                    res.append(file.entities[method_dec])
            # Only the first callee-like child is considered.
            break
        return res

    def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
        """Resolve ``symbol`` according to the relation ``key`` it was recorded under.

        Returns:
            The list of matching entities (possibly empty).

        Raises:
            ValueError: If ``key`` is not a known relation key.
        """
        if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]:
            return self.resolve_type(files, lsp, file_path, path, symbol)
        elif key in ["call"]:
            return self.resolve_method(files, lsp, file_path, path, symbol)
        else:
            raise ValueError(f"Unknown key {key}")

api/analyzers/python/analyzer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
115115
res.append(file.entities[method_dec])
116116
return res
117117

118-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
118+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
119119
if key in ["base_class", "parameters", "return_type"]:
120120
return self.resolve_type(files, lsp, file_path, path, symbol)
121121
elif key in ["call"]:

api/analyzers/source_analyzer.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
from .analyzer import AbstractAnalyzer
1010
# from .c.analyzer import CAnalyzer
1111
from .java.analyzer import JavaAnalyzer
12+
from .kotlin.analyzer import KotlinAnalyzer
1213
from .python.analyzer import PythonAnalyzer
13-
from .csharp.analyzer import CSharpAnalyzer
1414

1515
from multilspy import SyncLanguageServer
1616
from multilspy.multilspy_config import MultilspyConfig
@@ -26,7 +26,8 @@
2626
# '.h': CAnalyzer(),
2727
'.py': PythonAnalyzer(),
2828
'.java': JavaAnalyzer(),
29-
'.cs': CSharpAnalyzer()}
29+
'.kt': KotlinAnalyzer(),
30+
'.kts': KotlinAnalyzer()}
3031

3132
class NullLanguageServer:
3233
def start_server(self):
@@ -138,12 +139,14 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
138139
lsps[".py"] = SyncLanguageServer.create(config, logger, str(path))
139140
else:
140141
lsps[".py"] = NullLanguageServer()
141-
if any(path.rglob('*.cs')):
142-
config = MultilspyConfig.from_dict({"code_language": "csharp"})
143-
lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
142+
if any(path.rglob('*.kt')) or any(path.rglob('*.kts')):
143+
# For now, use NullLanguageServer for Kotlin as we need to set up kotlin-language-server
144+
lsps[".kt"] = NullLanguageServer()
145+
lsps[".kts"] = NullLanguageServer()
144146
else:
145-
lsps[".cs"] = NullLanguageServer()
146-
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
147+
lsps[".kt"] = NullLanguageServer()
148+
lsps[".kts"] = NullLanguageServer()
149+
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".kt"].start_server(), lsps[".kts"].start_server():
147150
files_len = len(self.files)
148151
for i, file_path in enumerate(files):
149152
file = self.files[file_path]
@@ -152,29 +155,25 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
152155
entity.resolved_symbol(lambda key, symbol, fp=file_path: analyzers[fp.suffix].resolve_symbol(self.files, lsps[fp.suffix], fp, path, key, symbol))
153156
for key, symbols in entity.symbols.items():
154157
for symbol in symbols:
155-
if len(symbol.resolved_symbol) == 0:
156-
continue
157-
resolved_symbol = next(iter(symbol.resolved_symbol))
158158
if key == "base_class":
159-
graph.connect_entities("EXTENDS", entity.id, resolved_symbol.id)
159+
graph.connect_entities("EXTENDS", entity.id, symbol.id)
160160
elif key == "implement_interface":
161-
graph.connect_entities("IMPLEMENTS", entity.id, resolved_symbol.id)
161+
graph.connect_entities("IMPLEMENTS", entity.id, symbol.id)
162162
elif key == "extend_interface":
163-
graph.connect_entities("EXTENDS", entity.id, resolved_symbol.id)
163+
graph.connect_entities("EXTENDS", entity.id, symbol.id)
164164
elif key == "call":
165-
graph.connect_entities("CALLS", entity.id, resolved_symbol.id, {"line": symbol.symbol.start_point.row, "text": symbol.symbol.text.decode("utf-8")})
165+
graph.connect_entities("CALLS", entity.id, symbol.id)
166166
elif key == "return_type":
167-
graph.connect_entities("RETURNS", entity.id, resolved_symbol.id)
167+
graph.connect_entities("RETURNS", entity.id, symbol.id)
168168
elif key == "parameters":
169-
graph.connect_entities("PARAMETERS", entity.id, resolved_symbol.id)
169+
graph.connect_entities("PARAMETERS", entity.id, symbol.id)
170170

171171
def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None:
172172
self.first_pass(path, files, [], graph)
173173
self.second_pass(graph, files, path)
174174

175175
def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
176-
path = path.resolve()
177-
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))
176+
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.kt")) + list(path.rglob("*.kts"))
178177
# First pass analysis of the source code
179178
self.first_pass(path, files, ignore, graph)
180179

api/entities/entity.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,31 @@
11
from typing import Callable, Self
22
from tree_sitter import Node
33

4-
class Symbol:
5-
def __init__(self, symbol: Node):
6-
self.symbol = symbol
7-
self.resolved_symbol = set()
8-
9-
def add_resolve_symbol(self, resolved_symbol):
10-
self.resolved_symbol.add(resolved_symbol)
114

125
class Entity:
136
def __init__(self, node: Node):
147
self.node = node
15-
self.symbols: dict[str, list[Symbol]] = {}
8+
self.symbols: dict[str, list[Node]] = {}
9+
self.resolved_symbols: dict[str, set[Self]] = {}
1610
self.children: dict[Node, Self] = {}
1711

1812
def add_symbol(self, key: str, symbol: Node):
1913
if key not in self.symbols:
2014
self.symbols[key] = []
21-
self.symbols[key].append(Symbol(symbol))
15+
self.symbols[key].append(symbol)
16+
17+
def add_resolved_symbol(self, key: str, symbol: Self):
18+
if key not in self.resolved_symbols:
19+
self.resolved_symbols[key] = set()
20+
self.resolved_symbols[key].add(symbol)
2221

2322
def add_child(self, child: Self):
2423
child.parent = self
2524
self.children[child.node] = child
2625

2726
def resolved_symbol(self, f: Callable[[str, Node], list[Self]]):
2827
for key, symbols in self.symbols.items():
28+
self.resolved_symbols[key] = set()
2929
for symbol in symbols:
30-
for resolved_symbol in f(key, symbol.symbol):
31-
symbol.add_resolve_symbol(resolved_symbol)
30+
for resolved_symbol in f(key, symbol):
31+
self.resolved_symbols[key].add(resolved_symbol)

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies = [
1313
"tree-sitter-c>=0.24.1,<0.25.0",
1414
"tree-sitter-python>=0.25.0,<0.26.0",
1515
"tree-sitter-java>=0.23.5,<0.24.0",
16+
"tree-sitter-kotlin>=1.1.0,<2.0.0",
1617
"tree-sitter-c-sharp>=0.23.1,<0.24.0",
1718
"fastapi>=0.115.0,<1.0.0",
1819
"uvicorn[standard]>=0.34.0,<1.0.0",

0 commit comments

Comments
 (0)