Skip to content
This repository was archived by the owner on Mar 10, 2026. It is now read-only.

Commit aeeb7cd

Browse files
authored
Merge branch 'main' into staging
2 parents 9730af2 + 4fefd9f commit aeeb7cd

File tree

11 files changed

+320
-8
lines changed

11 files changed

+320
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ a graph representation of your source code, the graph name should be the same as
5656
the name of the folder you've requested to analyze, for the example above a graph named:
5757
"GraphRAG-SDK".
5858

59-
At the moment only the Python and C languages are supported, we do intend to support additional languages.
59+
At the moment, the Python, Java, and C# languages are supported; we intend to support additional languages.
6060

6161
At this point you can explore and query your source code using various tools
6262
Here are several options:

api/analyzers/analyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def add_symbols(self, entity: Entity) -> None:
127127
pass
128128

129129
@abstractmethod
130-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
130+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
131131
"""
132132
Resolve a symbol to an entity.
133133
@@ -138,7 +138,7 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
138138
symbol (Node): The symbol node.
139139
140140
Returns:
141-
Entity: The entity.
141+
list[Entity]: The resolved entities.
142142
"""
143143

144144
pass

api/analyzers/csharp/__init__.py

Whitespace-only changes.

api/analyzers/csharp/analyzer.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import subprocess
2+
from pathlib import Path
3+
4+
from multilspy import SyncLanguageServer
5+
from ...entities.entity import Entity
6+
from ...entities.file import File
7+
from typing import Optional
8+
from ..analyzer import AbstractAnalyzer
9+
10+
import tree_sitter_c_sharp as tscsharp
11+
from tree_sitter import Language, Node, QueryCursor
12+
13+
import logging
14+
logger = logging.getLogger('code_graph')
15+
16+
class CSharpAnalyzer(AbstractAnalyzer):
    """Analyzer for C# source files built on the tree-sitter C# grammar.

    Recognizes class, interface, enum, struct, method and constructor
    declarations as entities, records the symbols each entity references
    (base types, calls, parameter types, return type) and resolves those
    symbols to entities via ``self.resolve``.
    """

    # Node types of C# type declarations.
    _TYPE_DECLARATIONS = (
        'class_declaration',
        'interface_declaration',
        'enum_declaration',
        'struct_declaration',
    )

    # Node types of callable declarations.
    _METHOD_DECLARATIONS = ('method_declaration', 'constructor_declaration')

    # Entity label for every supported declaration node type.
    # Insertion order is the order reported by get_entity_types().
    _LABELS = {
        'class_declaration': "Class",
        'interface_declaration': "Interface",
        'enum_declaration': "Enum",
        'struct_declaration': "Struct",
        'method_declaration': "Method",
        'constructor_declaration': "Constructor",
    }

    def __init__(self) -> None:
        super().__init__(Language(tscsharp.language()))

    def _captures(self, pattern: str, node: Node) -> dict:
        """Run a tree-sitter query and return captures dict."""
        query = self.language.query(pattern)
        cursor = QueryCursor(query)
        return cursor.captures(node)

    def _own_base_types(self, node: Node) -> list[Node]:
        """Return the base-list entries declared directly on ``node``.

        Only ``base_list`` nodes that are direct children of ``node`` are
        considered, so the base types of nested type declarations are not
        attributed to the enclosing type.
        """
        return [
            base_type
            for child in node.children
            if child.type == 'base_list'
            for base_type in child.named_children
        ]

    def add_dependencies(self, path: Path, files: list[Path]):
        """Run ``dotnet restore`` in ``path`` when it holds a project or solution.

        Nothing is done when a ``temp_deps_cs`` directory already exists under
        ``path`` or when ``path`` contains no ``*.csproj`` / ``*.sln`` file.
        A missing ``dotnet`` executable or a failing restore is logged as a
        warning instead of aborting the analysis.

        Args:
            path (Path): Root folder of the analyzed sources.
            files (list[Path]): Analyzed files (not used by this analyzer).
        """
        root = Path(path)
        if (root / "temp_deps_cs").is_dir():
            return
        if not (any(root.glob("*.csproj")) or any(root.glob("*.sln"))):
            return
        try:
            result = subprocess.run(["dotnet", "restore"], cwd=str(path))
        except FileNotFoundError:
            # The dotnet CLI is not installed or not on PATH.
            logger.warning("dotnet executable not found; skipping 'dotnet restore' in %s", path)
            return
        if result.returncode != 0:
            logger.warning("'dotnet restore' exited with code %s in %s", result.returncode, path)

    def get_entity_label(self, node: Node) -> str:
        """Return the entity label for a declaration node.

        Raises:
            ValueError: If the node type is not a supported declaration.
        """
        try:
            return self._LABELS[node.type]
        except KeyError:
            raise ValueError(f"Unknown entity type: {node.type}") from None

    def get_entity_name(self, node: Node) -> str:
        """Return the declared name of ``node``, or ``''`` if it has no name child.

        Raises:
            ValueError: If the node type is not a supported declaration.
        """
        if node.type not in self._LABELS:
            raise ValueError(f"Unknown entity type: {node.type}")
        name_node = node.child_by_field_name('name')
        if name_node is None:
            return ''
        return name_node.text.decode('utf-8')

    def get_entity_docstring(self, node: Node) -> Optional[str]:
        """Return the comment block immediately preceding ``node``, if any.

        Raises:
            ValueError: If the node type is not a supported declaration.
        """
        if node.type not in self._LABELS:
            raise ValueError(f"Unknown entity type: {node.type}")
        # Walk back through contiguous comment siblings to collect
        # multi-line XML doc comments (each /// line is a separate node).
        lines = []
        sibling = node.prev_sibling
        while sibling and sibling.type == "comment":
            lines.append(sibling.text.decode('utf-8'))
            sibling = sibling.prev_sibling
        if not lines:
            return None
        # Comments were collected bottom-up; restore source order.
        lines.reverse()
        return '\n'.join(lines)

    def get_entity_types(self) -> list[str]:
        """Return the declaration node types treated as entities."""
        return list(self._LABELS)

    def add_symbols(self, entity: Entity) -> None:
        """Record the symbols referenced by ``entity`` for later resolution.

        Types contribute their base-list entries; methods and constructors
        contribute invocation expressions and parameter types; methods also
        contribute their return type.
        """
        node = entity.node
        if node.type in ('class_declaration', 'struct_declaration'):
            first = True
            for base_type in self._own_base_types(node):
                if first and node.type == 'class_declaration':
                    # NOTE: Without semantic analysis, we cannot distinguish a base
                    # class from an interface in C# base_list. By convention, the
                    # base class is listed first; if a class only implements
                    # interfaces, this will produce a spurious base_class edge that
                    # the LSP resolution in second_pass can correct.
                    entity.add_symbol("base_class", base_type)
                    first = False
                else:
                    entity.add_symbol("implement_interface", base_type)
        elif node.type == 'interface_declaration':
            for base_type in self._own_base_types(node):
                entity.add_symbol("extend_interface", base_type)
        elif node.type in self._METHOD_DECLARATIONS:
            captures = self._captures("(invocation_expression) @reference.call", node)
            for call in captures.get('reference.call', []):
                entity.add_symbol("call", call)
            captures = self._captures("(parameter_list (parameter type: (_) @parameter))", node)
            for parameter in captures.get('parameter', []):
                entity.add_symbol("parameters", parameter)
            if node.type == 'method_declaration':
                # NOTE(review): newer tree-sitter-c-sharp grammars appear to name
                # the return-type field `returns`, older ones `type`; try both
                # so the return type is found either way.
                return_type = node.child_by_field_name('returns')
                if return_type is None:
                    return_type = node.child_by_field_name('type')
                if return_type is not None:
                    entity.add_symbol("return_type", return_type)

    def is_dependency(self, file_path: str) -> bool:
        """Return True when ``file_path`` contains ``temp_deps_cs``."""
        return "temp_deps_cs" in file_path

    def resolve_path(self, file_path: str, path: Path) -> str:
        """Return ``file_path`` unchanged."""
        return file_path

    def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a type reference to the type entities that declare it.

        Returns:
            list[Entity]: Entities whose declaration encloses a resolved node.
        """
        res = []
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            type_dec = self.find_parent(resolved_node, list(self._TYPE_DECLARATIONS))
            if type_dec in file.entities:
                res.append(file.entities[type_dec])
        return res

    def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
        """Resolve a call site to the method or constructor entities it targets.

        Returns:
            list[Entity]: Entities of the resolved method/constructor declarations.
        """
        if node.type == 'invocation_expression':
            # Resolve the callee name: for `a.b.Method(...)` that is the
            # `name` child of the member access, otherwise the function itself.
            func_node = node.child_by_field_name('function')
            if func_node and func_node.type == 'member_access_expression':
                func_node = func_node.child_by_field_name('name')
            if func_node:
                node = func_node
        res = []
        parent_types = list(self._METHOD_DECLARATIONS) + list(self._TYPE_DECLARATIONS)
        for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
            method_dec = self.find_parent(resolved_node, parent_types)
            # Reaching a type declaration first means the target is not
            # inside a method or constructor; skip it.
            if method_dec and method_dec.type in self._TYPE_DECLARATIONS:
                continue
            if method_dec in file.entities:
                res.append(file.entities[method_dec])
        return res

    def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
        """Resolve ``symbol`` according to the kind of reference named by ``key``.

        Raises:
            ValueError: If ``key`` is not a known symbol kind.
        """
        if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]:
            return self.resolve_type(files, lsp, file_path, path, symbol)
        elif key in ["call"]:
            return self.resolve_method(files, lsp, file_path, path, symbol)
        else:
            raise ValueError(f"Unknown key {key}")

api/analyzers/java/analyzer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
125125
res.append(file.entities[method_dec])
126126
return res
127127

128-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
128+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
129129
if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]:
130130
return self.resolve_type(files, lsp, file_path, path, symbol)
131131
elif key in ["call"]:

api/analyzers/python/analyzer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
114114
res.append(file.entities[method_dec])
115115
return res
116116

117-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
117+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
118118
if key in ["base_class", "parameters", "return_type"]:
119119
return self.resolve_type(files, lsp, file_path, path, symbol)
120120
elif key in ["call"]:

api/analyzers/source_analyzer.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# from .c.analyzer import CAnalyzer
1111
from .java.analyzer import JavaAnalyzer
1212
from .python.analyzer import PythonAnalyzer
13+
from .csharp.analyzer import CSharpAnalyzer
1314

1415
from multilspy import SyncLanguageServer
1516
from multilspy.multilspy_config import MultilspyConfig
@@ -24,7 +25,8 @@
2425
# '.c': CAnalyzer(),
2526
# '.h': CAnalyzer(),
2627
'.py': PythonAnalyzer(),
27-
'.java': JavaAnalyzer()}
28+
'.java': JavaAnalyzer(),
29+
'.cs': CSharpAnalyzer()}
2830

2931
class NullLanguageServer:
3032
def start_server(self):
@@ -136,7 +138,12 @@ def second_pass(self, graph: Graph, files: list[Path], path: Path) -> None:
136138
lsps[".py"] = SyncLanguageServer.create(config, logger, str(path))
137139
else:
138140
lsps[".py"] = NullLanguageServer()
139-
with lsps[".java"].start_server(), lsps[".py"].start_server():
141+
if any(path.rglob('*.cs')):
142+
config = MultilspyConfig.from_dict({"code_language": "csharp"})
143+
lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
144+
else:
145+
lsps[".cs"] = NullLanguageServer()
146+
with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
140147
files_len = len(self.files)
141148
for i, file_path in enumerate(files):
142149
file = self.files[file_path]
@@ -166,7 +173,8 @@ def analyze_files(self, files: list[Path], path: Path, graph: Graph) -> None:
166173
self.second_pass(graph, files, path)
167174

168175
def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
169-
files = list(path.rglob("*.java")) + list(path.rglob("*.py"))
176+
path = path.resolve()
177+
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))
170178
# First pass analysis of the source code
171179
self.first_pass(path, files, ignore, graph)
172180

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies = [
1313
"tree-sitter-c>=0.24.1,<0.25.0",
1414
"tree-sitter-python>=0.25.0,<0.26.0",
1515
"tree-sitter-java>=0.23.5,<0.24.0",
16+
"tree-sitter-c-sharp>=0.23.1,<0.24.0",
1617
"flask>=3.1.0,<4.0.0",
1718
"python-dotenv>=1.0.1,<2.0.0",
1819
"multilspy @ git+https://github.com/AviAvni/multilspy.git@python-init-params",
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
using System;
2+
3+
namespace TestProject
4+
{
5+
// Logging contract: a single Log method taking the message text.
public interface ILogger
6+
{
7+
void Log(string message);
8+
}
9+
10+
// ILogger implementation that writes each message with Console.WriteLine.
public class ConsoleLogger : ILogger
11+
{
12+
public void Log(string message)
13+
{
14+
Console.WriteLine(message);
15+
}
16+
}
17+
18+
/// <summary>
19+
/// Represents a task to be executed.
20+
/// </summary>
21+
public class Task
22+
{
23+
public string Name { get; set; }
24+
public int Duration { get; set; }
25+
26+
// Logger supplied to the constructor; used by every method below.
private ILogger _logger;
27+
28+
// Stores the arguments and logs a "Task created" message through the logger.
public Task(string name, int duration, ILogger logger)
29+
{
30+
Name = name;
31+
Duration = duration;
32+
_logger = logger;
33+
_logger.Log("Task created: " + name);
34+
}
35+
36+
// Logs an "Executing" message and always returns true.
public bool Execute()
37+
{
38+
_logger.Log("Executing: " + Name);
39+
return true;
40+
}
41+
42+
// Logs an "Aborting" message and then calls Execute().
// NOTE: the `delay` parameter is not used by this body.
public void Abort(float delay)
43+
{
44+
_logger.Log("Aborting: " + Name);
45+
Execute();
46+
}
47+
}
48+
}

tests/test_csharp_analyzer.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import os
2+
import unittest
3+
4+
from api import SourceAnalyzer, Graph
5+
6+
7+
class Test_CSharp_Analyzer(unittest.TestCase):
    """Analyze the C# fixture folder and check the nodes and edges in the graph."""

    def setUp(self):
        # Graph the analyzer writes into; removed again in tearDown.
        self.g = Graph("csharp")

    def tearDown(self):
        self.g.delete()

    def test_analyzer(self):
        """Run the analyzer on source_files/csharp and query the resulting graph."""
        analyzer = SourceAnalyzer()

        # The fixture folder sits next to this test module.
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        path = str(os.path.join(tests_dir, 'source_files', 'csharp'))

        analyzer.analyze_local_folder(path, self.g)

        def rows(query, *params):
            # Forward the query (and optional parameters) and return its rows.
            return self.g._query(query, *params).result_set

        # The ILogger interface and the ConsoleLogger and Task classes
        # must each be found exactly once.
        type_queries = (
            "MATCH (n:Interface {name: 'ILogger'}) RETURN n LIMIT 1",
            "MATCH (n:Class {name: 'ConsoleLogger'}) RETURN n LIMIT 1",
            "MATCH (n:Class {name: 'Task'}) RETURN n LIMIT 1",
        )
        for type_query in type_queries:
            self.assertEqual(len(rows(type_query)), 1)

        # Every method name must match at least one node.
        by_name = "MATCH (n {name: $name}) RETURN n LIMIT 1"
        for method_name in ['Log', 'Execute', 'Abort']:
            found = rows(by_name, {'name': method_name})
            self.assertGreaterEqual(len(found), 1, f"Method {method_name} not found")

        # The Task constructor must be found exactly once.
        constructors = rows("MATCH (n:Constructor {name: 'Task'}) RETURN n LIMIT 1")
        self.assertEqual(len(constructors), 1)

        # DEFINES edges must exist from files and from the Task class.
        file_defines = rows("MATCH (f:File)-[:DEFINES]->(n) RETURN count(n)")
        self.assertGreater(file_defines[0][0], 0)

        class_defines = rows("MATCH (c:Class {name: 'Task'})-[:DEFINES]->(m) RETURN count(m)")
        self.assertGreater(class_defines[0][0], 0)

        # ConsoleLogger must have an IMPLEMENTS edge to ILogger.
        implements = rows("MATCH (c:Class {name: 'ConsoleLogger'})-[:IMPLEMENTS]->(i:Interface {name: 'ILogger'}) RETURN c, i LIMIT 1")
        self.assertEqual(len(implements), 1)

0 commit comments

Comments
 (0)