Skip to content

Commit 5db867a

Browse files
DvirDukhan and Copilot committed
Merge origin/staging into bench-combined; resolve conflicts
Conflict resolution (8 files):
- api/llm.py -> OURS (bench): keep the graphrag-sdk 1.x text-to-Cypher rewrite. Required for consistency with the merged pyproject pin (graphrag-sdk>=1.1.1); staging's llm.py still imports KnowledgeGraph, which 1.x removed, so taking staging would raise ImportError at load.
- api/analyzers/source_analyzer.py, analyzer.py, python/ts_resolver.py, api/mcp/tools/structural.py -> STAGING: canonical/superset versions (parallel index workers + missing-file guard, refined logging, per-match capture alignment + threading lock, impact_analysis limit).
- tests/analyzers/test_ts_python_resolver.py, tests/mcp/fixtures/expected.yaml, tests/mcp/test_impact_analysis.py -> STAGING: refined fixtures and added regression tests.

Align MCP surface to staging (drop the deliberately-removed `ask` tool):
- Revert api/mcp/tools/__init__.py to structural-only registration.
- Delete api/mcp/tools/ask.py, api/mcp/graphrag_init.py, api/mcp/code_prompts.py and their tests.

The ask tool was dropped from staging via #702 and is broken under graphrag-sdk 1.x (it needs the 0.8 KnowledgeGraph API and api.llm.define_ontology, both gone in the bench rewrite).

Tests: tests/mcp + tests/analyzers (79) and bench/swebench suites pass (bench_runner needs the mini-swe-agent extra, installed in CI).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2 parents 403f958 + 126c672 commit 5db867a

35 files changed

Lines changed: 1286 additions & 746 deletions

.github/workflows/release-image.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
fi
2424
2525
- name: Login to DockerHub
26-
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4
26+
uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4
2727
with:
2828
username: ${{ secrets.DOCKER_USERNAME }}
2929
password: ${{ secrets.DOCKER_PASSWORD }}

api/analyzers/analyzer.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,13 @@ def resolve_path(self, file_path: str, path: Path) -> str:
6464
def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[tuple[File, Node]]:
    """
    Resolve the symbol at ``node`` to its definition site(s) via the LSP.

    Args:
        files (dict[Path, File]): Parsed files keyed by path; only
            definitions that land in one of these files are returned.
        lsp (SyncLanguageServer): Server queried with ``request_definition``.
        file_path (Path): File containing ``node``.
        path (Path): Passed through to ``resolve_path`` together with each
            location's ``absolutePath``.
        node (Node): Syntax node whose start point is sent to the server.

    Returns:
        list[tuple[File, Node]]: One ``(file, node)`` pair per definition
        location found in ``files``. Empty when nothing resolves or when
        any step raises (the failure is logged, not propagated).
    """
    try:
        locations = lsp.request_definition(str(file_path), node.start_point.row, node.start_point.column)
        resolved: list[tuple[File, Node]] = []
        for location in locations:
            if not location:
                continue
            # Compute the target path once per location instead of three times.
            target_path = Path(self.resolve_path(location['absolutePath'], path))
            if target_path not in files:
                continue
            target_file = files[target_path]
            start = location['range']['start']
            end = location['range']['end']
            target_node = target_file.tree.root_node.descendant_for_point_range(
                Point(start['line'], start['character']),
                Point(end['line'], end['character']),
            )
            resolved.append((target_file, target_node))
        return resolved
    except Exception:
        # Best-effort: a failed lookup must not abort analysis of the file.
        import logging
        logging.getLogger(__name__).warning(
            "resolve() failed for %s @%d:%d",
            file_path, node.start_point.row, node.start_point.column,
            exc_info=True,
        )
        return []
7576

@@ -84,6 +85,39 @@ def needs_lsp(self) -> bool:
8485
"""
8586
return True
8687

88+
def build_import_index(self, files: dict[Path, File], root: Path) -> object:
    """
    Default import-index hook: this base implementation builds nothing.

    Language analyzers that support import resolution override this to
    return a lookup structure; whatever is returned here is handed back
    unchanged to ``resolve_imports`` as its ``index`` argument.

    Args:
        files (dict[Path, File]): All parsed files keyed by absolute path.
        root (Path): The analyzed repository root.

    Returns:
        object: ``None`` here, meaning import resolution is unsupported.
    """

    return None
103+
104+
def resolve_imports(self, file: File, root: Path, index: object) -> list[File]:
    """
    Default import-resolution hook: this base implementation finds nothing.

    Overrides map the import statements of ``file`` to the in-repo files
    they refer to, syntactically (no LSP). The orchestrator links each
    returned File to ``file`` with an ``IMPORTS`` edge.

    Args:
        file (File): The importing file (already parsed; ``file.tree`` set).
        root (Path): The analyzed repository root.
        index (object): The structure returned by ``build_import_index``.

    Returns:
        list[File]: Always empty here. Overrides return the imported
        in-repo files, deduplicated and excluding ``file`` itself.
    """

    return []
120+
87121
@abstractmethod
88122
def add_dependencies(self, path: Path, files: list[Path]):
89123
"""

api/analyzers/java/analyzer.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@ def add_dependencies(self, path: Path, files: list[Path]):
2323
# if not Path("java-decompiler-engine-243.23654.153.jar").is_file():
2424
# subprocess.run(["wget", "https://www.jetbrains.com/intellij-repository/releases/com/jetbrains/intellij/java/java-decompiler-engine/243.23654.153/java-decompiler-engine-243.23654.153.jar"])
2525
subprocess.run(["rm", "-rf", f"{path}/temp_deps"])
26-
pom = ElementTree.parse(str(path) + '/pom.xml')
26+
pom_path = Path(path) / 'pom.xml'
27+
if not pom_path.is_file():
28+
# Non-Maven Java sources (e.g. Gradle, or polyglot repos with stray
29+
# .java files): no pom.xml means no Maven dependencies to resolve.
30+
logger.info("no pom.xml at %s; skipping Maven dependency resolution", path)
31+
return
32+
pom = ElementTree.parse(str(pom_path))
2733
for dependency in pom.findall('.//{http://maven.apache.org/POM/4.0.0}dependency'):
2834
groupId = dependency.find('{http://maven.apache.org/POM/4.0.0}groupId').text.replace('.', '/')
2935
artifactId = dependency.find('{http://maven.apache.org/POM/4.0.0}artifactId').text

api/analyzers/python/analyzer.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,116 @@ def add_symbols(self, entity: Entity) -> None:
136136
def is_dependency(self, file_path: str) -> bool:
    """Return True when ``file_path`` contains the substring ``venv``."""
    marker = "venv"
    return marker in file_path
138138

139+
def _module_parts(self, file_path: Path, root: Path) -> Optional[list[str]]:
140+
"""Dotted module path components for ``file_path`` relative to ``root``."""
141+
try:
142+
rel = file_path.relative_to(root)
143+
except ValueError:
144+
return None
145+
parts = list(rel.with_suffix('').parts)
146+
if parts and parts[-1] == '__init__':
147+
parts = parts[:-1]
148+
return parts
149+
150+
def build_import_index(self, files: dict[Path, File], root: Path) -> object:
    """Build the dotted-name lookup tables used to resolve Python imports.

    The result is ``{'exact': ..., 'suffix': ...}``:

    * ``exact`` maps the full dotted path from ``root`` to its File.
    * ``suffix`` maps every trailing sub-path to a File, so an import such
      as ``matplotlib.axes`` still matches a file whose path-from-root is
      ``lib.matplotlib.axes`` (``src/`` / ``lib/`` layouts).

    In both tables the first file seen for a key is kept. Only ``.py``
    files are indexed, because ``files`` holds every analyzed source file
    and ``import pkg.mod`` must not match e.g. ``pkg/mod.java``. Files
    flagged by ``is_dependency`` are skipped as well.
    """
    by_full_name: dict[str, File] = {}
    by_tail: dict[str, File] = {}
    for source_path, source_file in files.items():
        if source_path.suffix != '.py' or self.is_dependency(str(source_path)):
            continue
        components = self._module_parts(source_path, root)
        if not components:
            continue
        by_full_name.setdefault('.'.join(components), source_file)
        for start in range(len(components)):
            by_tail.setdefault('.'.join(components[start:]), source_file)
    return {'exact': by_full_name, 'suffix': by_tail}
177+
178+
def _resolve_dotted(self, dotted: str, index: dict) -> Optional[File]:
179+
if not dotted:
180+
return None
181+
f = index['exact'].get(dotted) or index['suffix'].get(dotted)
182+
if f is None and '.' in dotted:
183+
# imported name may be a symbol inside a module; drop the last part.
184+
parent = dotted.rsplit('.', 1)[0]
185+
f = index['exact'].get(parent) or index['suffix'].get(parent)
186+
return f
187+
188+
def _import_requests(self, file: File) -> list[tuple[str, int]]:
    """Collect ``(dotted, level)`` lookup requests from the file's imports.

    ``level`` is 0 for absolute imports; for relative imports it is the
    length of the import prefix text (1 when no prefix node is found).
    Every ``from`` statement contributes its module (possibly ``''``) and
    then one ``module.name`` entry per imported name.
    """
    def text_of(n) -> str:
        return n.text.decode('utf-8')

    def first_of_type(nodes, kind):
        return next((c for c in nodes if c.type == kind), None)

    captures = self._captures(
        "(import_statement) @i (import_from_statement) @f",
        file.tree.root_node,
    )
    requests: list[tuple[str, int]] = []

    # ``import a.b`` / ``import a.b as c``
    for stmt in captures.get('i', []):
        for child in stmt.named_children:
            target = child.child_by_field_name('name') if child.type == 'aliased_import' else child
            if target is not None and target.type == 'dotted_name':
                requests.append((text_of(target), 0))

    # ``from [.]*mod import name [as alias], ...``
    for stmt in captures.get('f', []):
        module = stmt.child_by_field_name('module_name')
        base, level = '', 0
        if module is not None and module.type == 'relative_import':
            prefix = first_of_type(module.children, 'import_prefix')
            level = 1 if prefix is None else len(text_of(prefix))
            dotted_part = first_of_type(module.named_children, 'dotted_name')
            base = '' if dotted_part is None else text_of(dotted_part)
        elif module is not None:
            base = text_of(module)
        requests.append((base, level))
        for name_node in stmt.children_by_field_name('name'):
            leaf = name_node.child_by_field_name('name') if name_node.type == 'aliased_import' else name_node
            if leaf is None:
                continue
            name_txt = text_of(leaf)
            # The imported name may itself be a submodule, so request it too.
            requests.append((f"{base}.{name_txt}" if base else name_txt, level))
    return requests
223+
224+
def resolve_imports(self, file: File, root: Path, index: object) -> list[File]:
    """Resolve the imports of ``file`` to in-repo files using ``index``.

    Args:
        file (File): The importing file; ``file.path`` and its parsed tree
            are read.
        root (Path): The analyzed repository root.
        index (object): The structure returned by ``build_import_index``.

    Returns:
        list[File]: Resolved files in first-seen order, deduplicated by
        path, excluding ``file`` itself and anything ``is_dependency``
        flags. Empty when ``index`` is falsy or ``file`` is outside ``root``.
    """
    if not index:
        return []
    module_parts = self._module_parts(file.path, root)
    if module_parts is None:
        return []
    # ``_module_parts`` already drops a trailing ``__init__``, so for a
    # package initializer the result *is* the package. Only a regular module
    # needs its own name removed to get the containing package.
    if Path(file.path).stem == '__init__':
        package_parts = list(module_parts)
    else:
        package_parts = module_parts[:-1]
    seen: set[Path] = set()
    targets: list[File] = []
    for dotted, level in self._import_requests(file):
        if level:
            # One leading dot means the current package; each extra dot
            # climbs one package higher.
            ascend = level - 1
            if ascend > len(package_parts):
                # Climbs above the repository root; a negative slice end
                # would silently pick a wrong prefix, so skip instead.
                continue
            base = package_parts[: len(package_parts) - ascend]
            full = '.'.join([*base, dotted]) if dotted else '.'.join(base)
        else:
            full = dotted
        resolved = self._resolve_dotted(full, index)
        if resolved is None or resolved.path == file.path or resolved.path in seen:
            continue
        if self.is_dependency(str(resolved.path)):
            continue
        seen.add(resolved.path)
        targets.append(resolved)
    return targets
248+
139249
def _extract_type_target(self, node: Node) -> Optional[Node]:
140250
if node.type == 'attribute':
141251
return node.child_by_field_name('attribute')

0 commit comments

Comments
 (0)