Skip to content

Commit 58e35b3

Browse files
DvirDukhan and Copilot committed
feat(mcp): query tools — get_callers/callees/deps, find_path, search_code
Bundles T5 (#653), T7 (#655), T8 (#656) into one PR; all three are thin async wrappers around existing AsyncGraphQuery operations and share the same _node_summary / _coerce_node_id / _project_arg helpers.

- search_code: prefix search backed by the FalkorDB fulltext index. Surfaces flat {id, name, label, file, line} so agents can hand the id straight to the navigation tools.
- get_callers / get_callees: incoming / outgoing CALLS edges. The shared _neighbors_payload inlines the IN-direction Cypher because AsyncGraphQuery.get_neighbors only walks OUT.
- get_dependencies: same machinery, but aggregates a configurable set of relations (default CALLS/IMPORTS/DEFINES) and dedupes by (node id, relation).
- find_path: returns up to N CALLS-only paths between two symbols as a node sequence; strips encode_edge entries from the alternating [node, edge, ...] list produced by AsyncGraphQuery.find_paths.

Helpers:
- _node_summary flattens the encode_node shape (which nests data under 'properties' and includes the 'Searchable' fulltext-index label) into the {id, name, label, file, line} agents want.
- _coerce_node_id accepts int or stringified-int and rejects bool.

Tests (tests/mcp/test_query_tools.py, 13 tests):
- search_code prefix happy/limit/no-match/serialisability paths.
- get_callees(entrypoint) ⊇ {service}; get_callers(service) ⊇ {entrypoint}.
- get_dependencies includes the CALLS relation.
- Neighbor tools accept string ids; reject garbage.
- find_path(entrypoint → db) ≥ 1; reverse direction returns []; max_paths is honored.
- All five tools are registered on the MCP app.

Also drops a stray venv/ that snuck into the fixture directory and was polluting the prefix-search results.

Closes #653, #655, #656.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 18d3cc7 commit 58e35b3

2 files changed

Lines changed: 492 additions & 0 deletions

File tree

api/mcp/tools/structural.py

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,242 @@ def _payload(project) -> dict[str, Any]:
182182
}
183183

184184
return await loop.run_in_executor(None, _do_index)
185+
186+
187+
# ---------------------------------------------------------------------------
188+
# T5 — get_callers / get_callees / get_dependencies
189+
# ---------------------------------------------------------------------------
190+
191+
192+
def _project_arg(project: str, branch: Optional[str]):
    """Build the :class:`AsyncGraphQuery` handle for ``(project, branch)``.

    ``AsyncGraphQuery`` is imported at call time rather than at module
    import.  ``project`` is passed positionally and ``branch`` as a keyword
    argument; the tools in this module close the returned handle when they
    are done with it.
    """
    from api.graph import AsyncGraphQuery as _GraphQuery

    handle = _GraphQuery(project, branch=branch)
    return handle
197+
198+
199+
def _node_summary(n: Any) -> dict[str, Any]:
200+
"""Normalize a FalkorDB Node (or already-encoded dict) to a flat payload.
201+
202+
``encode_node`` returns ``{id, labels, properties: {...}}`` because Node
203+
properties live on a nested attribute. Agents want a flat record, and
204+
they also want a single ``label`` (the meaningful one — File, Class,
205+
Function — not the fulltext-index marker ``Searchable``).
206+
"""
207+
if hasattr(n, "properties"):
208+
props = dict(n.properties or {})
209+
labels = list(n.labels or [])
210+
node_id = getattr(n, "id", None)
211+
else:
212+
d = dict(n)
213+
props = dict(d.get("properties") or {})
214+
labels = list(d.get("labels") or [])
215+
node_id = d.get("id")
216+
217+
label = next((lbl for lbl in labels if lbl != "Searchable"), None)
218+
return {
219+
"id": node_id,
220+
"name": props.get("name"),
221+
"label": label,
222+
"file": props.get("path"),
223+
"line": props.get("src_start"),
224+
}
225+
226+
227+
def _coerce_node_id(symbol_id: Any) -> int:
228+
"""Accept int or stringified int; raise ValueError otherwise.
229+
230+
The MCP wire format is JSON; agents sometimes hand back the id as a
231+
string. Be permissive on input, strict on type after parsing.
232+
"""
233+
if isinstance(symbol_id, bool): # bool is an int subclass; reject loudly
234+
raise ValueError(f"symbol_id must be an integer, got bool: {symbol_id!r}")
235+
if isinstance(symbol_id, int):
236+
return symbol_id
237+
if isinstance(symbol_id, str) and symbol_id.lstrip("-").isdigit():
238+
return int(symbol_id)
239+
raise ValueError(f"symbol_id must be an integer id, got: {symbol_id!r}")
240+
241+
242+
async def _neighbors_payload(
243+
project: str,
244+
branch: Optional[str],
245+
symbol_id: Any,
246+
rel: str,
247+
direction: str,
248+
limit: int,
249+
) -> list[dict[str, Any]]:
250+
"""Shared implementation for caller/callee/dependency tools.
251+
252+
``direction`` is ``IN`` (incoming edges, e.g. callers) or ``OUT``
253+
(outgoing edges, e.g. callees). When ``IN`` we run the inverse Cypher
254+
``(neighbor)-[:rel]->(target)``; ``AsyncGraphQuery.get_neighbors`` only
255+
walks outgoing edges, so we inline the Cypher here for symmetry.
256+
"""
257+
node_id = _coerce_node_id(symbol_id)
258+
g = _project_arg(project, branch)
259+
try:
260+
if direction == "OUT":
261+
q = (
262+
f"MATCH (n)-[e:{rel}]->(dest) "
263+
f"WHERE ID(n) = $sid "
264+
f"RETURN dest, type(e) AS rel "
265+
f"LIMIT $limit"
266+
)
267+
elif direction == "IN":
268+
q = (
269+
f"MATCH (src)-[e:{rel}]->(n) "
270+
f"WHERE ID(n) = $sid "
271+
f"RETURN src AS dest, type(e) AS rel "
272+
f"LIMIT $limit"
273+
)
274+
else:
275+
raise ValueError(f"direction must be IN or OUT, got: {direction!r}")
276+
277+
res = await g._query(q, {"sid": node_id, "limit": int(limit)})
278+
out: list[dict[str, Any]] = []
279+
for row in res.result_set:
280+
entry = _node_summary(row[0])
281+
entry["relation"] = row[1]
282+
entry["direction"] = direction
283+
out.append(entry)
284+
return out
285+
finally:
286+
await g.close()
287+
288+
289+
@app.tool(
    name="get_callers",
    description=(
        "Return functions that call the given symbol (incoming CALLS edges). "
        "`symbol_id` is the integer node id returned by `search_code` or "
        "other tools."
    ),
)
async def get_callers(
    symbol_id: Any,
    project: str,
    branch: Optional[str] = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """MCP tool: list the nodes with a ``CALLS`` edge pointing at ``symbol_id``.

    Delegates to ``_neighbors_payload`` with relation ``CALLS`` and
    direction ``IN``; ``limit`` is forwarded unchanged.
    """
    callers = await _neighbors_payload(
        project=project,
        branch=branch,
        symbol_id=symbol_id,
        rel="CALLS",
        direction="IN",
        limit=limit,
    )
    return callers
304+
305+
306+
@app.tool(
    name="get_callees",
    description=(
        "Return functions that the given symbol calls (outgoing CALLS edges)."
    ),
)
async def get_callees(
    symbol_id: Any,
    project: str,
    branch: Optional[str] = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """MCP tool: list the nodes that ``symbol_id`` has a ``CALLS`` edge to.

    Delegates to ``_neighbors_payload`` with relation ``CALLS`` and
    direction ``OUT``; ``limit`` is forwarded unchanged.
    """
    callees = await _neighbors_payload(
        project=project,
        branch=branch,
        symbol_id=symbol_id,
        rel="CALLS",
        direction="OUT",
        limit=limit,
    )
    return callees
319+
320+
321+
@app.tool(
    name="get_dependencies",
    description=(
        "Return outgoing neighbors of the given symbol across any of the "
        "specified relation types (default: IMPORTS, CALLS, DEFINES). "
        "Useful for 'what does this depend on' queries."
    ),
)
async def get_dependencies(
    symbol_id: Any,
    project: str,
    branch: Optional[str] = None,
    rels: Optional[list[str]] = None,
    limit: int = 50,
) -> list[dict[str, Any]]:
    """MCP tool: collect outgoing neighbors of ``symbol_id`` over several relations.

    Each relation in ``rels`` (``IMPORTS``, ``CALLS``, ``DEFINES`` when
    ``rels`` is ``None``) is queried in turn through ``_neighbors_payload``
    with direction ``OUT``, each query being given the full ``limit``.
    Results are concatenated in relation order; a row is dropped when the
    same ``(id, relation)`` pair was already emitted.  Collection stops as
    soon as ``limit`` rows have been gathered.
    """
    relation_types = ["IMPORTS", "CALLS", "DEFINES"] if rels is None else rels

    emitted: set[Any] = set()  # (node id, relation) pairs already returned
    collected: list[dict[str, Any]] = []
    for relation_type in relation_types:
        neighbors = await _neighbors_payload(
            project, branch, symbol_id, relation_type, "OUT", limit
        )
        for neighbor in neighbors:
            fingerprint = (neighbor.get("id"), neighbor.get("relation"))
            if fingerprint not in emitted:
                emitted.add(fingerprint)
                collected.append(neighbor)
                # Stop mid-relation once the overall cap is reached.
                if len(collected) >= limit:
                    return collected
    return collected
352+
353+
354+
# ---------------------------------------------------------------------------
355+
# T7 — find_path
356+
# ---------------------------------------------------------------------------
357+
358+
359+
@app.tool(
    name="find_path",
    description=(
        "Return up to `max_paths` CALLS-path sequences from `source_id` to "
        "`dest_id`. Useful for 'how does A reach B' questions. Returns an "
        "empty list when no path exists."
    ),
)
async def find_path(
    source_id: Any,
    dest_id: Any,
    project: str,
    branch: Optional[str] = None,
    max_paths: int = 10,
) -> list[dict[str, Any]]:
    """MCP tool: return node sequences for paths from ``source_id`` to ``dest_id``.

    Args:
        source_id: Start node id; int or stringified int.
        dest_id: End node id; int or stringified int.
        project: Project name handed to ``_project_arg``.
        branch: Optional branch handed to ``_project_arg``.
        max_paths: Maximum number of paths to return.  Zero or a negative
            value yields an empty list.

    Returns:
        A list of ``{"path": [node_summary, ...]}`` dicts, one per path, in
        the order ``AsyncGraphQuery.find_paths`` produced them.

    Raises:
        ValueError: If either id cannot be coerced to an integer id.
    """
    src = _coerce_node_id(source_id)
    dst = _coerce_node_id(dest_id)
    g = _project_arg(project, branch)
    try:
        raw = await g.find_paths(src, dst)
    finally:
        await g.close()

    # A negative ``max_paths`` must not reach the slice below: ``raw[:-1]``
    # would return every path except the last instead of none.  ``None`` is
    # passed through and keeps meaning "no cap", as slicing already treats it.
    cap = None if max_paths is None else max(0, max_paths)

    # ``AsyncGraphQuery.find_paths`` returns each path as an alternating
    # [node, edge, node, edge, ..., node] list; we strip edges and surface
    # only the node sequence — that's what agents typically want.
    paths: list[dict[str, Any]] = []
    for entry in raw[:cap]:
        node_seq = [
            _node_summary(x)
            for x in entry
            # Edges in the alternating list carry a top-level ``relation``
            # key (from ``encode_edge``); nodes carry ``properties``.
            # NOTE(review): encoded edges may carry ``properties`` as well —
            # confirm against ``encode_edge``.  Checking ``relation`` too
            # keeps edges out either way.
            if isinstance(x, dict) and "properties" in x and "relation" not in x
        ]
        paths.append({"path": node_seq})
    return paths
396+
397+
398+
# ---------------------------------------------------------------------------
399+
# T8 — search_code
400+
# ---------------------------------------------------------------------------
401+
402+
403+
@app.tool(
    name="search_code",
    description=(
        "Prefix-search for symbols (functions, classes, files) whose name "
        "starts with `prefix`. Backed by FalkorDB's full-text index. The "
        "agent typically calls this first to discover symbol ids for the "
        "navigation tools (`get_callers`, `find_path`, ...)."
    ),
)
async def search_code(
    prefix: str,
    project: str,
    branch: Optional[str] = None,
    limit: int = 20,
) -> list[dict[str, Any]]:
    """MCP tool: return flat summaries of the nodes matching ``prefix``.

    Args:
        prefix: Text passed to ``AsyncGraphQuery.prefix_search``.
        project: Project name handed to ``_project_arg``.
        branch: Optional branch handed to ``_project_arg``.
        limit: Maximum number of results to return.  Zero or a negative
            value yields an empty list.

    Returns:
        Up to ``limit`` ``_node_summary`` dicts, in the order the search
        returned them.  The cap is applied here, after the search has run.
    """
    g = _project_arg(project, branch)
    try:
        raw = await g.prefix_search(prefix)
    finally:
        await g.close()

    # A negative ``limit`` must not reach the slice below: ``raw[:-1]`` would
    # return every match except the last instead of none.  ``None`` is passed
    # through and keeps meaning "no cap", as slicing already treats it.
    cap = None if limit is None else max(0, limit)
    return [_node_summary(node) for node in raw[:cap]]

0 commit comments

Comments
 (0)