Skip to content

Commit 6681f7e

Browse files
DvirDukhanCopilot
andcommitted
feat(mcp): impact_analysis tool — variable-depth Cypher (T6 #654)
Adds the only MCP tool with no pre-existing AsyncGraphQuery backing, inlining a [:CALLS*1..depth] traversal with DISTINCT for cycle safety. api/mcp/tools/structural.py - impact_analysis(symbol_id, project, branch, direction='IN', depth=3) - direction='IN' returns transitive upstream callers (what breaks if you change this symbol); direction='OUT' returns transitive callees. - depth is clamped to [1, IMPACT_MAX_DEPTH=10]; values above 10 are silently clamped, not rejected, so agents can ask for "very deep" without worrying about the cap. - _clamp_depth helper accepts int / stringified int, rejects bool / None / non-numeric strings. - Direction must be 'IN' or 'OUT'; raises before issuing the query. tests/mcp/test_impact_analysis.py (9 tests) - Unit: _clamp_depth normalization + garbage rejection; direction validation; tool registered on the MCP app. - Integration vs the sample-project fixture: upstream(db) and downstream(entrypoint) match the expected.yaml contract; depth=1 from db excludes the 3-hop entrypoint. - Cycle safety: throwaway A-B + C-A graph; DISTINCT ensures no duplicates and the query terminates. - JSON serialisability. tests/mcp/fixtures/expected.yaml - New impact section with upstream_includes_any_of for db and downstream_includes_any_of for entrypoint. Out of scope (per ticket): cross-rel traversal (CALLS only for v1), ranking, cross-branch / cross-repo impact. Closes #654. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4135def commit 6681f7e

3 files changed

Lines changed: 353 additions & 0 deletions

File tree

api/mcp/tools/structural.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,3 +483,89 @@ async def search_code(
483483
finally:
484484
await g.close()
485485
return [_node_summary(node) for node in raw]
486+
487+
488+
# ---------------------------------------------------------------------------
489+
# T6 — impact_analysis (variable-depth Cypher with DISTINCT for cycle safety)
490+
# ---------------------------------------------------------------------------
491+
492+
493+
# Hard cap on traversal depth — passed values above this are silently
494+
# clamped. Prevents pathological queries (e.g. depth=999) from hammering
495+
# FalkorDB while still letting agents request "deep" impact without
496+
# hitting an error.
497+
IMPACT_MAX_DEPTH = 10
498+
499+
500+
def _clamp_depth(depth: Any) -> int:
501+
"""Coerce ``depth`` to ``1..IMPACT_MAX_DEPTH``. Strings accepted."""
502+
if isinstance(depth, bool):
503+
raise ValueError(f"depth must be an integer, got bool: {depth!r}")
504+
if isinstance(depth, str) and depth.lstrip("-").isdigit():
505+
depth = int(depth)
506+
if not isinstance(depth, int):
507+
raise ValueError(f"depth must be an integer, got: {depth!r}")
508+
if depth < 1:
509+
return 1
510+
if depth > IMPACT_MAX_DEPTH:
511+
return IMPACT_MAX_DEPTH
512+
return depth
513+
514+
515+
@app.tool(
516+
name="impact_analysis",
517+
description=(
518+
"Transitive call-graph impact for refactoring: "
519+
"`direction='IN'` returns all upstream callers (what breaks if you "
520+
"change this symbol); `direction='OUT'` returns all downstream "
521+
"callees (what this symbol indirectly depends on). Traverses only "
522+
f"CALLS edges. Depth is clamped to {IMPACT_MAX_DEPTH}; cycles are "
523+
"deduplicated via Cypher DISTINCT (each node appears at most once). "
524+
"`limit` bounds the number of impacted symbols returned."
525+
),
526+
)
527+
async def impact_analysis(
528+
symbol_id: int | str,
529+
project: str,
530+
branch: Optional[str] = None,
531+
direction: str = "IN",
532+
depth: int = 3,
533+
limit: int = 50,
534+
) -> list[dict[str, Any]]:
535+
node_id = _coerce_node_id(symbol_id)
536+
eff_depth = _clamp_depth(depth)
537+
538+
if direction == "IN":
539+
# Upstream callers: (impacted) -[:CALLS*]-> (n)
540+
q = (
541+
f"MATCH (n)<-[:CALLS*1..{eff_depth}]-(impacted) "
542+
f"WHERE ID(n) = $sid "
543+
f"RETURN DISTINCT impacted "
544+
f"LIMIT $limit"
545+
)
546+
elif direction == "OUT":
547+
# Downstream callees: (n) -[:CALLS*]-> (impacted)
548+
q = (
549+
f"MATCH (n)-[:CALLS*1..{eff_depth}]->(impacted) "
550+
f"WHERE ID(n) = $sid "
551+
f"RETURN DISTINCT impacted "
552+
f"LIMIT $limit"
553+
)
554+
else:
555+
raise ValueError(
556+
f"direction must be 'IN' (upstream) or 'OUT' (downstream), "
557+
f"got: {direction!r}"
558+
)
559+
560+
g = _project_arg(project, branch)
561+
try:
562+
res = await g._query(q, {"sid": node_id, "limit": int(limit)})
563+
finally:
564+
await g.close()
565+
566+
out: list[dict[str, Any]] = []
567+
for row in res.result_set:
568+
entry = _node_summary(row[0])
569+
entry["direction"] = direction
570+
out.append(entry)
571+
return out

tests/mcp/fixtures/expected.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,15 @@ search_prefixes:
5454
Base:
5555
# Case-insensitive prefix match against Searchable.name.
5656
must_include: ["BaseRepo"]
57+
58+
# Transitive impact (used by T6 — impact_analysis).
59+
# Names listed under ``upstream_includes_any_of`` MUST appear when running
60+
# impact_analysis(<symbol>, direction="IN", depth >= 3). Names under
61+
# ``downstream_includes_any_of`` MUST appear with direction="OUT".
62+
impact:
63+
db:
64+
# Anything that transitively calls db(): repo() variants and service().
65+
upstream_includes_any_of: ["repo", "service", "entrypoint"]
66+
entrypoint:
67+
# Anything entrypoint() transitively reaches.
68+
downstream_includes_any_of: ["service", "repo", "db"]

tests/mcp/test_impact_analysis.py

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
"""T6 — impact_analysis MCP tool tests."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import uuid
7+
8+
import pytest
9+
10+
11+
pytestmark = pytest.mark.anyio
12+
13+
14+
@pytest.fixture
15+
def anyio_backend() -> str:
16+
return "asyncio"
17+
18+
19+
# ---------------------------------------------------------------------------
20+
# Unit tests — no FalkorDB required
21+
# ---------------------------------------------------------------------------
22+
23+
24+
def test_clamp_depth_normalizes():
25+
from api.mcp.tools.structural import _clamp_depth, IMPACT_MAX_DEPTH
26+
27+
assert _clamp_depth(1) == 1
28+
assert _clamp_depth(5) == 5
29+
assert _clamp_depth(IMPACT_MAX_DEPTH) == IMPACT_MAX_DEPTH
30+
assert _clamp_depth(999) == IMPACT_MAX_DEPTH
31+
assert _clamp_depth(0) == 1
32+
assert _clamp_depth(-3) == 1
33+
assert _clamp_depth("4") == 4
34+
assert _clamp_depth("999") == IMPACT_MAX_DEPTH
35+
36+
37+
def test_clamp_depth_rejects_garbage():
38+
from api.mcp.tools.structural import _clamp_depth
39+
40+
with pytest.raises(ValueError, match="depth"):
41+
_clamp_depth("not-a-number")
42+
with pytest.raises(ValueError, match="depth"):
43+
_clamp_depth(True) # bool would silently pass the int check
44+
with pytest.raises(ValueError, match="depth"):
45+
_clamp_depth(None)
46+
47+
48+
async def test_impact_analysis_rejects_invalid_direction():
49+
from api.mcp.tools.structural import impact_analysis
50+
51+
with pytest.raises(ValueError, match="direction"):
52+
await impact_analysis(
53+
symbol_id=1,
54+
project="any",
55+
direction="BOTH",
56+
)
57+
58+
59+
async def test_impact_analysis_registered_via_app():
60+
from api.mcp.server import app
61+
62+
names = {t.name for t in await app.list_tools()}
63+
assert "impact_analysis" in names
64+
65+
66+
# ---------------------------------------------------------------------------
67+
# Integration — sample_project fixture (T3)
68+
# ---------------------------------------------------------------------------
69+
70+
71+
async def _find_id(indexed_fixture, name: str) -> int:
72+
from api.mcp.tools.structural import search_code
73+
74+
rows = await search_code(
75+
prefix=name,
76+
project=indexed_fixture.project,
77+
branch=indexed_fixture.branch,
78+
)
79+
for r in rows:
80+
if r["name"] == name:
81+
return r["id"]
82+
raise AssertionError(f"symbol {name!r} not found")
83+
84+
85+
async def test_impact_upstream_of_db(indexed_fixture, expected_contract):
86+
from api.mcp.tools.structural import impact_analysis
87+
88+
db_id = await _find_id(indexed_fixture, "db")
89+
upstream = await impact_analysis(
90+
symbol_id=db_id,
91+
project=indexed_fixture.project,
92+
branch=indexed_fixture.branch,
93+
direction="IN",
94+
depth=5,
95+
)
96+
names = {r["name"] for r in upstream}
97+
expected = set(
98+
expected_contract["impact"]["db"]["upstream_includes_any_of"]
99+
)
100+
assert names & expected, (
101+
f"db upstream {names} disjoint from expected {expected}"
102+
)
103+
# DISTINCT enforces no duplicate ids.
104+
ids = [r["id"] for r in upstream]
105+
assert len(ids) == len(set(ids))
106+
for r in upstream:
107+
assert r["direction"] == "IN"
108+
109+
110+
async def test_impact_downstream_of_entrypoint(indexed_fixture, expected_contract):
111+
from api.mcp.tools.structural import impact_analysis
112+
113+
entry_id = await _find_id(indexed_fixture, "entrypoint")
114+
downstream = await impact_analysis(
115+
symbol_id=entry_id,
116+
project=indexed_fixture.project,
117+
branch=indexed_fixture.branch,
118+
direction="OUT",
119+
depth=5,
120+
)
121+
names = {r["name"] for r in downstream}
122+
expected = set(
123+
expected_contract["impact"]["entrypoint"]["downstream_includes_any_of"]
124+
)
125+
assert names & expected, (
126+
f"entrypoint downstream {names} disjoint from expected {expected}"
127+
)
128+
for r in downstream:
129+
assert r["direction"] == "OUT"
130+
131+
132+
async def test_impact_depth_one_only_immediate_callers(indexed_fixture):
133+
"""depth=1 returns only direct callers — sufficient for db's caller chain,
134+
not transitive ancestors like entrypoint."""
135+
from api.mcp.tools.structural import impact_analysis
136+
137+
db_id = await _find_id(indexed_fixture, "db")
138+
upstream = await impact_analysis(
139+
symbol_id=db_id,
140+
project=indexed_fixture.project,
141+
branch=indexed_fixture.branch,
142+
direction="IN",
143+
depth=1,
144+
)
145+
names = {r["name"] for r in upstream}
146+
# entrypoint is 3 hops away — must NOT appear at depth=1.
147+
assert "entrypoint" not in names
148+
149+
150+
async def test_impact_response_serialisable(indexed_fixture):
151+
from api.mcp.tools.structural import impact_analysis
152+
153+
entry_id = await _find_id(indexed_fixture, "entrypoint")
154+
rows = await impact_analysis(
155+
symbol_id=entry_id,
156+
project=indexed_fixture.project,
157+
branch=indexed_fixture.branch,
158+
direction="OUT",
159+
depth=3,
160+
)
161+
json.dumps(rows)
162+
163+
164+
async def test_impact_respects_limit(indexed_fixture):
165+
"""``limit`` bounds the number of impacted symbols returned."""
166+
from api.mcp.tools.structural import impact_analysis
167+
168+
entry_id = await _find_id(indexed_fixture, "entrypoint")
169+
full = await impact_analysis(
170+
symbol_id=entry_id,
171+
project=indexed_fixture.project,
172+
branch=indexed_fixture.branch,
173+
direction="OUT",
174+
depth=5,
175+
)
176+
# entrypoint reaches several downstream symbols; cap to 1 and confirm
177+
# the result is bounded by the limit.
178+
assert len(full) > 1, "fixture should expose >1 downstream symbol"
179+
capped = await impact_analysis(
180+
symbol_id=entry_id,
181+
project=indexed_fixture.project,
182+
branch=indexed_fixture.branch,
183+
direction="OUT",
184+
depth=5,
185+
limit=1,
186+
)
187+
assert len(capped) == 1
188+
189+
190+
# ---------------------------------------------------------------------------
191+
# Cycle safety — small purpose-built graph
192+
# ---------------------------------------------------------------------------
193+
194+
195+
@pytest.fixture
196+
async def cycle_graph():
197+
"""Build a tiny graph with a 2-cycle (A↔B) plus an unrelated C → A edge.
198+
199+
Created with a unique branch so it's isolated from the shared
200+
sample-project fixture. Not torn down (matches the pattern used by
201+
``indexed_fixture``).
202+
"""
203+
from api.graph import Graph
204+
205+
project = "impact_cycle_test"
206+
branch = f"cycle-{uuid.uuid4().hex[:8]}"
207+
g = Graph(project, branch=branch)
208+
# CREATE three Function nodes with name + path; add CALLS edges
209+
# A → B, B → A (cycle), C → A.
210+
g.g.query(
211+
"""
212+
CREATE
213+
(a:Function:Searchable {name: 'A', path: '/tmp/cycle.py', src_start: 1}),
214+
(b:Function:Searchable {name: 'B', path: '/tmp/cycle.py', src_start: 2}),
215+
(c:Function:Searchable {name: 'C', path: '/tmp/cycle.py', src_start: 3}),
216+
(a)-[:CALLS]->(b),
217+
(b)-[:CALLS]->(a),
218+
(c)-[:CALLS]->(a)
219+
"""
220+
)
221+
yield project, branch
222+
223+
224+
async def test_impact_handles_cycles(cycle_graph):
225+
"""Variable-depth Cypher with DISTINCT must return each node once
226+
even when the graph has a cycle (A↔B). The traversal is depth-bounded
227+
(``*1..depth``) so it always terminates; without DISTINCT, though, a
228+
node reachable via multiple paths would be emitted as duplicate rows."""
229+
from api.graph import AsyncGraphQuery
230+
from api.mcp.tools.structural import impact_analysis
231+
232+
project, branch = cycle_graph
233+
234+
# Resolve A's id via Cypher (no Searchable index in this throwaway graph)
235+
g = AsyncGraphQuery(project, branch=branch)
236+
try:
237+
res = await g._query("MATCH (n:Function {name: 'A'}) RETURN ID(n)")
238+
a_id = res.result_set[0][0]
239+
finally:
240+
await g.close()
241+
242+
upstream = await impact_analysis(
243+
symbol_id=a_id,
244+
project=project,
245+
branch=branch,
246+
direction="IN",
247+
depth=5,
248+
)
249+
names = [r["name"] for r in upstream]
250+
# DISTINCT collapses the A->B->A->B->... cycle into single entries.
251+
# B (direct caller) and C (direct caller) must both be present;
252+
# A may also appear because A is reachable from itself through the
253+
# cycle. The crucial guarantee is no duplicates and no infinite loop.
254+
assert "B" in names and "C" in names
255+
assert len(names) == len(set(names)), f"duplicates in {names}"

0 commit comments

Comments
 (0)