Skip to content

Commit c23a206

Browse files
DvirDukhanCopilot
andcommitted
feat(mcp): impact_analysis tool — variable-depth Cypher (T6 #654)
Adds the only MCP tool with no pre-existing AsyncGraphQuery backing, inlining a [:CALLS*1..depth] traversal with DISTINCT for cycle safety. api/mcp/tools/structural.py - impact_analysis(symbol_id, project, branch, direction='IN', depth=3) - direction='IN' returns transitive upstream callers (what breaks if you change this symbol); direction='OUT' returns transitive callees. - depth is clamped to [1, IMPACT_MAX_DEPTH=10]; values above 10 are silently clamped, not rejected, so agents can ask for "very deep" without worrying about the cap. - _clamp_depth helper accepts int / stringified int, rejects bool / None / non-numeric strings. - Direction must be 'IN' or 'OUT'; raises before issuing the query. tests/mcp/test_impact_analysis.py (9 tests) - Unit: _clamp_depth normalization + garbage rejection; direction validation; tool registered on the MCP app. - Integration vs the sample-project fixture: upstream(db) and downstream(entrypoint) match the expected.yaml contract; depth=1 from db excludes the 3-hop entrypoint. - Cycle safety: throwaway A-B + C-A graph; DISTINCT ensures no duplicates and the query terminates. - JSON serialisability. tests/mcp/fixtures/expected.yaml - New impact section with upstream_includes_any_of for db and downstream_includes_any_of for entrypoint. Out of scope (per ticket): cross-rel traversal (CALLS only for v1), ranking, cross-branch / cross-repo impact. Closes #654. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 58e35b3 commit c23a206

3 files changed

Lines changed: 322 additions & 0 deletions

File tree

api/mcp/tools/structural.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,3 +421,85 @@ async def search_code(
421421
finally:
422422
await g.close()
423423
return [_node_summary(node) for node in raw[:limit]]
424+
425+
426+
# ---------------------------------------------------------------------------
427+
# T6 — impact_analysis (variable-depth Cypher with DISTINCT for cycle safety)
428+
# ---------------------------------------------------------------------------
429+
430+
431+
IMPACT_MAX_DEPTH = 10
432+
"""Hard cap on traversal depth — passed values above this are silently
433+
clamped. Prevents pathological queries (e.g. depth=999) from hammering
434+
FalkorDB while still letting agents request "deep" impact without
435+
hitting an error."""
436+
437+
438+
def _clamp_depth(depth: Any) -> int:
439+
"""Coerce ``depth`` to ``1..IMPACT_MAX_DEPTH``. Strings accepted."""
440+
if isinstance(depth, bool):
441+
raise ValueError(f"depth must be an integer, got bool: {depth!r}")
442+
if isinstance(depth, str) and depth.lstrip("-").isdigit():
443+
depth = int(depth)
444+
if not isinstance(depth, int):
445+
raise ValueError(f"depth must be an integer, got: {depth!r}")
446+
if depth < 1:
447+
return 1
448+
if depth > IMPACT_MAX_DEPTH:
449+
return IMPACT_MAX_DEPTH
450+
return depth
451+
452+
453+
@app.tool(
454+
name="impact_analysis",
455+
description=(
456+
"Transitive call-graph impact for refactoring: "
457+
"`direction='IN'` returns all upstream callers (what breaks if you "
458+
"change this symbol); `direction='OUT'` returns all downstream "
459+
"callees (what this symbol indirectly depends on). Traverses only "
460+
f"CALLS edges. Depth is clamped to {IMPACT_MAX_DEPTH}; cycles are "
461+
"deduplicated via Cypher DISTINCT (each node appears at most once)."
462+
),
463+
)
464+
async def impact_analysis(
465+
symbol_id: Any,
466+
project: str,
467+
branch: Optional[str] = None,
468+
direction: str = "IN",
469+
depth: int = 3,
470+
) -> list[dict[str, Any]]:
471+
node_id = _coerce_node_id(symbol_id)
472+
eff_depth = _clamp_depth(depth)
473+
474+
if direction == "IN":
475+
# Upstream callers: (impacted) -[:CALLS*]-> (n)
476+
q = (
477+
f"MATCH (n)<-[:CALLS*1..{eff_depth}]-(impacted) "
478+
f"WHERE ID(n) = $sid "
479+
f"RETURN DISTINCT impacted"
480+
)
481+
elif direction == "OUT":
482+
# Downstream callees: (n) -[:CALLS*]-> (impacted)
483+
q = (
484+
f"MATCH (n)-[:CALLS*1..{eff_depth}]->(impacted) "
485+
f"WHERE ID(n) = $sid "
486+
f"RETURN DISTINCT impacted"
487+
)
488+
else:
489+
raise ValueError(
490+
f"direction must be 'IN' (upstream) or 'OUT' (downstream), "
491+
f"got: {direction!r}"
492+
)
493+
494+
g = _project_arg(project, branch)
495+
try:
496+
res = await g._query(q, {"sid": node_id})
497+
finally:
498+
await g.close()
499+
500+
out: list[dict[str, Any]] = []
501+
for row in res.result_set:
502+
entry = _node_summary(row[0])
503+
entry["direction"] = direction
504+
out.append(entry)
505+
return out

tests/mcp/fixtures/expected.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,15 @@ search_prefixes:
4646
Repo:
4747
# case-insensitive prefix match on Searchable label
4848
must_include_any_of: ["BaseRepo", "UserRepo", "OrderRepo"]
49+
50+
# Transitive impact (used by T6 — impact_analysis).
51+
# Names listed under ``upstream_includes`` MUST appear when running
52+
# impact_analysis(<symbol>, direction="IN", depth=>=3). Names under
53+
# ``downstream_includes`` MUST appear with direction="OUT".
54+
impact:
55+
db:
56+
# Anything that transitively calls db(): repo() variants and service().
57+
upstream_includes_any_of: ["repo", "service", "entrypoint"]
58+
entrypoint:
59+
# Anything entrypoint() transitively reaches.
60+
downstream_includes_any_of: ["service", "repo", "db"]

tests/mcp/test_impact_analysis.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
"""T6 — impact_analysis MCP tool tests."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
import uuid
7+
8+
import pytest
9+
10+
11+
pytestmark = pytest.mark.anyio
12+
13+
14+
@pytest.fixture
15+
def anyio_backend() -> str:
16+
return "asyncio"
17+
18+
19+
# ---------------------------------------------------------------------------
20+
# Unit tests — no FalkorDB required
21+
# ---------------------------------------------------------------------------
22+
23+
24+
def test_clamp_depth_normalizes():
25+
from api.mcp.tools.structural import _clamp_depth, IMPACT_MAX_DEPTH
26+
27+
assert _clamp_depth(1) == 1
28+
assert _clamp_depth(5) == 5
29+
assert _clamp_depth(IMPACT_MAX_DEPTH) == IMPACT_MAX_DEPTH
30+
assert _clamp_depth(999) == IMPACT_MAX_DEPTH
31+
assert _clamp_depth(0) == 1
32+
assert _clamp_depth(-3) == 1
33+
assert _clamp_depth("4") == 4
34+
assert _clamp_depth("999") == IMPACT_MAX_DEPTH
35+
36+
37+
def test_clamp_depth_rejects_garbage():
38+
from api.mcp.tools.structural import _clamp_depth
39+
40+
with pytest.raises(ValueError, match="depth"):
41+
_clamp_depth("not-a-number")
42+
with pytest.raises(ValueError, match="depth"):
43+
_clamp_depth(True) # bool would silently pass the int check
44+
with pytest.raises(ValueError, match="depth"):
45+
_clamp_depth(None)
46+
47+
48+
async def test_impact_analysis_rejects_invalid_direction():
49+
from api.mcp.tools.structural import impact_analysis
50+
51+
with pytest.raises(ValueError, match="direction"):
52+
await impact_analysis(
53+
symbol_id=1,
54+
project="any",
55+
direction="BOTH",
56+
)
57+
58+
59+
async def test_impact_analysis_registered_via_app():
60+
from api.mcp.server import app
61+
62+
names = {t.name for t in await app.list_tools()}
63+
assert "impact_analysis" in names
64+
65+
66+
# ---------------------------------------------------------------------------
67+
# Integration — sample_project fixture (T3)
68+
# ---------------------------------------------------------------------------
69+
70+
71+
async def _find_id(indexed_fixture, name: str) -> int:
72+
from api.mcp.tools.structural import search_code
73+
74+
rows = await search_code(
75+
prefix=name,
76+
project=indexed_fixture.project,
77+
branch=indexed_fixture.branch,
78+
)
79+
for r in rows:
80+
if r["name"] == name:
81+
return r["id"]
82+
raise AssertionError(f"symbol {name!r} not found")
83+
84+
85+
async def test_impact_upstream_of_db(indexed_fixture, expected_contract):
86+
from api.mcp.tools.structural import impact_analysis
87+
88+
db_id = await _find_id(indexed_fixture, "db")
89+
upstream = await impact_analysis(
90+
symbol_id=db_id,
91+
project=indexed_fixture.project,
92+
branch=indexed_fixture.branch,
93+
direction="IN",
94+
depth=5,
95+
)
96+
names = {r["name"] for r in upstream}
97+
expected = set(
98+
expected_contract["impact"]["db"]["upstream_includes_any_of"]
99+
)
100+
assert names & expected, (
101+
f"db upstream {names} disjoint from expected {expected}"
102+
)
103+
# DISTINCT enforces no duplicate ids.
104+
ids = [r["id"] for r in upstream]
105+
assert len(ids) == len(set(ids))
106+
for r in upstream:
107+
assert r["direction"] == "IN"
108+
109+
110+
async def test_impact_downstream_of_entrypoint(indexed_fixture, expected_contract):
111+
from api.mcp.tools.structural import impact_analysis
112+
113+
entry_id = await _find_id(indexed_fixture, "entrypoint")
114+
downstream = await impact_analysis(
115+
symbol_id=entry_id,
116+
project=indexed_fixture.project,
117+
branch=indexed_fixture.branch,
118+
direction="OUT",
119+
depth=5,
120+
)
121+
names = {r["name"] for r in downstream}
122+
expected = set(
123+
expected_contract["impact"]["entrypoint"]["downstream_includes_any_of"]
124+
)
125+
assert names & expected, (
126+
f"entrypoint downstream {names} disjoint from expected {expected}"
127+
)
128+
for r in downstream:
129+
assert r["direction"] == "OUT"
130+
131+
132+
async def test_impact_depth_one_only_immediate_callers(indexed_fixture):
133+
"""depth=1 returns only direct callers — service for db's caller chain,
134+
not transitive ancestors like entrypoint."""
135+
from api.mcp.tools.structural import impact_analysis
136+
137+
db_id = await _find_id(indexed_fixture, "db")
138+
upstream = await impact_analysis(
139+
symbol_id=db_id,
140+
project=indexed_fixture.project,
141+
branch=indexed_fixture.branch,
142+
direction="IN",
143+
depth=1,
144+
)
145+
names = {r["name"] for r in upstream}
146+
# entrypoint is 3 hops away — must NOT appear at depth=1.
147+
assert "entrypoint" not in names
148+
149+
150+
async def test_impact_response_serialisable(indexed_fixture):
151+
from api.mcp.tools.structural import impact_analysis
152+
153+
entry_id = await _find_id(indexed_fixture, "entrypoint")
154+
rows = await impact_analysis(
155+
symbol_id=entry_id,
156+
project=indexed_fixture.project,
157+
branch=indexed_fixture.branch,
158+
direction="OUT",
159+
depth=3,
160+
)
161+
json.dumps(rows)
162+
163+
164+
# ---------------------------------------------------------------------------
165+
# Cycle safety — small purpose-built graph
166+
# ---------------------------------------------------------------------------
167+
168+
169+
@pytest.fixture
170+
async def cycle_graph():
171+
"""Build a tiny graph with a 2-cycle (A↔B) plus an unrelated C → A edge.
172+
173+
Created with a unique branch so it's isolated from the shared
174+
sample-project fixture. Not torn down (matches the pattern used by
175+
``indexed_fixture``).
176+
"""
177+
from api.graph import Graph
178+
179+
project = "impact_cycle_test"
180+
branch = f"cycle-{uuid.uuid4().hex[:8]}"
181+
g = Graph(project, branch=branch)
182+
# CREATE three Function nodes with name + path; add CALLS edges
183+
# A → B, B → A (cycle), C → A.
184+
g.g.query(
185+
"""
186+
CREATE
187+
(a:Function:Searchable {name: 'A', path: '/tmp/cycle.py', src_start: 1}),
188+
(b:Function:Searchable {name: 'B', path: '/tmp/cycle.py', src_start: 2}),
189+
(c:Function:Searchable {name: 'C', path: '/tmp/cycle.py', src_start: 3}),
190+
(a)-[:CALLS]->(b),
191+
(b)-[:CALLS]->(a),
192+
(c)-[:CALLS]->(a)
193+
"""
194+
)
195+
yield project, branch
196+
197+
198+
async def test_impact_handles_cycles(cycle_graph):
199+
"""Variable-depth Cypher with DISTINCT must return each node once
200+
even when the graph has a cycle (A↔B). Without DISTINCT a *...*
201+
traversal would loop infinitely or emit duplicates."""
202+
from api.graph import AsyncGraphQuery
203+
from api.mcp.tools.structural import impact_analysis
204+
205+
project, branch = cycle_graph
206+
207+
# Resolve A's id via Cypher (no Searchable index in this throwaway graph)
208+
g = AsyncGraphQuery(project, branch=branch)
209+
try:
210+
res = await g._query("MATCH (n:Function {name: 'A'}) RETURN ID(n)")
211+
a_id = res.result_set[0][0]
212+
finally:
213+
await g.close()
214+
215+
upstream = await impact_analysis(
216+
symbol_id=a_id,
217+
project=project,
218+
branch=branch,
219+
direction="IN",
220+
depth=5,
221+
)
222+
names = [r["name"] for r in upstream]
223+
# DISTINCT collapses the A->B->A->B->... cycle into single entries.
224+
# B (direct caller) and C (direct caller) must both be present;
225+
# A may also appear because A is reachable from itself through the
226+
# cycle. The crucial guarantee is no duplicates and no infinite loop.
227+
assert "B" in names and "C" in names
228+
assert len(names) == len(set(names)), f"duplicates in {names}"

0 commit comments

Comments
 (0)