Skip to content

Commit 18d3cc7

Browse files
DvirDukhan and Copilot committed
feat(mcp): index_repo tool (T4 #652)
First real MCP tool. Wraps the existing Project / SourceAnalyzer pipeline so AI agents can call `index_repo(path_or_url, branch)` over stdio to populate code-graph for a repo.

- `api/mcp/tools/structural.py` (NEW) — registers `index_repo` on the shared FastMCP app. Accepts local paths or git URLs; auto-detects branch from local git checkouts via T17's `detect_branch`; honors `ALLOWED_ANALYSIS_DIR` for sandboxing. Non-git folders are handled by driving SourceAnalyzer directly (Project requires a git repo).
- `api/mcp/tools/__init__.py` (NEW) — package marker; importing it registers every tool module's `@app.tool()` decorators.
- `api/mcp/server.py` — imports tools at module load so both direct `from api.mcp.server import app` and the `cgraph-mcp` stdio entry point see the same tool list.
- `tests/mcp/test_index_repo.py` (NEW) — 5 tests: local-path happy path, missing-path error, ALLOWED_ANALYSIS_DIR sandboxing, in-process app registration, JSON serialisability.
- `tests/mcp/test_scaffold.py` — replaced the "zero tools" assertion with a presence check for `index_repo` so it stays stable as T5-T8 / T11 add more tools.

Return shape: {project_name, branch, graph_name, num_nodes, num_edges, languages_detected, mode}

`incremental` parameter is accepted now and forwarded once T18 lands; the current full-reindex path ignores it and always returns `mode="full"`.

All 8 tests pass against FalkorDB on 6390.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent a598a74 commit 18d3cc7

5 files changed

Lines changed: 285 additions & 4 deletions

File tree

api/mcp/server.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313

1414
app: FastMCP = FastMCP("code-graph")
1515

16+
# Register tools on import so both direct ``import api.mcp.server`` and the
17+
# stdio entry point see the same tool list. Imported below ``app`` because
18+
# the tool modules need a reference to it.
19+
from . import tools # noqa: F401, E402
20+
1621

1722
def main() -> None:
1823
"""Run the MCP server over stdio.

api/mcp/tools/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
"""MCP tool implementations for code-graph.
2+
3+
Each submodule registers tools against the shared FastMCP app exposed by
4+
``api.mcp.server``. Import this package to register all tools.
5+
"""
6+
7+
from . import structural # noqa: F401 (registers tools on import)

api/mcp/tools/structural.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""Structural MCP tools (T4-T8).
2+
3+
These tools wrap the existing ``Project`` / ``Graph`` / ``AsyncGraphQuery``
4+
operations so MCP-capable agents (Claude Code, Cursor, Copilot, Cline)
5+
can drive code-graph over the standard stdio transport.
6+
7+
Conventions shared by all tools in this module:
8+
9+
* Every tool accepts an optional ``branch`` so the agent can scope queries
10+
to a specific per-branch graph (see T17, issue #651). When omitted the
11+
branch is either auto-detected from a local checkout (``index_repo``)
12+
or defaults to ``_default``.
13+
* Long-running synchronous operations are pushed into a thread via
14+
``asyncio.get_running_loop().run_in_executor`` so the MCP event loop
15+
stays responsive.
16+
"""
17+
18+
from __future__ import annotations
19+
20+
import asyncio
21+
import logging
22+
import os
23+
from pathlib import Path
24+
from typing import Any, Optional
25+
26+
from ..server import app
27+
28+
29+
logger = logging.getLogger(__name__)
30+
31+
32+
# ---------------------------------------------------------------------------
33+
# Helpers
34+
# ---------------------------------------------------------------------------
35+
36+
37+
def _looks_like_url(spec: str) -> bool:
    """Tell remote repository specs apart from local filesystem paths.

    A spec counts as remote when it begins with one of the recognised
    prefixes: ``http://``, ``https://``, ``git@``, ``ssh://`` or ``git://``.
    The comparison is a case-sensitive prefix test; everything else is
    treated as a local path.
    """
    remote_prefixes = ("http://", "https://", "git@", "ssh://", "git://")
    return any(spec.startswith(prefix) for prefix in remote_prefixes)
40+
41+
42+
def _languages_detected(graph) -> list[str]:
    """Collect the distinct file extensions recorded in the graph.

    Runs one query over ``File`` nodes for their ``ext`` property and
    normalises every value by removing leading dots. Null and empty values
    are dropped.

    Returns:
        The extensions in sorted order. An empty list is returned when no
        rows come back or when the query raises (the failure is logged as
        a warning rather than propagated).
    """
    cypher = "MATCH (f:File) RETURN DISTINCT f.ext AS ext"
    try:
        result_rows = graph.g.query(cypher).result_set
    except Exception as exc:  # pragma: no cover — defensive
        logger.warning("languages_detected query failed: %s", exc)
        return []

    normalised = ((record[0] or "").lstrip(".") for record in result_rows or [])
    return sorted({extension for extension in normalised if extension})
61+
62+
63+
def _count(graph, label: str) -> int:
    """Count the nodes in ``graph`` that carry ``label``.

    Args:
        graph: Graph wrapper whose ``g`` attribute exposes ``query``.
        label: Node label to count. It is interpolated directly into the
            query text, so callers must only pass trusted, internal label
            names (never agent-supplied strings).

    Returns:
        The node count, or ``0`` when the query yields no rows or fails.
        Failures are logged as warnings so a broken query can be told apart
        from a genuinely empty graph.
    """
    try:
        rows = graph.g.query(
            f"MATCH (n:{label}) RETURN count(n) AS c"
        ).result_set
        return int(rows[0][0]) if rows else 0
    except Exception as e:
        # Best-effort: a failed count must not fail the whole tool call,
        # but it should not vanish silently either.
        logger.warning("node count query failed for label %s: %s", label, e)
        return 0
71+
72+
73+
def _count_edges(graph) -> int:
    """Count every directed relationship stored in ``graph``.

    Args:
        graph: Graph wrapper whose ``g`` attribute exposes ``query``.

    Returns:
        The relationship count, or ``0`` when the query yields no rows or
        fails. Failures are logged as warnings so a broken query can be
        told apart from a graph that really has no edges.
    """
    try:
        rows = graph.g.query("MATCH ()-[r]->() RETURN count(r) AS c").result_set
        return int(rows[0][0]) if rows else 0
    except Exception as e:
        # Best-effort: a failed count must not fail the whole tool call,
        # but it should not vanish silently either.
        logger.warning("edge count query failed: %s", e)
        return 0
79+
80+
81+
# ---------------------------------------------------------------------------
82+
# T4 — index_repo
83+
# ---------------------------------------------------------------------------
84+
85+
86+
@app.tool(
    name="index_repo",
    description=(
        "Index a code repository into code-graph for subsequent navigation. "
        "Accepts a local path or a git URL. When `branch` is omitted, "
        "auto-detects the current branch from the local checkout (defaults "
        "to '_default' for non-git folders). Returns the indexed graph's "
        "node/edge counts, detected languages, and the (project, branch) "
        "identity callers should pass to other code-graph tools."
    ),
)
async def index_repo(
    path_or_url: str,
    branch: Optional[str] = None,
    incremental: bool = True,  # accepted now, fully honored once T18 lands
    ignore: Optional[list[str]] = None,
) -> dict[str, Any]:
    """Implementation for the ``index_repo`` MCP tool.

    The blocking indexing work runs in the event loop's default executor so
    the MCP event loop stays responsive while a repository is analyzed.

    Args:
        path_or_url: Filesystem path to a local repository **or** a clonable
            git URL (``https://...``, ``git@host:...``, ``ssh://...``).
        branch: Branch identity for the indexed graph. When ``None``:
            auto-detect from the checkout via ``git rev-parse --abbrev-ref
            HEAD``; falls back to ``_default`` if not a git checkout.
        incremental: Accepted for forward-compatibility with T18; the
            current full-reindex path ignores it.
        ignore: List of relative paths to skip during analysis. ``None``
            means "skip nothing".

    Returns:
        A JSON-serialisable dict with the keys ``project_name``, ``branch``,
        ``graph_name``, ``num_nodes`` (``File`` + ``Class`` + ``Function``
        nodes), ``num_edges``, ``languages_detected`` and ``mode`` (always
        ``"full"`` for now).

    Raises:
        ValueError: If a local path does not exist, lies outside
            ``ALLOWED_ANALYSIS_DIR`` when that variable is set, or is not a
            directory.
    """
    # Imported lazily, as in the original implementation, so the project
    # machinery is only loaded when the tool is actually invoked.
    from api.project import Project, detect_branch

    if ignore is None:
        ignore = []

    loop = asyncio.get_running_loop()

    def _payload(project) -> dict[str, Any]:
        """Build the uniform response dict from a Project-like object."""
        g = project.graph
        return {
            "project_name": project.name,
            "branch": getattr(project, "branch", None),
            "graph_name": g.name,
            "num_nodes": (
                _count(g, "File") + _count(g, "Class") + _count(g, "Function")
            ),
            "num_edges": _count_edges(g),
            "languages_detected": _languages_detected(g),
            # T18 will flip this to "incremental" when only changed files
            # were re-analyzed.
            "mode": "full",
        }

    def _do_index() -> dict[str, Any]:
        """Synchronous indexing body; executed in a worker thread."""
        if _looks_like_url(path_or_url):
            project = Project.from_git_repository(path_or_url, branch=branch)
        else:
            local_path = Path(path_or_url).expanduser().resolve()
            if not local_path.exists():
                raise ValueError(f"path does not exist: {local_path}")

            # Reject paths outside the allow-list when one is configured.
            allowed_root = os.getenv("ALLOWED_ANALYSIS_DIR")
            if allowed_root:
                allowed = Path(allowed_root).expanduser().resolve()
                try:
                    local_path.relative_to(allowed)
                except ValueError as e:
                    raise ValueError(
                        f"path {local_path} is outside ALLOWED_ANALYSIS_DIR={allowed}"
                    ) from e

            # A regular file passes ``exists()`` but cannot be indexed as a
            # repository; fail fast before any graph is created for it.
            # Checked after the sandbox test so out-of-sandbox paths always
            # get the sandbox error.
            if not local_path.is_dir():
                raise ValueError(f"path is not a directory: {local_path}")

            # Use Project for git-repo paths so commit metadata is saved,
            # otherwise drive SourceAnalyzer directly so non-git folders work.
            if (local_path / ".git").is_dir():
                project = Project.from_local_repository(local_path, branch=branch)
            else:
                # Synthesize a Project-like object so the return shape is uniform.
                from api.analyzers.source_analyzer import SourceAnalyzer
                from api.graph import Graph

                detected = branch if branch is not None else detect_branch(local_path)
                graph = Graph(local_path.name, branch=detected)
                analyzer = SourceAnalyzer()
                analyzer.analyze_local_folder(str(local_path), graph, ignore)

                class _Synth:  # tiny shim to mirror Project's surface
                    name = local_path.name

                    def __init__(self, g, b):
                        self.graph = g
                        self.branch = b

                return _payload(_Synth(graph, detected))

        project.analyze_sources(ignore)
        return _payload(project)

    return await loop.run_in_executor(None, _do_index)

tests/mcp/test_index_repo.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""T4 — ``index_repo`` MCP tool tests."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from pathlib import Path
7+
8+
import pytest
9+
10+
11+
pytestmark = pytest.mark.anyio
12+
13+
14+
@pytest.fixture
def anyio_backend() -> str:
    """Name the async backend used for the ``anyio``-marked tests here."""
    backend_name = "asyncio"
    return backend_name
17+
18+
19+
async def test_index_repo_local_path(sample_project_path: Path, expected_contract):
    """Happy path: indexing the sample project returns the full payload."""
    from api.mcp.tools.structural import index_repo

    branch_name = "t4-local-test"
    payload = await index_repo(str(sample_project_path), branch=branch_name)

    # Identity fields the agent passes on to other code-graph tools.
    assert payload["project_name"] == "sample_project"
    assert payload["branch"] == "t4-local-test"
    assert payload["graph_name"].startswith("code:sample_project:")
    assert payload["mode"] == "full"

    # Content checks: enough nodes, at least one edge, Python detected.
    minimum_nodes = sum(expected_contract["counts_min"].values())
    assert payload["num_nodes"] >= minimum_nodes
    assert payload["num_edges"] > 0
    assert "py" in payload["languages_detected"]
32+
33+
34+
async def test_index_repo_rejects_missing_path():
    """A nonexistent local path is reported to the agent as a ValueError."""
    from api.mcp.tools.structural import index_repo

    missing_path = "/this/path/definitely/does/not/exist/anywhere"
    with pytest.raises(ValueError, match="path does not exist"):
        await index_repo(missing_path)
40+
41+
42+
async def test_index_repo_honors_allowed_analysis_dir(
    sample_project_path: Path,
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
):
    """Sandboxing: a path outside ALLOWED_ANALYSIS_DIR must be refused."""
    from api.mcp.tools import structural

    # Point the allow-list at an unrelated temp dir so the sample project
    # falls outside it.
    sandbox_root = str(tmp_path)
    monkeypatch.setenv("ALLOWED_ANALYSIS_DIR", sandbox_root)

    target = str(sample_project_path)
    with pytest.raises(ValueError, match="outside ALLOWED_ANALYSIS_DIR"):
        await structural.index_repo(target, branch="t4-sandbox")
54+
55+
56+
async def test_index_repo_registered_via_app():
    """The tool is reachable via ``app.list_tools()`` (protocol parity).

    Also checks that every parameter declared on ``index_repo`` — including
    ``ignore`` — is surfaced in the input schema shown to the agent.
    """
    from api.mcp.server import app

    tools = await app.list_tools()
    names = {t.name for t in tools}
    assert "index_repo" in names

    tool = next(t for t in tools if t.name == "index_repo")
    properties = tool.inputSchema["properties"]
    # Description / param schema are surfaced to the agent.
    for param in ("path_or_url", "branch", "incremental", "ignore"):
        assert param in properties, f"{param} missing from index_repo schema"
70+
71+
72+
async def test_index_repo_response_serialises_to_json(
    sample_project_path: Path,
):
    """The response dict has to survive ``json.dumps`` because MCP speaks JSON."""
    from api.mcp.tools.structural import index_repo

    response = await index_repo(str(sample_project_path), branch="t4-json-test")
    # json.dumps raises on values it cannot encode, which fails the test.
    json.dumps(response)

tests/mcp/test_scaffold.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ def test_main_entry_point_exists() -> None:
4545

4646

4747
@pytest.mark.anyio
48-
async def test_stdio_server_lists_zero_tools() -> None:
48+
async def test_stdio_server_lists_registered_tools() -> None:
4949
"""Spawn ``cgraph-mcp`` over stdio and verify the protocol handshake.
5050
51-
The scaffold registers no tools, so ``list_tools`` must return an
52-
empty list. Tool tickets (T4-T8, T11) extend this expectation.
51+
Once tool tickets land (T4+), ``list_tools`` returns at least the
52+
tools they register. This test only guards the *handshake* — per-tool
53+
behavioural assertions live in the per-tool test modules.
5354
"""
5455
cgraph_mcp = shutil.which("cgraph-mcp")
5556
assert cgraph_mcp is not None, (
@@ -62,4 +63,8 @@ async def test_stdio_server_lists_zero_tools() -> None:
6263
async with ClientSession(read, write) as session:
6364
await session.initialize()
6465
result = await session.list_tools()
65-
assert result.tools == []
66+
# ``index_repo`` lands in T4; this assertion intentionally
67+
# only checks for presence so it stays stable as more tools
68+
# are registered in T5-T8 / T11.
69+
names = {t.name for t in result.tools}
70+
assert "index_repo" in names

0 commit comments

Comments
 (0)