Skip to content

Commit 5e376e6

Browse files
DvirDukhan and Copilot committed
feat(mcp): auto-init — ensure FalkorDB + opt-in auto-index (T12 #660)
Zero-config startup so a fresh user doesn't need to run `cgraph ensure-db` and `index_repo` manually. api/mcp/auto_init.py - ensure_falkordb(): on server boot, ping FalkorDB; if unreachable on a localhost host, shell out to `cgraph ensure-db` (reuses the existing CLI's Docker bootstrap rather than duplicating it). Subprocess (not in-process call) so the CLI's stdout JSON doesn't pollute the MCP server's stdio transport. Never raises — server start continues even on bootstrap failure so individual tools can surface their own errors. - maybe_auto_index(cwd=None, project=None, branch=None): opt-in via CODE_GRAPH_AUTO_INDEX env var (off by default — indexing a large repo can take minutes and surprising the user on first call is bad UX). Detects current branch via `git rev-parse`, falls back to `_default`. Per-(project, branch) idempotency via a module-level set; second call for the same key is a no-op. - _truthy helper accepts 1/true/yes/on (case insensitive). api/mcp/server.py - main() now runs ensure_falkordb() and maybe_auto_index() before app.run(). Module-level import behaviour unchanged (tests that `import api.mcp.server` don't trigger any I/O). tests/mcp/test_auto_init.py (9 tests) - ensure_falkordb: no-op when reachable, runs cgraph when not, skips Docker for remote hosts, handles missing CLI binary. - maybe_auto_index: skipped when env unset, indexes when opt-in, idempotent across calls for same key, distinct branches each get one auto-index, _truthy semantics. All mocks — no Docker, no real FalkorDB writes — so the tests run in <2s without external dependencies. Out of scope per ticket: watch mode / re-indexing on FS change, auto-pulling Docker image (cgraph ensure-db handles that), cross- session state. Closes #660. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent a3b3206 commit 5e376e6

3 files changed

Lines changed: 349 additions & 1 deletion

File tree

api/mcp/auto_init.py

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
"""Zero-config startup helpers for the MCP server (T12).
2+
3+
Two automation behaviours:
4+
5+
1. :func:`ensure_falkordb` — at server boot, ping FalkorDB; if it's
6+
unreachable on a localhost host, run ``cgraph ensure-db`` to spin up
7+
the existing Docker container. Reuses ``api.cli.ensure_db`` rather
8+
than duplicating Docker logic.
9+
10+
2. :func:`maybe_auto_index` — when ``CODE_GRAPH_AUTO_INDEX=true`` is set
11+
(opt-in, off by default), index the current working directory into a
12+
per-branch graph so the agent doesn't have to call ``index_repo``
13+
first. Idempotent within a single process — the second call for the
14+
same ``(project, branch)`` is a no-op.
15+
16+
Both are deliberately conservative: ensure-db only acts on localhost
17+
hosts, and auto-index requires explicit opt-in because indexing a
18+
large repo can take minutes and surprising the user with that on
19+
first tool call is bad UX.
20+
"""
21+
22+
from __future__ import annotations
23+
24+
import logging
25+
import os
26+
import socket
27+
import subprocess
28+
from pathlib import Path
29+
from typing import Iterable, Optional
30+
31+
32+
logger = logging.getLogger(__name__)
33+
34+
35+
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1"}
36+
_AUTO_INDEXED: set[tuple[str, str]] = set()
37+
38+
39+
# ---------------------------------------------------------------------------
40+
# ensure_falkordb
41+
# ---------------------------------------------------------------------------
42+
43+
44+
def _falkordb_reachable(host: str, port: int, timeout: float = 1.0) -> bool:
45+
try:
46+
with socket.create_connection((host, port), timeout=timeout):
47+
return True
48+
except OSError:
49+
return False
50+
51+
52+
def ensure_falkordb() -> dict:
    """Make sure FalkorDB is reachable; bootstrap Docker if not.

    Reads ``FALKORDB_HOST`` (default ``localhost``) and ``FALKORDB_PORT``
    (default ``6379``) from the environment. If the endpoint already
    accepts connections nothing is done. Otherwise, for localhost hosts
    only, ``cgraph ensure-db`` is run as a subprocess.

    Returns:
        A small status dict so the caller can log it. ``"status"`` is
        ``"ok"`` or ``"error"``. ``"action"`` is ``"none"`` when FalkorDB
        was already reachable and ``"started"`` when the CLI was run (in
        which case ``"stdout"`` / ``"stderr"`` carry its captured
        output). Error results carry a ``"message"`` instead.

    Never raises for configuration or launch problems — the goal is to
    start the MCP server even if the bootstrap fails; individual tools
    will then surface their own errors.
    """
    host = os.getenv("FALKORDB_HOST", "localhost")
    try:
        port = int(os.getenv("FALKORDB_PORT", "6379"))
    except ValueError:
        return {"status": "error", "message": "invalid FALKORDB_PORT"}
    # A value that parses as an int but cannot name a TCP port is just as
    # invalid; reject it here instead of handing it to the socket layer.
    if not 0 < port < 65536:
        return {"status": "error", "message": "invalid FALKORDB_PORT"}

    if _falkordb_reachable(host, port):
        return {"status": "ok", "host": host, "port": port, "action": "none"}

    if host not in _LOCAL_HOSTS:
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": "FalkorDB unreachable; auto-start only supports localhost",
        }

    logger.info("FalkorDB unreachable on %s:%s — running `cgraph ensure-db`", host, port)
    try:
        # Subprocess so the CLI's stdout (which prints JSON) doesn't pollute
        # the MCP server's own stdio transport. stdin is detached for the
        # same reason: an inherited stdin would let the child read bytes
        # that belong to the MCP client.
        result = subprocess.run(
            ["cgraph", "ensure-db"],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        return {"status": "error", "message": "cgraph CLI not on PATH"}
    except OSError as exc:
        # Any other launch failure (e.g. the binary is not executable) must
        # not abort server startup either.
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": f"failed to run `cgraph ensure-db`: {exc}",
        }

    return {
        "status": "ok" if result.returncode == 0 else "error",
        "host": host,
        "port": port,
        "action": "started",
        "stdout": result.stdout.strip(),
        "stderr": result.stderr.strip(),
    }
97+
98+
99+
# ---------------------------------------------------------------------------
100+
# maybe_auto_index
101+
# ---------------------------------------------------------------------------
102+
103+
104+
def _truthy(val: Optional[str]) -> bool:
105+
return (val or "").strip().lower() in {"1", "true", "yes", "on"}
106+
107+
108+
def _detect_branch(cwd: Path) -> str:
109+
"""Best-effort current-branch detection. Falls back to ``_default``."""
110+
try:
111+
result = subprocess.run(
112+
["git", "rev-parse", "--abbrev-ref", "HEAD"],
113+
cwd=str(cwd),
114+
capture_output=True,
115+
text=True,
116+
check=False,
117+
)
118+
if result.returncode == 0 and result.stdout.strip():
119+
return result.stdout.strip()
120+
except FileNotFoundError:
121+
pass
122+
return "_default"
123+
124+
125+
def maybe_auto_index(
    cwd: Optional[Path] = None,
    *,
    project: Optional[str] = None,
    branch: Optional[str] = None,
) -> dict:
    """Index ``cwd`` into its per-branch graph when the user has opted in.

    Nothing happens unless ``CODE_GRAPH_AUTO_INDEX`` holds a truthy
    value. ``cwd`` defaults to the process working directory, ``project``
    to that directory's name, and ``branch`` to whatever
    :func:`_detect_branch` reports for it.

    Each ``(project, branch)`` pair is indexed at most once per process:
    successful runs are remembered in the module-level
    :data:`_AUTO_INDEXED` set and later calls for the same pair return a
    ``"skipped"`` status.

    Returns:
        A status dict whose ``"status"`` is ``"skipped"`` (with a
        ``"reason"``) or ``"indexed"`` (with ``"project"``, ``"branch"``
        and ``"path"``).
    """
    opted_in = _truthy(os.getenv("CODE_GRAPH_AUTO_INDEX"))
    if not opted_in:
        return {"status": "skipped", "reason": "CODE_GRAPH_AUTO_INDEX not set"}

    root = (cwd or Path.cwd()).resolve()
    project_name = project if project else root.name
    branch_name = branch if branch else _detect_branch(root)

    key = (project_name, branch_name)
    already_done = key in _AUTO_INDEXED
    if already_done:
        return {"status": "skipped", "reason": "already auto-indexed", "key": key}

    # Imported here rather than at module top so that merely importing this
    # module does not pull in the analyzer stack.
    from api.analyzers.source_analyzer import SourceAnalyzer
    from api.graph import Graph

    logger.info(
        "Auto-indexing %s @ %s into code:%s:%s",
        root,
        branch_name,
        project_name,
        branch_name,
    )
    target_graph = Graph(project_name, branch=branch_name)
    analyzer = SourceAnalyzer()
    analyzer.analyze_local_folder(str(root), target_graph)

    # Only remember the key once indexing has completed without raising.
    _AUTO_INDEXED.add(key)

    outcome = {"status": "indexed"}
    outcome["project"] = project_name
    outcome["branch"] = branch_name
    outcome["path"] = str(root)
    return outcome
164+
165+
166+
def reset_auto_index_cache(keys: Optional[Iterable[tuple[str, str]]] = None) -> None:
    """Forget which ``(project, branch)`` pairs were auto-indexed. Tests only.

    With no argument the whole session cache is emptied; otherwise only
    the given keys are removed, and keys that are not cached are ignored.
    """
    if keys is not None:
        for entry in keys:
            _AUTO_INDEXED.discard(entry)
        return
    _AUTO_INDEXED.clear()

api/mcp/server.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,16 @@
2222
def main() -> None:
    """Run the MCP server over stdio.

    Console-script entry point for ``cgraph-mcp``. Runs the T12
    auto-init helpers first so a freshly-cloned user gets a working
    FalkorDB without manual `cgraph ensure-db`, and (opt-in via
    ``CODE_GRAPH_AUTO_INDEX``) an indexed CWD without manual
    `index_repo`.

    Auto-init is best-effort: a failed database bootstrap is logged as a
    warning and an exception raised while auto-indexing is logged and
    swallowed, so the server always goes on to start and individual
    tools can surface their own errors.
    """
    import logging

    from .auto_init import ensure_falkordb, maybe_auto_index

    log = logging.getLogger(__name__)

    db_status = ensure_falkordb()
    if db_status.get("status") != "ok":
        log.warning("FalkorDB auto-init did not succeed: %s", db_status)

    # maybe_auto_index talks to the graph and runs the analyzer without any
    # error handling of its own; a failure there must not prevent startup.
    try:
        index_status = maybe_auto_index()
    except Exception:
        log.exception("Auto-index failed; starting the MCP server without it")
    else:
        log.info("Auto-index status: %s", index_status)

    app.run(transport="stdio")
2836

2937

tests/mcp/test_auto_init.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""T12 — auto_init tests (mocked subprocess / graph)."""
2+
3+
from __future__ import annotations
4+
5+
from pathlib import Path
6+
from unittest.mock import MagicMock, patch
7+
8+
import pytest
9+
10+
11+
# ---------------------------------------------------------------------------
12+
# ensure_falkordb
13+
# ---------------------------------------------------------------------------
14+
15+
16+
def test_ensure_falkordb_no_action_when_reachable(monkeypatch):
    """A reachable FalkorDB means no bootstrap subprocess is spawned."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "localhost"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    with patch.object(auto_init, "_falkordb_reachable", return_value=True):
        with patch("api.mcp.auto_init.subprocess.run") as spawned:
            outcome = auto_init.ensure_falkordb()

    assert outcome["status"] == "ok"
    assert outcome["action"] == "none"
    spawned.assert_not_called()
29+
30+
31+
def test_ensure_falkordb_runs_cgraph_when_unreachable(monkeypatch):
    """An unreachable localhost FalkorDB triggers ``cgraph ensure-db``."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "localhost"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    completed = MagicMock(returncode=0, stdout="ok", stderr="")
    with patch.object(auto_init, "_falkordb_reachable", return_value=False):
        with patch("api.mcp.auto_init.subprocess.run", return_value=completed) as spawned:
            outcome = auto_init.ensure_falkordb()

    assert outcome["status"] == "ok"
    assert outcome["action"] == "started"
    spawned.assert_called_once()
    command = spawned.call_args.args[0]
    assert command == ["cgraph", "ensure-db"]
47+
48+
49+
def test_ensure_falkordb_skips_docker_for_remote_host(monkeypatch):
    """Auto-start is localhost-only by design: remote hosts only get an error."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "graph.example.com"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    with patch.object(auto_init, "_falkordb_reachable", return_value=False):
        with patch("api.mcp.auto_init.subprocess.run") as spawned:
            outcome = auto_init.ensure_falkordb()

    assert outcome["status"] == "error"
    assert "localhost" in outcome["message"]
    spawned.assert_not_called()
63+
64+
65+
def test_ensure_falkordb_handles_missing_cli(monkeypatch):
    """A missing ``cgraph`` binary is reported as an error, not raised."""
    from api.mcp import auto_init

    for name, value in (("FALKORDB_HOST", "localhost"), ("FALKORDB_PORT", "6379")):
        monkeypatch.setenv(name, value)

    with patch.object(auto_init, "_falkordb_reachable", return_value=False):
        with patch("api.mcp.auto_init.subprocess.run", side_effect=FileNotFoundError):
            outcome = auto_init.ensure_falkordb()

    assert outcome["status"] == "error"
    assert "PATH" in outcome["message"]
77+
78+
79+
# ---------------------------------------------------------------------------
80+
# maybe_auto_index
81+
# ---------------------------------------------------------------------------
82+
83+
84+
@pytest.fixture(autouse=True)
def _reset_cache():
    """Empty the auto-index session cache before and after every test."""
    from api.mcp import auto_init

    auto_init.reset_auto_index_cache()
    yield
    auto_init.reset_auto_index_cache()
91+
92+
93+
def test_maybe_auto_index_skipped_when_env_unset(monkeypatch, tmp_path):
    """With the opt-in variable unset, nothing is indexed."""
    from api.mcp import auto_init

    monkeypatch.delenv("CODE_GRAPH_AUTO_INDEX", raising=False)

    # maybe_auto_index imports SourceAnalyzer and Graph from their defining
    # modules at call time, so they have to be patched there; an attribute
    # patched onto auto_init itself would never be looked up.
    with patch("api.analyzers.source_analyzer.SourceAnalyzer") as analyzer_cls, \
            patch("api.graph.Graph") as graph_cls:
        status = auto_init.maybe_auto_index(cwd=tmp_path)

    assert status["status"] == "skipped"
    assert "CODE_GRAPH_AUTO_INDEX" in status["reason"]
    # The point of the test: neither the graph nor the analyzer was touched.
    analyzer_cls.assert_not_called()
    graph_cls.assert_not_called()
103+
104+
105+
def test_maybe_auto_index_indexes_when_opt_in(monkeypatch, tmp_path):
    """Opting in via the env var runs the analyzer once for the project."""
    from api.mcp import auto_init

    monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "true")

    analyzer = MagicMock()
    graph = MagicMock()
    with patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=analyzer):
        with patch("api.graph.Graph", return_value=graph):
            with patch.object(auto_init, "_detect_branch", return_value="main"):
                outcome = auto_init.maybe_auto_index(cwd=tmp_path, project="myproj")

    assert outcome["status"] == "indexed"
    assert outcome["project"] == "myproj"
    assert outcome["branch"] == "main"
    analyzer.analyze_local_folder.assert_called_once()
121+
122+
123+
def test_maybe_auto_index_idempotent(monkeypatch, tmp_path):
    """Second call for the same (project, branch) is a no-op."""
    from api.mcp import auto_init

    monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "1")

    analyzer = MagicMock()
    with patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=analyzer):
        with patch("api.graph.Graph", return_value=MagicMock()):
            with patch.object(auto_init, "_detect_branch", return_value="main"):
                outcomes = [
                    auto_init.maybe_auto_index(cwd=tmp_path, project="myproj")
                    for _ in range(2)
                ]

    first, second = outcomes
    assert first["status"] == "indexed"
    assert second["status"] == "skipped"
    assert "already" in second["reason"]
    # Critical: the analyzer was invoked exactly once.
    assert analyzer.analyze_local_folder.call_count == 1
141+
142+
143+
def test_maybe_auto_index_per_branch(monkeypatch, tmp_path):
    """Different branches under the same project each get one auto-index."""
    from api.mcp import auto_init

    monkeypatch.setenv("CODE_GRAPH_AUTO_INDEX", "yes")

    analyzer = MagicMock()
    with patch("api.analyzers.source_analyzer.SourceAnalyzer", return_value=analyzer):
        with patch("api.graph.Graph", return_value=MagicMock()):
            statuses = [
                auto_init.maybe_auto_index(cwd=tmp_path, project="p", branch=name)["status"]
                for name in ("main", "feature-x", "main")
            ]

    # main and feature-x are each indexed once; the repeat of main is skipped.
    assert statuses == ["indexed", "indexed", "skipped"]
    assert analyzer.analyze_local_folder.call_count == 2
160+
161+
162+
def test_truthy_helper():
    """``_truthy`` accepts 1/true/yes/on in any case and rejects the rest."""
    from api.mcp import auto_init

    enabled = ("1", "true", "TRUE", "yes", "YES", "on")
    disabled = ("", "0", "false", "no", "off", None)

    for value in enabled:
        assert auto_init._truthy(value)
    for value in disabled:
        assert not auto_init._truthy(value)

0 commit comments

Comments
 (0)