Skip to content

Commit 6b23cb6

Browse files
authored
Merge pull request #682 from FalkorDB/dvirdukhan/mcp-t12-auto-init
feat(mcp): auto-init — ensure FalkorDB + opt-in auto-index (T12)
2 parents 347dd46 + 12c0b0d commit 6b23cb6

3 files changed

Lines changed: 579 additions & 1 deletion

File tree

api/mcp/auto_init.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
"""Zero-config startup helpers for the MCP server (T12).
2+
3+
Two automation behaviours:
4+
5+
1. :func:`ensure_falkordb` — at server boot, ping FalkorDB; if it's
6+
unreachable on a localhost host, shell out to ``cgraph ensure-db``
7+
(a subprocess, so the CLI's JSON stdout can't pollute the MCP
8+
server's own stdio transport) to spin up the Docker container.
9+
10+
2. :func:`maybe_auto_index` — when ``CODE_GRAPH_AUTO_INDEX=true`` is set
11+
(opt-in, off by default), index the current working directory into a
12+
per-branch graph so the agent doesn't have to call ``index_repo``
13+
first. Only indexes when the target graph is empty, stays within
14+
``ALLOWED_ANALYSIS_DIR`` when configured, and is idempotent within a
15+
single process — the second call for the same ``(project, branch)``
16+
is a no-op. The server runs it off the startup path (a daemon thread)
17+
so indexing a large repo never blocks the stdio handshake.
18+
19+
Both are deliberately conservative: ensure-db only acts on localhost
20+
hosts, and auto-index requires explicit opt-in because indexing a
21+
large repo can take minutes.
22+
"""
23+
24+
from __future__ import annotations
25+
26+
import logging
27+
import os
28+
import subprocess
29+
from pathlib import Path
30+
from typing import Iterable, Optional
31+
32+
33+
logger = logging.getLogger(__name__)
34+
35+
36+
# Host names treated as "this machine". ensure_falkordb only attempts the
# `cgraph ensure-db` auto-start when FALKORDB_HOST is one of these.
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1"}
37+
# Per-process record of the (project, branch) pairs maybe_auto_index has
# already handled, so repeat calls return "skipped" instead of re-indexing.
_AUTO_INDEXED: set[tuple[str, str]] = set()
38+
39+
40+
# ---------------------------------------------------------------------------
41+
# ensure_falkordb
42+
# ---------------------------------------------------------------------------
43+
44+
45+
def _falkordb_reachable(host: str, port: int, timeout: float = 1.0) -> bool:
    """Report whether a Redis-protocol server answers ``PING`` at ``host:port``.

    A bare TCP connect is not enough: a successful socket connection doesn't
    prove the listener is actually FalkorDB/Redis (or that it has finished
    starting up). So a ``PING`` is issued using the same auth env
    (`FALKORDB_USERNAME`/`FALKORDB_PASSWORD`) the rest of the app uses.

    Args:
        host: Host name or address to probe.
        port: TCP port to probe.
        timeout: Seconds allowed for the TCP connect and, separately, for each
            socket read/write while waiting for the ``PING`` reply.

    Returns:
        ``True`` only when the server answered ``PING``; ``False`` on any
        failure (refused connection, timeout, auth error, ``redis`` package
        not importable, ...). Never raises.
    """
    client = None
    try:
        # Imported lazily, and inside the ``try`` so that a missing or broken
        # ``redis`` package is reported as "not reachable" instead of raising
        # out of a probe whose contract is "any failure means False".
        import redis

        client = redis.Redis(
            host=host,
            port=port,
            username=os.getenv("FALKORDB_USERNAME") or None,
            password=os.getenv("FALKORDB_PASSWORD") or None,
            socket_connect_timeout=timeout,
            # Bound the wait for the reply as well: a listener that accepts
            # the connection but never speaks Redis would otherwise block the
            # PING (and therefore server startup) indefinitely.
            socket_timeout=timeout,
        )
        return bool(client.ping())
    except Exception:
        # Deliberate catch-all: this is a yes/no probe.
        return False
    finally:
        if client is not None:
            try:
                client.close()
            except Exception:
                # Cleanup is best-effort; the probe result is already decided.
                pass
73+
74+
75+
def ensure_falkordb() -> dict:
    """Make sure FalkorDB is reachable; bootstrap Docker if not.

    Reads the target from ``FALKORDB_HOST`` (default ``localhost``) and
    ``FALKORDB_PORT`` (default ``6379``). If the server does not answer and
    the host is a local one, runs ``cgraph ensure-db`` as a subprocess.

    Returns:
        A small status dict so the caller can log it. ``"status"`` is
        ``"ok"`` or ``"error"``. Depending on the path taken it also carries
        ``"host"``/``"port"``, a ``"message"`` (errors), an ``"action"``
        (``"none"`` when already reachable, ``"started"`` when the CLI was
        run) and the CLI's ``"stdout"``/``"stderr"``.

    Never raises for configuration or process-launch problems — the goal is
    to start the MCP server even if the bootstrap fails; individual tools
    will then surface their own errors.
    """
    host = os.getenv("FALKORDB_HOST", "localhost")
    try:
        port = int(os.getenv("FALKORDB_PORT", "6379"))
    except ValueError:
        return {"status": "error", "message": "invalid FALKORDB_PORT"}
    if not 1 <= port <= 65535:
        return {
            "status": "error",
            "message": f"FALKORDB_PORT must be between 1 and 65535, got {port}",
        }

    if _falkordb_reachable(host, port):
        return {"status": "ok", "host": host, "port": port, "action": "none"}

    # Host names are case-insensitive, so "Localhost" must count as local too.
    if host.lower() not in _LOCAL_HOSTS:
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": "FalkorDB unreachable; auto-start only supports localhost",
        }

    logger.info("FalkorDB unreachable on %s:%s — running `cgraph ensure-db`", host, port)
    try:
        # Subprocess so the CLI's stdout (which prints JSON) doesn't pollute
        # the MCP server's own stdio transport.
        result = subprocess.run(
            ["cgraph", "ensure-db"],
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        return {"status": "error", "message": "cgraph CLI not on PATH"}
    except OSError as exc:
        # e.g. PermissionError when a non-executable ``cgraph`` is on PATH.
        # Reported instead of raised to honour the "never raises" contract.
        return {"status": "error", "message": f"could not run cgraph ensure-db: {exc}"}

    return {
        # check=False above: a non-zero exit is reported here, not raised.
        "status": "ok" if result.returncode == 0 else "error",
        "host": host,
        "port": port,
        "action": "started",
        "stdout": result.stdout.strip(),
        "stderr": result.stderr.strip(),
    }
125+
126+
127+
# ---------------------------------------------------------------------------
128+
# maybe_auto_index
129+
# ---------------------------------------------------------------------------
130+
131+
132+
def _truthy(val: Optional[str]) -> bool:
    """Return ``True`` when ``val`` spells an affirmative flag value.

    Accepted spellings, ignoring case and surrounding whitespace, are
    ``1``, ``true``, ``yes`` and ``on``. ``None`` and everything else are
    ``False``.
    """
    if not val:
        return False
    normalized = val.strip().lower()
    return normalized in ("1", "true", "yes", "on")
134+
135+
136+
def _detect_branch(cwd: Path) -> str:
    """Best-effort current-branch detection. Falls back to ``_default``.

    Runs ``git rev-parse --abbrev-ref HEAD`` in ``cwd``.

    Args:
        cwd: Directory in which to run git.

    Returns:
        The branch name git prints, or ``"_default"`` when git cannot be
        launched (not installed, ``cwd`` missing / not a directory / not
        accessible), exits non-zero, prints nothing, or reports a detached
        HEAD.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            cwd=str(cwd),
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError:
        # Covers FileNotFoundError (no git binary, missing cwd) as well as
        # NotADirectoryError / PermissionError for an unusable cwd; branch
        # detection is best-effort and must not fail the caller.
        return "_default"

    branch = result.stdout.strip() if result.returncode == 0 else ""
    # A detached HEAD reports the literal "HEAD"; treat it (and an empty
    # result) as the default branch — matching api.project.detect_branch —
    # so we never create a graph named code:<project>:HEAD.
    if branch and branch != "HEAD":
        return branch
    return "_default"
157+
158+
159+
def maybe_auto_index(
    cwd: Optional[Path] = None,
    *,
    project: Optional[str] = None,
    branch: Optional[str] = None,
) -> dict:
    """Index ``cwd`` into its per-branch graph when auto-indexing is enabled.

    Nothing happens unless ``CODE_GRAPH_AUTO_INDEX`` holds a truthy value.
    Pairs of ``(project, branch)`` that were already handled in this process
    are remembered in the module-level :data:`_AUTO_INDEXED` set, so later
    calls for the same pair are no-ops.

    Args:
        cwd: Directory to index; defaults to the process working directory.
        project: Project name; defaults to the resolved directory's name.
        branch: Branch name; defaults to what :func:`_detect_branch` reports.

    Returns:
        A status dict whose ``"status"`` is ``"skipped"`` (with a
        ``"reason"``) or ``"indexed"``.
    """
    if not _truthy(os.getenv("CODE_GRAPH_AUTO_INDEX")):
        return {"status": "skipped", "reason": "CODE_GRAPH_AUTO_INDEX not set"}

    target_dir = (cwd if cwd else Path.cwd()).resolve()

    # Same sandbox boundary as /api/analyze_folder and the index_repo MCP
    # tool: opting in to auto-index must not let the server index a path
    # outside the configured allow-list.
    sandbox_env = os.getenv("ALLOWED_ANALYSIS_DIR")
    if sandbox_env:
        sandbox_root = Path(sandbox_env).expanduser().resolve()
        try:
            target_dir.relative_to(sandbox_root)
        except ValueError:
            return {
                "status": "skipped",
                "reason": f"path {target_dir} is outside ALLOWED_ANALYSIS_DIR={sandbox_root}",
                "path": str(target_dir),
            }

    project_name = project if project else target_dir.name
    branch_name = branch if branch else _detect_branch(target_dir)

    cache_key = (project_name, branch_name)
    if cache_key in _AUTO_INDEXED:
        return {"status": "skipped", "reason": "already auto-indexed", "key": cache_key}

    # Imported here rather than at module level so importing this module
    # does not pay the analyzer-stack import cost.
    from api.analyzers.source_analyzer import SourceAnalyzer
    from api.graph import Graph, compose_graph_name, graph_exists

    # Auto-index only an empty target graph: one that already holds data
    # (from a previous run or an explicit index_repo) would just cost minutes
    # of latency and duplicate writes.
    has_nodes = False
    if graph_exists(compose_graph_name(project_name, branch_name)):
        try:
            stats = Graph(project_name, branch=branch_name).stats()
            has_nodes = stats.get("node_count", 0) > 0
        except Exception:
            # Could not read stats: treat the graph as empty and index it.
            has_nodes = False

    if has_nodes:
        _AUTO_INDEXED.add(cache_key)
        return {
            "status": "skipped",
            "reason": "graph already populated",
            "project": project_name,
            "branch": branch_name,
        }

    logger.info(
        "Auto-indexing %s @ %s into code:%s:%s",
        target_dir,
        branch_name,
        project_name,
        branch_name,
    )
    destination = Graph(project_name, branch=branch_name)
    analyzer = SourceAnalyzer()
    analyzer.analyze_local_folder(str(target_dir), destination)

    # Recorded only after indexing returned, so a failed run can be retried.
    _AUTO_INDEXED.add(cache_key)
    return {
        "status": "indexed",
        "project": project_name,
        "branch": branch_name,
        "path": str(target_dir),
    }
234+
235+
236+
def reset_auto_index_cache(keys: Optional[Iterable[tuple[str, str]]] = None) -> None:
    """Forget auto-index session entries. Tests only.

    Args:
        keys: ``(project, branch)`` pairs to forget; pairs that are not in
            the cache are ignored. ``None`` (the default) empties the whole
            cache.
    """
    if keys is None:
        _AUTO_INDEXED.clear()
        return
    for entry in keys:
        _AUTO_INDEXED.discard(entry)

api/mcp/server.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99

1010
from __future__ import annotations
1111

12+
import logging
13+
1214
from mcp.server.fastmcp import FastMCP
1315

16+
logger = logging.getLogger(__name__)
17+
1418
# The FastMCP server instance, named "code-graph"; main() runs it over stdio.
app: FastMCP = FastMCP("code-graph")
1519

1620
# Register tools on import so both direct ``import api.mcp.server`` and the
@@ -19,11 +23,43 @@
1923
from . import tools # noqa: F401, E402
2024

2125

26+
def _start_background_auto_index() -> None:
    """Launch the opt-in auto-index on a daemon thread and return at once.

    Indexing a large repo can take minutes; running it synchronously before
    ``app.run`` would hold up the MCP stdio handshake until it finished. On a
    daemon thread the server is responsive immediately — the analyzer logs to
    stderr only, so it can't corrupt the stdio JSON-RPC stream.
    ``maybe_auto_index`` is a no-op when ``CODE_GRAPH_AUTO_INDEX`` is unset
    and caches success so the work happens at most once per
    ``(project, branch)``.
    """
    from threading import Thread

    from .auto_init import maybe_auto_index

    def _guarded_auto_index() -> None:
        # A background failure must never take down the server: log it with
        # its traceback and let the thread end.
        try:
            maybe_auto_index()
        except Exception:
            logger.exception("background auto-index failed")

    worker = Thread(
        target=_guarded_auto_index,
        name="cgraph-auto-index",
        daemon=True,
    )
    worker.start()
47+
48+
2249
def main() -> None:
    """Run the MCP server over stdio.

    Console-script entry point for ``cgraph-mcp``. Ensures FalkorDB is
    reachable (bootstrapping the Docker container if needed) before
    serving, then kicks off opt-in auto-indexing (via
    ``CODE_GRAPH_AUTO_INDEX``) in the background so a freshly-cloned user
    gets an indexed CWD without manual ``index_repo`` — without blocking
    the stdio handshake.

    The bootstrap outcome is logged, not acted upon: the server starts even
    when FalkorDB could not be reached or started.
    """
    from .auto_init import ensure_falkordb

    # ensure_falkordb reports its outcome as a status dict. Surface it so a
    # failed bootstrap is visible in the logs instead of only showing up
    # later as tool errors.
    # NOTE(review): assumes logging is not routed to stdout, which carries
    # the JSON-RPC stream — confirm against the logging configuration.
    db_status = ensure_falkordb()
    if db_status.get("status") == "ok":
        logger.info("FalkorDB ready: %s", db_status)
    else:
        logger.warning("FalkorDB bootstrap did not succeed: %s", db_status)

    _start_background_auto_index()
    app.run(transport="stdio")
2864

2965

0 commit comments

Comments
 (0)