|
| 1 | +"""Zero-config startup helpers for the MCP server (T12). |
| 2 | +
|
| 3 | +Two automation behaviours: |
| 4 | +
|
| 5 | +1. :func:`ensure_falkordb` — at server boot, ping FalkorDB; if it's |
| 6 | + unreachable on a localhost host, shell out to ``cgraph ensure-db`` |
| 7 | + (a subprocess, so the CLI's JSON stdout can't pollute the MCP |
| 8 | + server's own stdio transport) to spin up the Docker container. |
| 9 | +
|
| 10 | +2. :func:`maybe_auto_index` — when ``CODE_GRAPH_AUTO_INDEX=true`` is set |
| 11 | + (opt-in, off by default), index the current working directory into a |
| 12 | + per-branch graph so the agent doesn't have to call ``index_repo`` |
| 13 | + first. Only indexes when the target graph is empty, stays within |
| 14 | + ``ALLOWED_ANALYSIS_DIR`` when configured, and is idempotent within a |
| 15 | + single process — the second call for the same ``(project, branch)`` |
| 16 | + is a no-op. The server runs it off the startup path (a daemon thread) |
| 17 | + so indexing a large repo never blocks the stdio handshake. |
| 18 | +
|
| 19 | +Both are deliberately conservative: ensure-db only acts on localhost |
| 20 | +hosts, and auto-index requires explicit opt-in because indexing a |
| 21 | +large repo can take minutes. |
| 22 | +""" |
| 23 | + |
| 24 | +from __future__ import annotations |
| 25 | + |
| 26 | +import logging |
| 27 | +import os |
| 28 | +import subprocess |
| 29 | +from pathlib import Path |
| 30 | +from typing import Iterable, Optional |
| 31 | + |
| 32 | + |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Hosts for which ensure_falkordb() is willing to run `cgraph ensure-db`;
# any other host is reported as unreachable without attempting a bootstrap.
_LOCAL_HOSTS = {"localhost", "127.0.0.1", "::1"}
# (project, branch) pairs maybe_auto_index() has already handled in this
# process; membership turns later calls for the same pair into no-ops.
_AUTO_INDEXED: set[tuple[str, str]] = set()
| 38 | + |
| 39 | + |
| 40 | +# --------------------------------------------------------------------------- |
| 41 | +# ensure_falkordb |
| 42 | +# --------------------------------------------------------------------------- |
| 43 | + |
| 44 | + |
def _falkordb_reachable(host: str, port: int, timeout: float = 1.0) -> bool:
    """Return True when a Redis ``PING`` to ``host:port`` succeeds.

    A successful socket connection doesn't prove the listener is actually
    FalkorDB/Redis (or that it has finished starting up), so we issue a
    ``PING`` using the same auth env (``FALKORDB_USERNAME`` /
    ``FALKORDB_PASSWORD``) the rest of the app uses. Any failure means
    "not reachable".

    Args:
        host: Hostname or IP address to probe.
        port: TCP port to probe.
        timeout: Seconds allowed for the TCP connect and, separately, for
            each socket read/write while waiting for the ``PING`` reply.

    Returns:
        ``True`` if the server answered the ``PING``; ``False`` on any
        connection, auth, timeout, or protocol error.
    """
    import redis

    client = None
    try:
        client = redis.Redis(
            host=host,
            port=port,
            username=os.getenv("FALKORDB_USERNAME") or None,
            password=os.getenv("FALKORDB_PASSWORD") or None,
            socket_connect_timeout=timeout,
            # Bound the PING round-trip as well: without a read timeout a
            # listener that accepts the connection but never replies would
            # block the caller indefinitely.
            socket_timeout=timeout,
        )
        return bool(client.ping())
    except Exception:
        # Deliberately broad: every failure mode maps to "not reachable".
        # Keep a debug trace so the cause is still diagnosable.
        logger.debug(
            "FalkorDB PING to %s:%s failed", host, port, exc_info=True
        )
        return False
    finally:
        if client is not None:
            try:
                client.close()
            except Exception:
                # Closing is best-effort; the probe result is already decided.
                pass
| 73 | + |
| 74 | + |
def ensure_falkordb() -> dict:
    """Make sure FalkorDB is reachable; bootstrap Docker if not.

    Reads ``FALKORDB_HOST`` (default ``localhost``) and ``FALKORDB_PORT``
    (default ``6379``). If the server does not answer a ``PING`` and the
    host is one of the local hosts, runs ``cgraph ensure-db`` as a
    subprocess.

    Returns:
        A small status dict so the caller can log it. ``status`` is
        ``"ok"`` or ``"error"``; depending on the path taken the dict also
        carries ``host``, ``port``, ``action``, ``message``, ``stdout`` and
        ``stderr``.

    Never raises for configuration or subprocess-launch problems — the goal
    is to start the MCP server even if the bootstrap fails; individual
    tools will then surface their own errors.
    """
    host = os.getenv("FALKORDB_HOST", "localhost")
    try:
        port = int(os.getenv("FALKORDB_PORT", "6379"))
    except ValueError:
        return {"status": "error", "message": "invalid FALKORDB_PORT"}
    if not 1 <= port <= 65535:
        return {
            "status": "error",
            "message": f"FALKORDB_PORT must be between 1 and 65535, got {port}",
        }

    if _falkordb_reachable(host, port):
        return {"status": "ok", "host": host, "port": port, "action": "none"}

    if host not in _LOCAL_HOSTS:
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": "FalkorDB unreachable; auto-start only supports localhost",
        }

    logger.info("FalkorDB unreachable on %s:%s — running `cgraph ensure-db`", host, port)
    try:
        # Subprocess so the CLI's stdout (which prints JSON) doesn't pollute
        # the MCP server's own stdio transport. stdin is detached for the
        # same reason: an inherited stdin would let the child consume bytes
        # meant for the MCP protocol stream.
        result = subprocess.run(
            ["cgraph", "ensure-db"],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            check=False,
        )
    except FileNotFoundError:
        return {"status": "error", "message": "cgraph CLI not on PATH"}
    except OSError as exc:
        # e.g. PermissionError when the CLI exists but is not executable.
        # Report it instead of breaking the "never raises" contract.
        return {
            "status": "error",
            "host": host,
            "port": port,
            "message": f"failed to run cgraph ensure-db: {exc}",
        }

    return {
        "status": "ok" if result.returncode == 0 else "error",
        "host": host,
        "port": port,
        "action": "started",
        "stdout": result.stdout.strip(),
        "stderr": result.stderr.strip(),
    }
| 125 | + |
| 126 | + |
| 127 | +# --------------------------------------------------------------------------- |
| 128 | +# maybe_auto_index |
| 129 | +# --------------------------------------------------------------------------- |
| 130 | + |
| 131 | + |
| 132 | +def _truthy(val: Optional[str]) -> bool: |
| 133 | + return (val or "").strip().lower() in {"1", "true", "yes", "on"} |
| 134 | + |
| 135 | + |
| 136 | +def _detect_branch(cwd: Path) -> str: |
| 137 | + """Best-effort current-branch detection. Falls back to ``_default``.""" |
| 138 | + try: |
| 139 | + result = subprocess.run( |
| 140 | + ["git", "rev-parse", "--abbrev-ref", "HEAD"], |
| 141 | + cwd=str(cwd), |
| 142 | + capture_output=True, |
| 143 | + text=True, |
| 144 | + check=False, |
| 145 | + ) |
| 146 | + if result.returncode == 0: |
| 147 | + branch = result.stdout.strip() |
| 148 | + # A detached HEAD reports the literal "HEAD"; treat it (and an |
| 149 | + # empty result) as the default branch — matching |
| 150 | + # api.project.detect_branch — so we never create a graph named |
| 151 | + # code:<project>:HEAD. |
| 152 | + if branch and branch != "HEAD": |
| 153 | + return branch |
| 154 | + except FileNotFoundError: |
| 155 | + pass |
| 156 | + return "_default" |
| 157 | + |
| 158 | + |
def maybe_auto_index(
    cwd: Optional[Path] = None,
    *,
    project: Optional[str] = None,
    branch: Optional[str] = None,
) -> dict:
    """Index ``cwd`` into its per-branch graph when auto-indexing is enabled.

    Does nothing unless ``CODE_GRAPH_AUTO_INDEX`` holds a truthy value.
    The ``(project, branch)`` pairs already handled in this process are
    remembered in the module-level :data:`_AUTO_INDEXED` set, so repeat
    calls for the same pair return immediately.

    Args:
        cwd: Folder to index; defaults to the current working directory.
        project: Project name; defaults to the folder's name.
        branch: Branch name; defaults to the result of
            :func:`_detect_branch`.

    Returns:
        A status dict. ``status`` is ``"skipped"`` (with a ``reason``) or
        ``"indexed"``.
    """
    opt_in = os.getenv("CODE_GRAPH_AUTO_INDEX")
    if not _truthy(opt_in):
        return {"status": "skipped", "reason": "CODE_GRAPH_AUTO_INDEX not set"}

    target = (cwd or Path.cwd()).resolve()

    # Same sandbox boundary as /api/analyze_folder and the index_repo MCP
    # tool: enabling auto-index must not allow indexing a path that lies
    # outside the configured allow-list.
    sandbox = os.getenv("ALLOWED_ANALYSIS_DIR")
    if sandbox:
        sandbox_root = Path(sandbox).expanduser().resolve()
        try:
            target.relative_to(sandbox_root)
        except ValueError:
            outside_msg = (
                f"path {target} is outside ALLOWED_ANALYSIS_DIR={sandbox_root}"
            )
            return {
                "status": "skipped",
                "reason": outside_msg,
                "path": str(target),
            }

    project_name = project or target.name
    branch_name = branch or _detect_branch(target)
    key = (project_name, branch_name)

    if key in _AUTO_INDEXED:
        return {"status": "skipped", "reason": "already auto-indexed", "key": key}

    # Imported here rather than at module load so the MCP server can import
    # this module without pulling in the analyzer stack.
    from api.analyzers.source_analyzer import SourceAnalyzer
    from api.graph import Graph, compose_graph_name, graph_exists

    # Skip graphs that already hold data (from an earlier run or an explicit
    # index_repo): re-indexing would cost minutes and duplicate writes.
    graph_name = compose_graph_name(project_name, branch_name)
    if graph_exists(graph_name):
        try:
            stats = Graph(project_name, branch=branch_name).stats()
            populated = stats.get("node_count", 0) > 0
        except Exception:
            # If stats cannot be read, treat the graph as empty and index.
            populated = False
        if populated:
            _AUTO_INDEXED.add(key)
            return {
                "status": "skipped",
                "reason": "graph already populated",
                "project": project_name,
                "branch": branch_name,
            }

    logger.info(
        "Auto-indexing %s @ %s into code:%s:%s",
        target,
        branch_name,
        project_name,
        branch_name,
    )
    graph = Graph(project_name, branch=branch_name)
    analyzer = SourceAnalyzer()
    analyzer.analyze_local_folder(str(target), graph)

    # Record the pair only after indexing has completed.
    _AUTO_INDEXED.add(key)
    return {
        "status": "indexed",
        "project": project_name,
        "branch": branch_name,
        "path": str(target),
    }
| 234 | + |
| 235 | + |
def reset_auto_index_cache(keys: Optional[Iterable[tuple[str, str]]] = None) -> None:
    """Forget auto-index bookkeeping for this process. Tests only.

    Args:
        keys: ``(project, branch)`` pairs to forget. When omitted (``None``)
            the whole cache is cleared; pairs that are not cached are
            ignored.
    """
    if keys is None:
        _AUTO_INDEXED.clear()
        return
    for pair in keys:
        _AUTO_INDEXED.discard(pair)
0 commit comments