diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index ae95518d4..cf9b6b295 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -353,6 +353,12 @@ def _strip_yaml_frontmatter(content: str) -> str: "only exposes serve / migrate / secret-set / prune-orphans / doctor, so do " "NOT shell out via subprocess to 'remember' something. " "(3) The config key is the top-level `mcp_servers`, never `mcp.servers`.\n" + "Scope `index_paths` to FOCUSED Markdown roots (e.g. ./docs or one project " + "subdir), NEVER to huge or system trees like /root, /home, $HOME, /tmp, or a " + "whole repo — indexing thousands of files bloats the vector index and crashes " + "the MCP (observed on prod: ~10k chunks triggered a LanceDB re-sync crash and " + "timeout). Index a few hundred files at most; for durable knowledge prefer " + "`mcp_tqmemory_remember_note` over wholesale indexing.\n" "If the `mcp_tqmemory_*` tools are absent in a session, memory is simply " "unavailable there — say so and re-enable it with `hermes mcp add tqmemory " "...`; do NOT fake persistence by writing files or editing config by hand." diff --git a/hermes_cli/tqmemory_setup.py b/hermes_cli/tqmemory_setup.py index 55ca9194e..e20b858da 100644 --- a/hermes_cli/tqmemory_setup.py +++ b/hermes_cli/tqmemory_setup.py @@ -157,7 +157,23 @@ def ensure_turbo_memory_installed(quiet: bool = False) -> Optional[str]: existing = resolve_binary() if existing: # Best-effort upgrade; never fail the caller on a network hiccup. - _run([uv, "tool", "upgrade", BINARY], _UPGRADE_TIMEOUT) + # + # rev-pin trap: if a PRIOR install pinned the receipt to a concrete git + # rev (observed on prod: rev=v0.17.0), `uv tool upgrade` re-resolves to + # that SAME rev and never jumps to a newer commit — the install stays + # silently stale. REPO_SPEC is intentionally unpinned (no @rev) so a + # reinstall floats to the branch HEAD. 
We try the cheap upgrade first + # and fall back to a `--reinstall` against the unpinned spec whenever the + # upgrade reported no change ("Nothing to upgrade" / non-zero) — that + # re-pins the receipt to the unpinned spec and breaks the rev-pin trap. + # NOTE(review): an already-latest install also reports "Nothing to upgrade", + # so that case appears to take the reinstall path too — confirm the cost. + up = _run([uv, "tool", "upgrade", BINARY], _UPGRADE_TIMEOUT) + out = (up.stdout or "") + (up.stderr or "") + upgrade_had_effect = up.returncode == 0 and "Nothing to upgrade" not in out + if not upgrade_had_effect: + # Re-resolve from the unpinned REPO_SPEC to escape a rev-pinned receipt. + _run([uv, "tool", "install", "--reinstall", REPO_SPEC], _INSTALL_TIMEOUT) return resolve_binary() or existing _emit(quiet, "🧠 Installing Turbo-Quant Memory MCP (one-time, may take a minute)…") @@ -177,10 +193,21 @@ def ensure_turbo_memory_installed(quiet: bool = False) -> Optional[str]: # --------------------------------------------------------------------------- def _build_entry(tqm_path: str) -> dict: + # Pin TQMEMORY_PROJECT_ROOT to a STABLE root (HERMES_HOME, fallback ~/.hermes + # when it is unset OR empty) so turbo_quant_memory derives a single, + # cwd-independent project_id. Without it the project_id tracks the process cwd + # and memory fragments into multiple buckets (observed on prod: /root vs /root/.hermes). + hermes_home = os.path.expanduser(os.environ.get("HERMES_HOME") or "~/.hermes") + env = dict(_SERVER_ENV) + env.setdefault("TQMEMORY_PROJECT_ROOT", hermes_home) return { "command": tqm_path, "args": ["serve"], - "env": dict(_SERVER_ENV), + "env": env, + # First semantic_search loads a ~600MB embedding model; re-syncs can be + # slow. Give this server a generous per-call timeout (read per-server by + # tools/mcp_tool.py) without touching the global MCP default. + "timeout": 600, "enabled": True, } @@ -219,15 +246,21 @@ def _register_in_config_file(config_path: Path, tqm_path: str) -> bool: if isinstance(existing, dict): # Already registered. 
Leave a user-disabled entry (enabled: false) # untouched so we respect intent. Otherwise repair anything that drifted: - # a stale absolute command path OR a missing migrate-on-startup env. + # a stale absolute command path, a missing migrate-on-startup env, a + # missing stable project root, or a missing per-server timeout. Repairing + # the project root on EXISTING installs (not just fresh ones) is what lets + # `hermes update` heal client installs whose memory fragmented by cwd. if existing.get("enabled") is False: return False + canonical = _build_entry(tqm_path) env = existing.get("env") already_correct = ( existing.get("command") == tqm_path and existing.get("args") == ["serve"] and isinstance(env, dict) and env.get("TQMEMORY_MIGRATE_ON_STARTUP") == "1" + and "TQMEMORY_PROJECT_ROOT" in env # presence only: the setdefault repair below keeps operator-chosen roots, so a custom value must count as correct + and "timeout" in existing # presence only: a custom timeout is never overwritten, so it must not force a rewrite on every run ) if already_correct: return False @@ -236,7 +269,11 @@ def _register_in_config_file(config_path: Path, tqm_path: str) -> bool: if not isinstance(env, dict): env = {} env.setdefault("TQMEMORY_MIGRATE_ON_STARTUP", "1") + # Backfill a stable project root so project_id no longer tracks cwd. + # setdefault: never clobber an operator-chosen TQMEMORY_PROJECT_ROOT. + env.setdefault("TQMEMORY_PROJECT_ROOT", canonical["env"]["TQMEMORY_PROJECT_ROOT"]) existing["env"] = env + existing.setdefault("timeout", canonical["timeout"]) existing["enabled"] = True else: servers[SERVER_NAME] = _build_entry(tqm_path) diff --git a/setup-hermes.sh b/setup-hermes.sh index 26dd05a5c..681b2a0bf 100755 --- a/setup-hermes.sh +++ b/setup-hermes.sh @@ -529,6 +529,25 @@ fi # HERMES_NO_TQMEMORY=1 (or, persistently, memory.tqmemory_autoinstall: false). 
if [ "${HERMES_NO_TQMEMORY:-0}" != "1" ]; then "$SCRIPT_DIR/venv/bin/python" -m hermes_cli.tqmemory_setup || true + + # Pre-cache the sentence-transformers embedding model so the FIRST + # semantic_search doesn't time out pulling ~600MB from HuggingFace at + # runtime (slow/rate-limited networks blow past the MCP timeout otherwise). + # Best-effort only: any failure here just means the model lazy-loads on + # first use. HF_TOKEN is optional (it only raises the HF rate limit). + # Respect TQMEMORY_EMBEDDING_MODEL if the operator set a custom model; + # otherwise fall back to the package default (paraphrase-multilingual-MiniLM-L12-v2). + echo "🧠 Pre-caching embedding model (best-effort)…" + "$SCRIPT_DIR/venv/bin/python" - <<'PYEOF' 2>/dev/null || echo " (embedding preload skipped — will lazy-load on first use)" +import os +try: + from sentence_transformers import SentenceTransformer + model = os.environ.get("TQMEMORY_EMBEDDING_MODEL", "paraphrase-multilingual-MiniLM-L12-v2") + SentenceTransformer(model) + print(f" ✓ embedding model cached ({model})") +except Exception: + raise SystemExit(1) # exit non-zero so the shell's "|| echo" fallback actually reports the skip +PYEOF fi # Ask if they want to run setup wizard now diff --git a/tests/hermes_cli/test_tqmemory_setup.py b/tests/hermes_cli/test_tqmemory_setup.py index 37fd7714d..7acf6a28e 100644 --- a/tests/hermes_cli/test_tqmemory_setup.py +++ b/tests/hermes_cli/test_tqmemory_setup.py @@ -18,7 +18,8 @@ def _read(p) -> dict: class TestRegisterInConfigFile: - def test_writes_canonical_schema_to_fresh_config(self, tmp_path): + def test_writes_canonical_schema_to_fresh_config(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test") cfg = tmp_path / "config.yaml" changed = tqm._register_in_config_file(cfg, BIN) assert changed is True @@ -26,7 +27,13 @@ def test_writes_canonical_schema_to_fresh_config(self, tmp_path): # The RC1b regression guard: env must be present AND args == ["serve"]. 
assert entry["command"] == BIN assert entry["args"] == ["serve"] - assert entry["env"] == {"TQMEMORY_MIGRATE_ON_STARTUP": "1"} + # Stable project root pins project_id (cwd-independent); migrate flag stays. + assert entry["env"] == { + "TQMEMORY_MIGRATE_ON_STARTUP": "1", + "TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test", + } + # Generous per-server timeout for the first ~600MB embedding-model load. + assert entry["timeout"] == 600 assert entry["enabled"] is True def test_idempotent_second_call_is_noop(self, tmp_path): @@ -89,21 +96,32 @@ def test_existing_versionless_config_not_stamped(self, tmp_path): assert tqm._register_in_config_file(cfg, BIN) is True assert "_config_version" not in _read(cfg) - def test_repairs_missing_env(self, tmp_path): + def test_repairs_missing_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test") cfg = tmp_path / "config.yaml" cfg.write_text(yaml.safe_dump({ "mcp_servers": {"tqmemory": {"command": BIN, "args": ["serve"], "enabled": True}} }), encoding="utf-8") assert tqm._register_in_config_file(cfg, BIN) is True env = _read(cfg)["mcp_servers"]["tqmemory"]["env"] - assert env == {"TQMEMORY_MIGRATE_ON_STARTUP": "1"} - - def test_fully_correct_entry_is_noop(self, tmp_path): + # Repair back-fills BOTH the migrate flag and the stable project root so + # existing client installs heal on `hermes update`. 
+ assert env == { + "TQMEMORY_MIGRATE_ON_STARTUP": "1", + "TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test", + } + + def test_fully_correct_entry_is_noop(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test") cfg = tmp_path / "config.yaml" cfg.write_text(yaml.safe_dump({ "mcp_servers": {"tqmemory": { "command": BIN, "args": ["serve"], - "env": {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}, "enabled": True, + "env": { + "TQMEMORY_MIGRATE_ON_STARTUP": "1", + "TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test", + }, + "timeout": 600, "enabled": True, }} }), encoding="utf-8") assert tqm._register_in_config_file(cfg, BIN) is False