Lexus2016 · Lexus2016 · Jun 23, 2026 · Jun 23, 2026
diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
@@ -353,6 +353,12 @@ def _strip_yaml_frontmatter(content: str) -> str:
     "only exposes serve / migrate / secret-set / prune-orphans / doctor, so do "
     "NOT shell out via subprocess to 'remember' something. "
     "(3) The config key is the top-level `mcp_servers`, never `mcp.servers`.\n"
+    "Scope `index_paths` to FOCUSED Markdown roots (e.g. ./docs or one project "
+    "subdir), NEVER to huge or system trees like /root, /home, $HOME, /tmp, or a "
+    "whole repo — indexing thousands of files bloats the vector index and crashes "
+    "the MCP (observed on prod: ~10k chunks triggered a LanceDB re-sync crash and "
+    "timeout). Index a few hundred files at most; for durable knowledge prefer "
+    "`mcp_tqmemory_remember_note` over wholesale indexing.\n"
     "If the `mcp_tqmemory_*` tools are absent in a session, memory is simply "
     "unavailable there — say so and re-enable it with `hermes mcp add tqmemory "
     "...`; do NOT fake persistence by writing files or editing config by hand."

diff --git a/hermes_cli/tqmemory_setup.py b/hermes_cli/tqmemory_setup.py
@@ -157,7 +157,23 @@ def ensure_turbo_memory_installed(quiet: bool = False) -> Optional[str]:
     existing = resolve_binary()
     if existing:
         # Best-effort upgrade; never fail the caller on a network hiccup.
-        _run([uv, "tool", "upgrade", BINARY], _UPGRADE_TIMEOUT)
+        #
+        # rev-pin trap: if a PRIOR install pinned the receipt to a concrete git
+        # rev (observed on prod: rev=v0.17.0), `uv tool upgrade` re-resolves to
+        # that SAME rev and never jumps to a newer commit — the install stays
+        # silently stale. REPO_SPEC is intentionally unpinned (no @rev) so a
+        # reinstall floats to the branch HEAD. We try the cheap upgrade first
+        # and fall back to a `--reinstall` against the unpinned spec whenever
+        # the upgrade reported no change ("Nothing to upgrade") or exited
+        # non-zero — that rewrites the receipt from the unpinned spec and breaks
+        # the rev-pin trap. NOTE(review): an already-latest install presumably
+        # also reports "Nothing to upgrade", so this fallback runs then too.
+        up = _run([uv, "tool", "upgrade", BINARY], _UPGRADE_TIMEOUT)
+        out = (up.stdout or "") + (up.stderr or "")
+        upgrade_had_effect = up.returncode == 0 and "Nothing to upgrade" not in out
+        if not upgrade_had_effect:
+            # Re-resolve from the unpinned REPO_SPEC to escape a rev-pinned receipt.
+            _run([uv, "tool", "install", "--reinstall", REPO_SPEC], _INSTALL_TIMEOUT)
         return resolve_binary() or existing
 
     _emit(quiet, "🧠 Installing Turbo-Quant Memory MCP (one-time, may take a minute)…")
@@ -177,10 +193,21 @@ def ensure_turbo_memory_installed(quiet: bool = False) -> Optional[str]:
 # ---------------------------------------------------------------------------
 
 def _build_entry(tqm_path: str) -> dict:
+    # Pin TQMEMORY_PROJECT_ROOT to a STABLE root (HERMES_HOME, fallback ~/.hermes)
+    # so turbo_quant_memory derives a single, cwd-independent project_id. Without
+    # it the project_id tracks the process cwd and memory fragments into multiple
+    # buckets (observed on prod: /root vs /root/.hermes).
+    hermes_home = os.path.expanduser(os.environ.get("HERMES_HOME", "~/.hermes"))
+    env = dict(_SERVER_ENV)
+    env.setdefault("TQMEMORY_PROJECT_ROOT", hermes_home)
     return {
         "command": tqm_path,
         "args": ["serve"],
-        "env": dict(_SERVER_ENV),
+        "env": env,
+        # First semantic_search loads a ~600MB embedding model; re-syncs can be
+        # slow. Give this server a generous per-call timeout (read per-server by
+        # tools/mcp_tool.py) without touching the global MCP default.
+        "timeout": 600,
         "enabled": True,
     }
 
@@ -219,15 +246,21 @@ def _register_in_config_file(config_path: Path, tqm_path: str) -> bool:
     if isinstance(existing, dict):
         # Already registered. Leave a user-disabled entry (enabled: false)
         # untouched so we respect intent. Otherwise repair anything that drifted:
-        # a stale absolute command path OR a missing migrate-on-startup env.
+        # a stale absolute command path, a missing migrate-on-startup env, a
+        # missing stable project root, or a missing per-server timeout. Repairing
+        # the project root on EXISTING installs (not just fresh ones) is what lets
+        # `hermes update` heal client installs whose memory fragmented by cwd.
         if existing.get("enabled") is False:
             return False
+        canonical = _build_entry(tqm_path)
         env = existing.get("env")
         already_correct = (
             existing.get("command") == tqm_path
             and existing.get("args") == ["serve"]
             and isinstance(env, dict)
             and env.get("TQMEMORY_MIGRATE_ON_STARTUP") == "1"
+            and env.get("TQMEMORY_PROJECT_ROOT") == canonical["env"]["TQMEMORY_PROJECT_ROOT"]
+            and existing.get("timeout") == canonical["timeout"]
         )
         if already_correct:
             return False
@@ -236,7 +269,11 @@ def _register_in_config_file(config_path: Path, tqm_path: str) -> bool:
         if not isinstance(env, dict):
             env = {}
         env.setdefault("TQMEMORY_MIGRATE_ON_STARTUP", "1")
+        # Backfill a stable project root so project_id no longer tracks cwd.
+        # setdefault: never clobber an operator-chosen TQMEMORY_PROJECT_ROOT.
+        env.setdefault("TQMEMORY_PROJECT_ROOT", canonical["env"]["TQMEMORY_PROJECT_ROOT"])
         existing["env"] = env
+        existing.setdefault("timeout", canonical["timeout"])
         existing["enabled"] = True
     else:
         servers[SERVER_NAME] = _build_entry(tqm_path)

@@ -529,6 +529,25 @@ fi
 # HERMES_NO_TQMEMORY=1 (or, persistently, memory.tqmemory_autoinstall: false).
 if [ "${HERMES_NO_TQMEMORY:-0}" != "1" ]; then
     "$SCRIPT_DIR/venv/bin/python" -m hermes_cli.tqmemory_setup || true
+
+    # Pre-cache the sentence-transformers embedding model so the FIRST
+    # semantic_search doesn't time out pulling ~600MB from HuggingFace at
+    # runtime (slow/rate-limited networks blow past the MCP timeout otherwise).
+    # Best-effort only: any failure here just means the model lazy-loads on
+    # first use. HF_TOKEN is optional (it only raises the HF rate limit).
+    # Respect TQMEMORY_EMBEDDING_MODEL if the operator set a custom model;
+    # otherwise fall back to the package default (paraphrase-multilingual-MiniLM-L12-v2).
+    echo "🧠 Pre-caching embedding model (best-effort)…"
+    "$SCRIPT_DIR/venv/bin/python" - <<'PYEOF' 2>/dev/null || echo "  (embedding preload skipped — will lazy-load on first use)"
+import os
+try:
+    from sentence_transformers import SentenceTransformer
+    model = os.environ.get("TQMEMORY_EMBEDDING_MODEL", "paraphrase-multilingual-MiniLM-L12-v2")
+    SentenceTransformer(model)
+    print(f"  ✓ embedding model cached ({model})")
+except Exception:
+    pass
+PYEOF
 fi
 
 # Ask if they want to run setup wizard now

diff --git a/tests/hermes_cli/test_tqmemory_setup.py b/tests/hermes_cli/test_tqmemory_setup.py
@@ -18,15 +18,22 @@ def _read(p) -> dict:
 
 
 class TestRegisterInConfigFile:
-    def test_writes_canonical_schema_to_fresh_config(self, tmp_path):
+    def test_writes_canonical_schema_to_fresh_config(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test")
         cfg = tmp_path / "config.yaml"
         changed = tqm._register_in_config_file(cfg, BIN)
         assert changed is True
         entry = _read(cfg)["mcp_servers"]["tqmemory"]
         # The RC1b regression guard: env must be present AND args == ["serve"].
         assert entry["command"] == BIN
         assert entry["args"] == ["serve"]
-        assert entry["env"] == {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}
+        # Stable project root pins project_id (cwd-independent); migrate flag stays.
+        assert entry["env"] == {
+            "TQMEMORY_MIGRATE_ON_STARTUP": "1",
+            "TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test",
+        }
+        # Generous per-server timeout for the first ~600MB embedding-model load.
+        assert entry["timeout"] == 600
         assert entry["enabled"] is True
 
     def test_idempotent_second_call_is_noop(self, tmp_path):
@@ -89,21 +96,32 @@ def test_existing_versionless_config_not_stamped(self, tmp_path):
         assert tqm._register_in_config_file(cfg, BIN) is True
         assert "_config_version" not in _read(cfg)
 
-    def test_repairs_missing_env(self, tmp_path):
+    def test_repairs_missing_env(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test")
         cfg = tmp_path / "config.yaml"
         cfg.write_text(yaml.safe_dump({
             "mcp_servers": {"tqmemory": {"command": BIN, "args": ["serve"], "enabled": True}}
         }), encoding="utf-8")
         assert tqm._register_in_config_file(cfg, BIN) is True
         env = _read(cfg)["mcp_servers"]["tqmemory"]["env"]
-        assert env == {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}
-
-    def test_fully_correct_entry_is_noop(self, tmp_path):
+        # Repair back-fills BOTH the migrate flag and the stable project root so
+        # existing client installs heal on `hermes update`.
+        assert env == {
+            "TQMEMORY_MIGRATE_ON_STARTUP": "1",
+            "TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test",
+        }
+
+    def test_fully_correct_entry_is_noop(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test")
         cfg = tmp_path / "config.yaml"
         cfg.write_text(yaml.safe_dump({
             "mcp_servers": {"tqmemory": {
                 "command": BIN, "args": ["serve"],
-                "env": {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}, "enabled": True,
+                "env": {
+                    "TQMEMORY_MIGRATE_ON_STARTUP": "1",
+                    "TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test",
+                },
+                "timeout": 600, "enabled": True,
             }}
         }), encoding="utf-8")
         assert tqm._register_in_config_file(cfg, BIN) is False