Skip to content

Commit b4c4224

Browse files
authored
fix(memory): harden TQMemory for client installs — stable project_id, index_paths guard, timeout, model preload, upgrade reliability (#485)
1 parent 7f86041 commit b4c4224

4 files changed

Lines changed: 90 additions & 10 deletions

File tree

agent/prompt_builder.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,12 @@ def _strip_yaml_frontmatter(content: str) -> str:
353353
"only exposes serve / migrate / secret-set / prune-orphans / doctor, so do "
354354
"NOT shell out via subprocess to 'remember' something. "
355355
"(3) The config key is the top-level `mcp_servers`, never `mcp.servers`.\n"
356+
"Scope `index_paths` to FOCUSED Markdown roots (e.g. ./docs or one project "
357+
"subdir), NEVER to huge or system trees like /root, /home, $HOME, /tmp, or a "
358+
"whole repo — indexing thousands of files bloats the vector index and crashes "
359+
"the MCP (observed on prod: ~10k chunks triggered a LanceDB re-sync crash and "
360+
"timeout). Index a few hundred files at most; for durable knowledge prefer "
361+
"`mcp_tqmemory_remember_note` over wholesale indexing.\n"
356362
"If the `mcp_tqmemory_*` tools are absent in a session, memory is simply "
357363
"unavailable there — say so and re-enable it with `hermes mcp add tqmemory "
358364
"...`; do NOT fake persistence by writing files or editing config by hand."

hermes_cli/tqmemory_setup.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,23 @@ def ensure_turbo_memory_installed(quiet: bool = False) -> Optional[str]:
157157
existing = resolve_binary()
158158
if existing:
159159
# Best-effort upgrade; never fail the caller on a network hiccup.
160-
_run([uv, "tool", "upgrade", BINARY], _UPGRADE_TIMEOUT)
160+
#
161+
# rev-pin trap: if a PRIOR install pinned the receipt to a concrete git
162+
# rev (observed on prod: rev=v0.17.0), `uv tool upgrade` re-resolves to
163+
# that SAME rev and never jumps to a newer commit — the install stays
164+
# silently stale. REPO_SPEC is intentionally unpinned (no @rev) so a
165+
# reinstall floats to the branch HEAD. We try the cheap upgrade first
166+
# and fall back to a `--reinstall` against the unpinned spec whenever it
167+
# fails (non-zero) or reports "Nothing to upgrade". That re-pins the
168+
# receipt to the unpinned spec and breaks the rev-pin trap. NOTE: an
169+
# already-latest install also reports "Nothing to upgrade", so the
170+
# reinstall runs on that path too.
171+
up = _run([uv, "tool", "upgrade", BINARY], _UPGRADE_TIMEOUT)
172+
out = (up.stdout or "") + (up.stderr or "")
173+
upgrade_had_effect = up.returncode == 0 and "Nothing to upgrade" not in out
174+
if not upgrade_had_effect:
175+
# Re-resolve from the unpinned REPO_SPEC to escape a rev-pinned receipt.
176+
_run([uv, "tool", "install", "--reinstall", REPO_SPEC], _INSTALL_TIMEOUT)
161177
return resolve_binary() or existing
162178

163179
_emit(quiet, "🧠 Installing Turbo-Quant Memory MCP (one-time, may take a minute)…")
@@ -177,10 +193,21 @@ def ensure_turbo_memory_installed(quiet: bool = False) -> Optional[str]:
177193
# ---------------------------------------------------------------------------
178194

179195
def _build_entry(tqm_path: str) -> dict:
196+
# Pin TQMEMORY_PROJECT_ROOT to a STABLE root (HERMES_HOME, fallback ~/.hermes)
197+
# so turbo_quant_memory derives a single, cwd-independent project_id. Without
198+
# it the project_id tracks the process cwd and memory fragments into multiple
199+
# buckets (observed on prod: /root vs /root/.hermes).
200+
hermes_home = os.path.expanduser(os.environ.get("HERMES_HOME", "~/.hermes"))
201+
env = dict(_SERVER_ENV)
202+
env.setdefault("TQMEMORY_PROJECT_ROOT", hermes_home)
180203
return {
181204
"command": tqm_path,
182205
"args": ["serve"],
183-
"env": dict(_SERVER_ENV),
206+
"env": env,
207+
# First semantic_search loads a ~600MB embedding model; re-syncs can be
208+
# slow. Give this server a generous per-call timeout (read per-server by
209+
# tools/mcp_tool.py) without touching the global MCP default.
210+
"timeout": 600,
184211
"enabled": True,
185212
}
186213

@@ -219,15 +246,21 @@ def _register_in_config_file(config_path: Path, tqm_path: str) -> bool:
219246
if isinstance(existing, dict):
220247
# Already registered. Leave a user-disabled entry (enabled: false)
221248
# untouched so we respect intent. Otherwise repair anything that drifted:
222-
# a stale absolute command path OR a missing migrate-on-startup env.
249+
# a stale absolute command path, a missing migrate-on-startup env, a
250+
# missing stable project root, or a missing per-server timeout. Repairing
251+
# the project root on EXISTING installs (not just fresh ones) is what lets
252+
# `hermes update` heal client installs whose memory fragmented by cwd.
223253
if existing.get("enabled") is False:
224254
return False
255+
canonical = _build_entry(tqm_path)
225256
env = existing.get("env")
226257
already_correct = (
227258
existing.get("command") == tqm_path
228259
and existing.get("args") == ["serve"]
229260
and isinstance(env, dict)
230261
and env.get("TQMEMORY_MIGRATE_ON_STARTUP") == "1"
262+
and env.get("TQMEMORY_PROJECT_ROOT") == canonical["env"]["TQMEMORY_PROJECT_ROOT"]
263+
and existing.get("timeout") == canonical["timeout"]
231264
)
232265
if already_correct:
233266
return False
@@ -236,7 +269,11 @@ def _register_in_config_file(config_path: Path, tqm_path: str) -> bool:
236269
if not isinstance(env, dict):
237270
env = {}
238271
env.setdefault("TQMEMORY_MIGRATE_ON_STARTUP", "1")
272+
# Backfill a stable project root so project_id no longer tracks cwd.
273+
# setdefault: never clobber an operator-chosen TQMEMORY_PROJECT_ROOT.
274+
env.setdefault("TQMEMORY_PROJECT_ROOT", canonical["env"]["TQMEMORY_PROJECT_ROOT"])
239275
existing["env"] = env
276+
existing.setdefault("timeout", canonical["timeout"])
240277
existing["enabled"] = True
241278
else:
242279
servers[SERVER_NAME] = _build_entry(tqm_path)

setup-hermes.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,25 @@ fi
529529
# HERMES_NO_TQMEMORY=1 (or, persistently, memory.tqmemory_autoinstall: false).
530530
if [ "${HERMES_NO_TQMEMORY:-0}" != "1" ]; then
531531
"$SCRIPT_DIR/venv/bin/python" -m hermes_cli.tqmemory_setup || true
532+
533+
# Pre-cache the sentence-transformers embedding model so the FIRST
534+
# semantic_search doesn't time out pulling ~600MB from HuggingFace at
535+
# runtime (slow/rate-limited networks blow past the MCP timeout otherwise).
536+
# Best-effort only: any failure here just means the model lazy-loads on
537+
# first use. HF_TOKEN is optional (it only raises the HF rate limit).
538+
# Respect TQMEMORY_EMBEDDING_MODEL if the operator set a custom model;
539+
# otherwise fall back to the package default (paraphrase-multilingual-MiniLM-L12-v2).
540+
echo "🧠 Pre-caching embedding model (best-effort)…"
541+
"$SCRIPT_DIR/venv/bin/python" - <<'PYEOF' 2>/dev/null || echo " (embedding preload skipped — will lazy-load on first use)"
542+
import os
543+
try:
544+
from sentence_transformers import SentenceTransformer
545+
model = os.environ.get("TQMEMORY_EMBEDDING_MODEL", "paraphrase-multilingual-MiniLM-L12-v2")
546+
SentenceTransformer(model)
547+
print(f" ✓ embedding model cached ({model})")
548+
except Exception:
549+
pass
550+
PYEOF
532551
fi
533552

534553
# Ask if they want to run setup wizard now

tests/hermes_cli/test_tqmemory_setup.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,22 @@ def _read(p) -> dict:
1818

1919

2020
class TestRegisterInConfigFile:
21-
def test_writes_canonical_schema_to_fresh_config(self, tmp_path):
21+
def test_writes_canonical_schema_to_fresh_config(self, tmp_path, monkeypatch):
22+
monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test")
2223
cfg = tmp_path / "config.yaml"
2324
changed = tqm._register_in_config_file(cfg, BIN)
2425
assert changed is True
2526
entry = _read(cfg)["mcp_servers"]["tqmemory"]
2627
# The RC1b regression guard: env must be present AND args == ["serve"].
2728
assert entry["command"] == BIN
2829
assert entry["args"] == ["serve"]
29-
assert entry["env"] == {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}
30+
# Stable project root pins project_id (cwd-independent); migrate flag stays.
31+
assert entry["env"] == {
32+
"TQMEMORY_MIGRATE_ON_STARTUP": "1",
33+
"TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test",
34+
}
35+
# Generous per-server timeout for the first ~600MB embedding-model load.
36+
assert entry["timeout"] == 600
3037
assert entry["enabled"] is True
3138

3239
def test_idempotent_second_call_is_noop(self, tmp_path):
@@ -89,21 +96,32 @@ def test_existing_versionless_config_not_stamped(self, tmp_path):
8996
assert tqm._register_in_config_file(cfg, BIN) is True
9097
assert "_config_version" not in _read(cfg)
9198

92-
def test_repairs_missing_env(self, tmp_path):
99+
def test_repairs_missing_env(self, tmp_path, monkeypatch):
100+
monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test")
93101
cfg = tmp_path / "config.yaml"
94102
cfg.write_text(yaml.safe_dump({
95103
"mcp_servers": {"tqmemory": {"command": BIN, "args": ["serve"], "enabled": True}}
96104
}), encoding="utf-8")
97105
assert tqm._register_in_config_file(cfg, BIN) is True
98106
env = _read(cfg)["mcp_servers"]["tqmemory"]["env"]
99-
assert env == {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}
100-
101-
def test_fully_correct_entry_is_noop(self, tmp_path):
107+
# Repair back-fills BOTH the migrate flag and the stable project root so
108+
# existing client installs heal on `hermes update`.
109+
assert env == {
110+
"TQMEMORY_MIGRATE_ON_STARTUP": "1",
111+
"TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test",
112+
}
113+
114+
def test_fully_correct_entry_is_noop(self, tmp_path, monkeypatch):
115+
monkeypatch.setenv("HERMES_HOME", "/tmp/hermes-home-test")
102116
cfg = tmp_path / "config.yaml"
103117
cfg.write_text(yaml.safe_dump({
104118
"mcp_servers": {"tqmemory": {
105119
"command": BIN, "args": ["serve"],
106-
"env": {"TQMEMORY_MIGRATE_ON_STARTUP": "1"}, "enabled": True,
120+
"env": {
121+
"TQMEMORY_MIGRATE_ON_STARTUP": "1",
122+
"TQMEMORY_PROJECT_ROOT": "/tmp/hermes-home-test",
123+
},
124+
"timeout": 600, "enabled": True,
107125
}}
108126
}), encoding="utf-8")
109127
assert tqm._register_in_config_file(cfg, BIN) is False

0 commit comments

Comments
 (0)