|
| 1 | +"""Smoke test for the Hermes Agent ↔ Hindsight integration in embedded mode. |
| 2 | +
|
| 3 | +Drives the `HindsightMemoryProvider` plugin shipped with Hermes Agent against |
| 4 | +a locally-spawned Hindsight Embedded daemon, exercising the full retain → |
| 5 | +recall roundtrip end-to-end. |
| 6 | +
|
| 7 | +Run on demand via the installed Hermes venv (it already has every dep — |
| 8 | +hermes-agent's plugin code, hindsight_embed, hindsight_client, pytest): |
| 9 | +
|
| 10 | + HINDSIGHT_LLM_API_KEY=... \ |
| 11 | + ~/.hermes/hermes-agent/venv/bin/python -m pytest \ |
| 12 | + hindsight-integration-tests/tests/test_hermes_embedded_smoke.py -v -s |
| 13 | +
|
| 14 | +Skipped automatically if `HINDSIGHT_LLM_API_KEY` (or `OPENAI_API_KEY`) is not |
| 15 | +set, since embedded mode needs an LLM to extract facts during retain. |
| 16 | +
|
| 17 | +Defaults: openai / gpt-4o-mini. Override via `HINDSIGHT_LLM_PROVIDER` and |
| 18 | +`HINDSIGHT_LLM_MODEL`. |
| 19 | +""" |
| 20 | + |
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import json |
| 24 | +import os |
| 25 | +import sys |
| 26 | +import tempfile |
| 27 | +import time |
| 28 | +import uuid |
| 29 | +from pathlib import Path |
| 30 | + |
| 31 | +import pytest |
| 32 | + |
| 33 | + |
# Root of the installed Hermes Agent checkout. It is put on sys.path so that
# the bundled `plugins.memory.hindsight` package can be imported.
HERMES_VENV_SITE = Path.home().joinpath(".hermes", "hermes-agent")

# LLM settings come from the environment; the API key falls back to
# OPENAI_API_KEY when HINDSIGHT_LLM_API_KEY is unset or empty.
LLM_API_KEY = os.getenv("HINDSIGHT_LLM_API_KEY") or os.getenv("OPENAI_API_KEY", "")
LLM_PROVIDER = os.getenv("HINDSIGHT_LLM_PROVIDER", "openai")
LLM_MODEL = os.getenv("HINDSIGHT_LLM_MODEL", "gpt-4o-mini")

# File whose presence tells us the Hindsight plugin is installed.
_HINDSIGHT_PLUGIN_INIT = HERMES_VENV_SITE.joinpath("plugins", "memory", "hindsight", "__init__.py")

# Module-wide skips: without an API key or without the plugin on disk there is
# nothing meaningful to run.
_requires_api_key = pytest.mark.skipif(
    not LLM_API_KEY,
    reason="HINDSIGHT_LLM_API_KEY (or OPENAI_API_KEY) not set",
)
_requires_plugin = pytest.mark.skipif(
    not _HINDSIGHT_PLUGIN_INIT.exists(),
    reason=f"Hermes plugin not found at {HERMES_VENV_SITE} — run `hermes update` first",
)
pytestmark = [_requires_api_key, _requires_plugin]
| 49 | + |
| 50 | + |
@pytest.fixture(scope="module")
def hermes_path():
    """Make the installed hermes-agent importable while this module's tests run.

    The Hermes checkout is prepended to ``sys.path`` if it is not already
    there. On teardown the entry is removed again, but only when this fixture
    added it, so the checkout does not stay importable for test modules that
    run later in the same session.
    """
    entry = str(HERMES_VENV_SITE)
    added_here = entry not in sys.path
    if added_here:
        sys.path.insert(0, entry)

    yield

    if added_here:
        try:
            sys.path.remove(entry)
        except ValueError:
            # Someone else already removed it; nothing left to restore.
            pass
| 56 | + |
| 57 | + |
@pytest.fixture
def embedded_provider(tmp_path, monkeypatch, hermes_path):
    """Yield a started ``HindsightMemoryProvider`` in ``local_embedded`` mode.

    ``HERMES_HOME`` is pointed at ``tmp_path`` and the plugin config is written
    to ``<tmp_path>/hindsight/config.json``, so the user's real ``~/.hermes``
    is never touched. The fixture blocks until the provider's client reports
    ``is_running`` and fails the test if that does not happen within 240s.

    Teardown is best-effort and also runs when startup fails: the provider is
    shut down and the daemon for the generated profile is asked to stop.
    Errors during teardown are logged rather than raised.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)
    startup_timeout_s = 240.0
    poll_interval_s = 2.0

    profile_name = f"hermes-smoke-{uuid.uuid4().hex[:8]}"
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    monkeypatch.setenv("HINDSIGHT_LLM_API_KEY", LLM_API_KEY)

    config_dir = tmp_path / "hindsight"
    config_dir.mkdir(parents=True, exist_ok=True)
    config = {
        "mode": "local_embedded",
        "profile": profile_name,
        "llm_provider": LLM_PROVIDER,
        "llm_model": LLM_MODEL,
        "llm_api_key": LLM_API_KEY,
        "bank_id": f"smoke-{uuid.uuid4().hex[:8]}",
        "recall_budget": "low",
        "auto_retain": True,
        "auto_recall": True,
        "retain_async": False,
        "retain_every_n_turns": 1,
    }
    (config_dir / "config.json").write_text(json.dumps(config, indent=2), encoding="utf-8")

    from plugins.memory.hindsight import HindsightMemoryProvider

    provider = HindsightMemoryProvider()
    try:
        provider.initialize(session_id=f"smoke-{uuid.uuid4().hex[:8]}", platform="cli")

        # The plugin starts the daemon on a background thread. Force a
        # synchronous boot here so the test isn't racing it. First-run setup
        # can take ~2 min because the embedded daemon installs its own deps
        # into a profile venv.
        # A monotonic clock keeps the deadline immune to wall-clock jumps.
        deadline = time.monotonic() + startup_timeout_s
        last_err: Exception | None = None
        while time.monotonic() < deadline:
            try:
                client = provider._get_client()
                client._ensure_started()
                if client.is_running:
                    break
            except Exception as exc:
                last_err = exc
            time.sleep(poll_interval_s)
        else:
            # Cleanup happens in the ``finally`` below, so a teardown error
            # cannot replace this failure message.
            pytest.fail(f"Hindsight embedded daemon did not start within 240s (last error: {last_err!r})")

        yield provider
    finally:
        try:
            provider.shutdown()
        except Exception:
            log.warning("provider.shutdown() failed during smoke-test teardown", exc_info=True)
        try:
            from hindsight_embed.daemon_embed_manager import DaemonEmbedManager

            DaemonEmbedManager().stop(profile_name)
        except Exception:
            log.warning(
                "stopping embedded daemon profile %s failed during smoke-test teardown",
                profile_name,
                exc_info=True,
            )
| 121 | + |
| 122 | + |
def test_retain_then_recall_roundtrip(embedded_provider):
    """Store a memorable fact, then verify recall finds it.

    This exercises the full Hermes plugin path: sync_turn -> aretain_batch ->
    daemon -> LLM fact extraction -> indexing -> recall -> prefetch.

    The test passes as soon as a prefetch result contains "rust"
    (case-insensitive) and fails if none does within 60s of polling.
    """
    fact = "The user's favorite programming language is Rust"
    embedded_provider.sync_turn(
        user_content="What's my favorite programming language?",
        assistant_content=fact,
    )
    # If the provider exposes a sync worker thread, give it up to 60s to
    # finish (presumably this is the retain) before polling for recall.
    if embedded_provider._sync_thread:
        embedded_provider._sync_thread.join(timeout=60.0)

    # A monotonic clock keeps the polling window immune to wall-clock jumps.
    deadline = time.monotonic() + 60.0
    last_result = ""
    while time.monotonic() < deadline:
        # Clear any previous result so a stale match cannot satisfy the check.
        embedded_provider._prefetch_result = ""
        embedded_provider.queue_prefetch("favorite programming language")
        if embedded_provider._prefetch_thread:
            embedded_provider._prefetch_thread.join(timeout=30.0)
        # Guard against the provider leaving the result as None.
        last_result = embedded_provider._prefetch_result or ""
        if "rust" in last_result.lower():
            return
        time.sleep(2.0)

    pytest.fail(
        "recall did not surface the stored fact within 60s. "
        f"Last prefetch result: {last_result!r}"
    )
0 commit comments