Skip to content

Commit 84c3f96

Browse files
authored
feat: add Ogham memory provider (#9)
Ogham MCP (https://ogham-mcp.dev) is an open-source memory layer built on PostgreSQL + pgvector, with hybrid vector + BM25 search combined via Reciprocal Rank Fusion.

Architecture: verbatim conversation storage with optional read-time fact extraction. Local Postgres backend, MIT licensed.

Setup:
    pip install ogham-mcp
    # Requires: local Postgres with pgvector, OGHAM_REPO env var pointing
    # to the ogham-mcp source, embedding provider configured via env vars.

Usage:
    omb run --dataset longmemeval --split s --memory ogham
1 parent c85ad25 commit 84c3f96

2 files changed

Lines changed: 220 additions & 0 deletions

File tree

src/memory_bench/memory/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from .mem0 import Mem0MemoryProvider
88
from .mem0_cloud import Mem0CloudMemoryProvider
99
from .hybrid_search import HybridSearchMemoryProvider
10+
from .ogham import OghamMemoryProvider
1011
from .supermemory import SupermemoryMemoryProvider
1112

1213
REGISTRY: dict[str, type[MemoryProvider]] = {
@@ -20,6 +21,7 @@
2021
"mastra-om": MastraOMMemoryProvider,
2122
"mem0": Mem0MemoryProvider,
2223
"mem0-cloud": Mem0CloudMemoryProvider,
24+
"ogham": OghamMemoryProvider,
2325
"qdrant": HybridSearchMemoryProvider,
2426
"supermemory": SupermemoryMemoryProvider,
2527
}

src/memory_bench/memory/ogham.py

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
"""Ogham MCP memory provider for Agent Memory Benchmark.
2+
3+
Uses Ogham's hybrid search (vector + BM25 + entity overlap boost)
4+
via the local Python API. For gateway/cloud use, swap to HTTP calls.
5+
"""
6+
7+
import os
8+
import sys
9+
from pathlib import Path
10+
11+
from ..models import Document
12+
from .base import MemoryProvider
13+
14+
# Optional path to a checkout of the Ogham repository, read once at import
# time. When set, ``_ensure_ogham`` puts ``<OGHAM_REPO>/src`` on ``sys.path``
# so the ``ogham`` package can be imported straight from source.
_OGHAM_REPO = os.environ.get("OGHAM_REPO", "")


def _ensure_ogham():
    """Prepare the process so that ``ogham`` can be imported.

    Does two things, both idempotent:

    * If ``OGHAM_REPO`` was set, prepend ``<OGHAM_REPO>/src`` to ``sys.path``
      (only once, however many times this function is called).
    * Default the ``DATABASE_BACKEND`` environment variable to ``"postgres"``
      without overriding a value the user already exported.

    This function does not import ``ogham`` itself; callers import the
    modules they need after calling it.
    """
    if _OGHAM_REPO:
        src_path = os.path.join(_OGHAM_REPO, "src")
        # Compare the path that is actually inserted; checking the bare repo
        # path would never match and would add a duplicate entry per call.
        if src_path not in sys.path:
            sys.path.insert(0, src_path)
    # Must be in the environment before Ogham is imported.
    os.environ.setdefault("DATABASE_BACKEND", "postgres")
24+
25+
26+
class OghamMemoryProvider(MemoryProvider):
    """Memory provider that stores and searches documents through Ogham.

    Documents are flattened to text, embedded with
    ``ogham.embeddings.generate_embeddings_batch`` and written through the
    backend returned by ``ogham.database.get_backend``. Retrieval goes through
    ``ogham.service.search_memories_enriched``. Each benchmark ``user_id`` is
    mapped to its own Ogham profile (``amb_<user_id>``).

    When ``extract_facts`` is enabled, the retrieved memories are condensed by
    an LLM (OpenAI or Gemini, selected via ``OGHAM_EXTRACTOR_PROVIDER``) into
    a single synthetic document before being returned.
    """

    # Provider metadata (presumably consumed by the benchmark harness and
    # registry; the consumers are not visible in this file).
    name = "ogham"
    description = (
        "Ogham MCP: hybrid vector + BM25 search with entity overlap boost. "
        "Local Postgres + pgvector. Stores verbatim conversations and retrieves "
        "via Reciprocal Rank Fusion with optional read-time fact extraction."
    )
    kind = "local"
    provider = "ogham"
    variant = "local"
    link = "https://ogham-mcp.dev"
    concurrency = 8

    # Number of rows passed to ``store_memories_batch`` per call.
    _STORE_BATCH_SIZE = 100

    def __init__(self, k: int = 20, extract_facts: bool = False):
        """Configure the provider.

        Args:
            k: Fallback result limit, used by ``retrieve`` only when it is
                called with a falsy ``k``.
            extract_facts: If true, ``retrieve`` returns one LLM-written
                summary document instead of the raw memories.
        """
        self.k = k
        self._profile_prefix = "amb_"
        self._extract_facts_enabled = extract_facts
        # Lazily created ``(provider_name, client)`` tuple; see _get_extractor.
        self._extractor_client = None

    def initialize(self) -> None:
        """Make the ``ogham`` package importable and default its backend."""
        _ensure_ogham()

    def prepare(
        self, store_dir: Path, unit_ids: set[str] | None = None, reset: bool = True
    ) -> None:
        """Prepare for a run; currently only ensures Ogham is importable.

        NOTE(review): ``store_dir``, ``unit_ids`` and ``reset`` are ignored.
        In particular ``reset=True`` does not clear previously stored
        memories, so repeated runs against the same database may accumulate
        duplicates -- confirm whether that is intended.
        """
        _ensure_ogham()

    def cleanup(self) -> None:
        """Reset Ogham's database backend via ``ogham.database._reset_backend``."""
        from ogham.database import _reset_backend

        _reset_backend()

    def _profile(self, user_id: str | None) -> str:
        """Return the Ogham profile name for ``user_id`` (``'default'`` if falsy)."""
        return f"{self._profile_prefix}{user_id or 'default'}"

    @staticmethod
    def _format_content(doc: Document) -> str:
        """Convert a document to plain text for embedding and retrieval.

        Handles three cases:
          1. ``doc.messages`` is populated (structured turns)
          2. ``doc.content`` is a JSON string holding a list of messages
          3. ``doc.content`` is plain text

        In cases 1 and 2 every dict message with non-empty content becomes a
        ``"User: ..."`` / ``"Assistant: ..."`` line (any role other than
        ``"user"`` is labelled Assistant), and a ``[Date: ...]`` header is
        prepended when the document has a timestamp. If no usable message is
        found, the raw ``doc.content`` is returned unchanged (an empty string
        when it is ``None``).
        """
        import json

        raw_text = doc.content or ""
        messages = doc.messages
        if not messages and raw_text.strip().startswith("["):
            try:
                messages = json.loads(raw_text)
            except (json.JSONDecodeError, TypeError):
                # Not valid JSON after all: treat the content as plain text.
                messages = None

        if messages and isinstance(messages, list):
            parts = []
            for msg in messages:
                if not isinstance(msg, dict):
                    continue
                body = msg.get("content")
                if body is None:
                    # An explicit ``"content": None`` carries no text.
                    continue
                if not isinstance(body, str):
                    # Non-string payloads are stringified rather than
                    # crashing on ``.strip()``.
                    body = str(body)
                body = body.strip()
                if not body:
                    continue
                role = "User" if msg.get("role") == "user" else "Assistant"
                parts.append(f"{role}: {body}")
            if parts:
                text = "\n".join(parts)
                if doc.timestamp:
                    text = f"[Date: {doc.timestamp}]\n{text}"
                return text

        return raw_text

    def ingest(self, documents: list[Document]) -> None:
        """Embed ``documents`` and store them in Ogham in batches.

        Each stored row carries the formatted text, the stringified
        embedding, the per-user profile, ``source="amb"``, an optional
        ``date:<timestamp>`` tag and the original document id in its metadata.

        Raises:
            ValueError: If the embedder returns a different number of
                embeddings than there are documents. Nothing is stored in
                that case, because rows are only written after all of them
                have been built.
        """
        if not documents:
            # Nothing to do; avoid touching the backend or the embedder.
            return

        from ogham.database import get_backend
        from ogham.embeddings import generate_embeddings_batch

        backend = get_backend()
        texts = [self._format_content(doc) for doc in documents]
        embeddings = generate_embeddings_batch(texts)

        rows = []
        # strict=True turns a length mismatch into an error instead of
        # silently dropping the trailing documents.
        for doc, text, emb in zip(documents, texts, embeddings, strict=True):
            tags = [f"date:{doc.timestamp}"] if doc.timestamp else []
            rows.append(
                {
                    "content": text,
                    "embedding": str(emb),
                    "profile": self._profile(doc.user_id),
                    "source": "amb",
                    "tags": tags,
                    "metadata": {"doc_id": doc.id},
                }
            )

        step = self._STORE_BATCH_SIZE
        for start in range(0, len(rows), step):
            backend.store_memories_batch(rows[start : start + step])

    def _get_extractor(self):
        """Return the cached ``(provider_name, client)`` pair, creating it once.

        ``OGHAM_EXTRACTOR_PROVIDER`` selects the backend: ``"openai"`` builds
        an ``openai.OpenAI`` client from ``OPENAI_API_KEY``; any other value
        (default ``"gemini"``) builds a ``google.genai.Client`` from
        ``GEMINI_API_KEY`` or, failing that, ``GOOGLE_API_KEY``.
        """
        if self._extractor_client is None:
            provider = os.environ.get("OGHAM_EXTRACTOR_PROVIDER", "gemini")
            if provider == "openai":
                from openai import OpenAI

                client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
                self._extractor_client = ("openai", client)
            else:
                from google import genai

                api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get(
                    "GOOGLE_API_KEY"
                )
                self._extractor_client = ("gemini", genai.Client(api_key=api_key))
        return self._extractor_client

    def _extract_facts(self, query: str, raw_content: str) -> str:
        """Extract query-relevant facts from raw conversation context.

        Read-time extraction: the extractor sees both the query and the
        retrieved context and is asked for a concise bulleted list of facts.
        The model is taken from ``OGHAM_EXTRACTOR_MODEL`` (default
        ``gpt-4.1-mini`` for OpenAI, ``gemini-2.5-flash`` otherwise).

        This is best-effort: if the client cannot be created, the call fails,
        or the response is empty, ``raw_content`` is returned unchanged.
        Failures are logged as warnings rather than raised.
        """
        import logging

        prompt = (
            "Given a user's question and conversation history, extract the "
            "facts most relevant to answering the question.\n"
            "\n"
            f"Question: {query}\n"
            "\n"
            "Conversation history:\n"
            f"{raw_content}\n"
            "\n"
            "Extract relevant facts as a concise bulleted list. Preserve "
            "specific details: names, numbers, dates, locations. If the "
            "history contains no relevant information, respond with "
            '"NO RELEVANT FACTS".'
        )

        try:
            provider, client = self._get_extractor()
            if provider == "openai":
                model = os.environ.get("OGHAM_EXTRACTOR_MODEL", "gpt-4.1-mini")
                response = client.chat.completions.create(
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                )
                return response.choices[0].message.content or raw_content

            model = os.environ.get("OGHAM_EXTRACTOR_MODEL", "gemini-2.5-flash")
            response = client.models.generate_content(
                model=model,
                contents=prompt,
            )
            return response.text or raw_content
        except Exception:
            # Deliberately broad: SDK error types differ per provider and a
            # failed extraction must not abort retrieval. Log it so the
            # fallback is visible instead of silent.
            logging.getLogger(__name__).warning(
                "Ogham fact extraction failed; returning raw context",
                exc_info=True,
            )
            return raw_content

    def retrieve(
        self,
        query: str,
        k: int = 10,
        user_id: str | None = None,
        query_timestamp: str | None = None,
    ) -> tuple[list[Document], dict | None]:
        """Search the user's Ogham profile and return matching documents.

        Args:
            query: Search text.
            k: Maximum number of results; ``self.k`` is used when falsy.
            user_id: Selects the profile to search (see ``_profile``).
            query_timestamp: Accepted for interface compatibility; not used.

        Returns:
            ``(documents, None)``. With fact extraction enabled the list holds
            a single document with id ``"ogham-extracted-facts"``; otherwise
            one document per result, with a ``relevance: x.xxx`` line appended
            to the content when the result carries a relevance value. An empty
            result set yields ``([], None)``.
        """
        from ogham.service import search_memories_enriched

        results = search_memories_enriched(
            query=query,
            profile=self._profile(user_id),
            limit=k or self.k,
        )
        if not results:
            return [], None

        if self._extract_facts_enabled:
            # ``or ""`` guards against an explicit ``content: None``.
            raw_bundle = "\n\n".join(
                f"## Memory {idx}\n{r.get('content') or ''}"
                for idx, r in enumerate(results, start=1)
            )
            facts = self._extract_facts(query, raw_bundle)
            return [Document(id="ogham-extracted-facts", content=facts)], None

        docs = []
        for r in results:
            content_parts = [r.get("content") or ""]
            relevance = r.get("relevance")
            if relevance is not None:
                content_parts.append(f"relevance: {relevance:.3f}")
            docs.append(
                Document(
                    id=str(r.get("id", "")),
                    content="\n".join(content_parts),
                )
            )
        return docs, None

0 commit comments

Comments
 (0)