Skip to content
This repository was archived by the owner on Jun 3, 2026. It is now read-only.

Commit e006a29

Browse files
committed
Add memory lifecycle metadata support
1 parent d1090f2 commit e006a29

3 files changed

Lines changed: 304 additions & 14 deletions

File tree

src/storage/local.py

Lines changed: 147 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,22 @@
1313
import sqlite3
1414
import uuid
1515
from pathlib import Path
16-
from typing import Any, Dict, List, Optional, Sequence, Tuple
16+
from typing import Any, Dict, List, Optional, Sequence
1717

1818
from src.config import settings
1919
from src.storage.base import BaseVectorStore, IndexStats, SearchResult
20+
from src.storage.memory_lifecycle import (
21+
CONTENT_HASH_KEY,
22+
FORGET_REASON_KEY,
23+
FORGOTTEN_AT_KEY,
24+
IS_CURRENT_KEY,
25+
PARENT_MEMORY_ID_KEY,
26+
VERSION_KEY,
27+
build_lifecycle_metadata,
28+
compute_memory_hash,
29+
is_retrievable_memory,
30+
utc_now_iso,
31+
)
2032
from src.utils.exceptions import VectorStoreValidationError
2133

2234

@@ -101,16 +113,26 @@ def add(
101113

102114
ids = ids or [str(uuid.uuid4()) for _ in texts]
103115
metadata = metadata or [{} for _ in texts]
104-
rows = [
105-
(
106-
self._namespace,
107-
vec_id,
108-
text,
109-
json.dumps([float(v) for v in embedding]),
110-
json.dumps(meta or {}),
116+
output_ids: List[str] = []
117+
rows = []
118+
for text, embedding, vec_id, meta in zip(texts, embeddings, ids, metadata):
119+
lifecycle_meta = build_lifecycle_metadata(text, meta)
120+
existing_id = self._find_current_by_hash(lifecycle_meta[CONTENT_HASH_KEY])
121+
if existing_id:
122+
output_ids.append(existing_id)
123+
continue
124+
output_ids.append(vec_id)
125+
rows.append(
126+
(
127+
self._namespace,
128+
vec_id,
129+
text,
130+
json.dumps([float(v) for v in embedding]),
131+
json.dumps(lifecycle_meta),
132+
)
111133
)
112-
for text, embedding, vec_id, meta in zip(texts, embeddings, ids, metadata)
113-
]
134+
if not rows:
135+
return output_ids
114136
self._conn.executemany(
115137
"""
116138
INSERT INTO xmem_vectors(namespace, id, content, embedding, metadata)
@@ -124,7 +146,7 @@ def add(
124146
rows,
125147
)
126148
self._conn.commit()
127-
return ids
149+
return output_ids
128150

129151
def search(
130152
self,
@@ -145,7 +167,7 @@ def search(
145167
results: List[SearchResult] = []
146168
for row in rows:
147169
meta = json.loads(row["metadata"] or "{}")
148-
if not _metadata_matches(meta, filters):
170+
if not is_retrievable_memory(meta) or not _metadata_matches(meta, filters):
149171
continue
150172
embedding = json.loads(row["embedding"])
151173
results.append(
@@ -175,6 +197,8 @@ def update(
175197
return False
176198
current_meta = json.loads(row["metadata"] or "{}")
177199
current_meta.update(metadata or {})
200+
new_text = text if text is not None else row["content"]
201+
current_meta[CONTENT_HASH_KEY] = compute_memory_hash(new_text)
178202
new_embedding = embedding if embedding is not None else json.loads(row["embedding"])
179203
if len(new_embedding) != self._dimension:
180204
raise VectorStoreValidationError(
@@ -188,7 +212,7 @@ def update(
188212
WHERE namespace = ? AND id = ?
189213
""",
190214
(
191-
text if text is not None else row["content"],
215+
new_text,
192216
json.dumps([float(v) for v in new_embedding]),
193217
json.dumps(current_meta),
194218
self._namespace,
@@ -198,6 +222,103 @@ def update(
198222
self._conn.commit()
199223
return True
200224

225+
def add_version(
    self,
    parent_id: str,
    text: str,
    embedding: List[float],
    id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Create a new current memory version and keep the parent as history.

    Args:
        parent_id: ID of the memory being superseded.
        text: Content of the new version.
        embedding: Vector for ``text``; its length must equal the store
            dimension.
        id: ID for the new row. A random UUID4 string is used when omitted.
        metadata: Caller metadata for the new row. Custom keys are kept;
            lifecycle keys (content hash, parent ID, version, current flag,
            forgotten-at, forget reason) are always set by this method, so
            stale values copied from another record cannot leak in.

    Returns:
        The ID of the newly inserted row; or the ID of an existing current
        memory with the same content hash (nothing is written in that
        case); or ``None`` when ``parent_id`` is not found.

    Raises:
        VectorStoreValidationError: If ``embedding`` has the wrong length.
    """

    parent = self.get([parent_id])
    if not parent:
        return None
    if len(embedding) != self._dimension:
        raise VectorStoreValidationError(
            f"Embedding dimension {len(embedding)} doesn't match {self._dimension}",
            operation="add_version",
        )

    parent_meta = dict(parent[0]["metadata"] or {})
    # Lineage always points at the first record of the chain, not the
    # immediate predecessor.
    root_parent_id = parent_meta.get(PARENT_MEMORY_ID_KEY) or parent_id
    next_version = int(parent_meta.get(VERSION_KEY) or 1) + 1
    new_id = id or str(uuid.uuid4())
    new_meta = build_lifecycle_metadata(
        text,
        metadata,
        parent_memory_id=root_parent_id,
        version=next_version,
        is_current=True,
    )
    # Lifecycle keys already present in the caller's metadata (for example
    # when it is a copy of the parent's metadata) may survive the merge
    # above. Force the values this method is responsible for; a stale
    # content hash in particular would make the duplicate lookup below match
    # the parent and skip creating the version.
    new_meta.update(
        {
            CONTENT_HASH_KEY: compute_memory_hash(text),
            PARENT_MEMORY_ID_KEY: root_parent_id,
            VERSION_KEY: next_version,
            IS_CURRENT_KEY: True,
            FORGOTTEN_AT_KEY: None,
            FORGET_REASON_KEY: None,
        }
    )
    existing_id = self._find_current_by_hash(new_meta[CONTENT_HASH_KEY])
    if existing_id:
        return existing_id

    parent_meta[IS_CURRENT_KEY] = False
    # The connection context manager commits both statements together and
    # rolls back if either raises.
    with self._conn:
        self._conn.execute(
            """
            UPDATE xmem_vectors
            SET metadata = ?, updated_at = CURRENT_TIMESTAMP
            WHERE namespace = ? AND id = ?
            """,
            (json.dumps(parent_meta), self._namespace, parent_id),
        )
        self._conn.execute(
            """
            INSERT INTO xmem_vectors(namespace, id, content, embedding, metadata)
            VALUES (?, ?, ?, ?, ?)
            """,
            (
                self._namespace,
                new_id,
                text,
                json.dumps([float(v) for v in embedding]),
                json.dumps(new_meta),
            ),
        )
    return new_id
283+
284+
def forget(
    self,
    ids: List[str],
    reason: Optional[str] = None,
    hard_delete: bool = False,
) -> bool:
    """Soft-forget memories by default, preserving audit history.

    Args:
        ids: IDs of the memories to forget within this store's namespace.
        reason: Optional free-text reason stored alongside the timestamp.
        hard_delete: When true, delegate to ``delete`` instead of marking
            the rows as forgotten.

    Returns:
        The result of ``delete`` for a hard delete; otherwise ``True``,
        including when ``ids`` is empty or matches no rows.
    """

    if hard_delete:
        return self.delete(ids)
    if not ids:
        return True

    id_slots = ",".join("?" for _ in ids)
    select_sql = (
        f"SELECT id, metadata FROM xmem_vectors "
        f"WHERE namespace = ? AND id IN ({id_slots})"
    )
    matched = self._conn.execute(select_sql, [self._namespace, *ids]).fetchall()

    # One timestamp is shared by every row forgotten in this call.
    tombstone = {
        IS_CURRENT_KEY: False,
        FORGOTTEN_AT_KEY: utc_now_iso(),
        FORGET_REASON_KEY: reason,
    }
    pending = [
        (
            json.dumps({**json.loads(record["metadata"] or "{}"), **tombstone}),
            self._namespace,
            record["id"],
        )
        for record in matched
    ]
    if not pending:
        return True

    self._conn.executemany(
        "UPDATE xmem_vectors SET metadata = ?, updated_at = CURRENT_TIMESTAMP "
        "WHERE namespace = ? AND id = ?",
        pending,
    )
    self._conn.commit()
    return True
321+
201322
def delete(self, ids: List[str]) -> bool:
202323
if not ids:
203324
return True
@@ -239,10 +360,22 @@ def search_by_metadata(
239360
results: List[SearchResult] = []
240361
for row in rows:
241362
meta = json.loads(row["metadata"] or "{}")
242-
if _metadata_matches(meta, filters):
363+
if is_retrievable_memory(meta) and _metadata_matches(meta, filters):
243364
results.append(SearchResult(id=row["id"], content=row["content"], score=1.0, metadata=meta))
244365
return results[:top_k]
245366

367+
def _find_current_by_hash(self, content_hash: str) -> Optional[str]:
    """Return the ID of one live row in this namespace with ``content_hash``.

    A row counts as live when its ``is_current`` metadata value is not 0
    (a missing value also counts) and its ``forgotten_at`` value is NULL.
    Returns ``None`` when no such row exists.
    """
    lookup_sql = (
        f"SELECT id FROM xmem_vectors "
        f"WHERE namespace = ? "
        f"AND json_extract(metadata, '$.{CONTENT_HASH_KEY}') = ? "
        f"AND json_extract(metadata, '$.{IS_CURRENT_KEY}') IS NOT 0 "
        f"AND json_extract(metadata, '$.{FORGOTTEN_AT_KEY}') IS NULL "
        f"LIMIT 1"
    )
    match = self._conn.execute(lookup_sql, (self._namespace, content_hash)).fetchone()
    if not match:
        return None
    return match["id"]
378+
246379
async def search_by_text(
247380
self,
248381
query_text: str,

src/storage/memory_lifecycle.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
"""Memory lifecycle metadata helpers.
2+
3+
These helpers keep duplicate detection, version lineage, and soft-forget
4+
metadata consistent across vector-store implementations.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import hashlib
10+
import re
11+
from datetime import datetime, timezone
12+
from typing import Any, Dict, Optional
13+
14+
CONTENT_HASH_KEY = "content_hash"
15+
PARENT_MEMORY_ID_KEY = "parent_memory_id"
16+
VERSION_KEY = "version"
17+
IS_CURRENT_KEY = "is_current"
18+
FORGOTTEN_AT_KEY = "forgotten_at"
19+
FORGET_REASON_KEY = "forget_reason"
20+
21+
22+
def normalize_memory_content(content: str) -> str:
    """Canonicalize memory text so trivially different copies hash alike.

    Leading and trailing whitespace is dropped, every internal run of
    whitespace becomes a single space, and the result is casefolded, so
    copies that differ only in spacing or letter case normalize equally.
    """

    words = content.split()
    return " ".join(words).casefold()
26+
27+
28+
def compute_memory_hash(content: str) -> str:
    """Return the hex SHA-256 digest of the normalized, UTF-8 encoded content."""

    hasher = hashlib.sha256()
    hasher.update(normalize_memory_content(content).encode("utf-8"))
    return hasher.hexdigest()
33+
34+
35+
def utc_now_iso() -> str:
    """Return the current timezone-aware UTC time as an ISO 8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
37+
38+
39+
def build_lifecycle_metadata(
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
    *,
    parent_memory_id: Optional[str] = None,
    version: int = 1,
    is_current: bool = True,
) -> Dict[str, Any]:
    """Merge caller metadata with lifecycle fields without losing custom keys.

    Args:
        content: Memory text the metadata describes.
        metadata: Caller metadata; it is copied, never mutated.
        parent_memory_id: Lineage root for a versioned memory. When given
            (not ``None``) it replaces any value already in ``metadata``.
        version: Version number used when ``metadata`` has none.
        is_current: Current flag used when ``metadata`` has none.

    Returns:
        A new dict holding every caller key plus the six lifecycle keys.
    """

    merged = dict(metadata or {})
    # The hash must always describe ``content``; keeping a caller-supplied
    # value could leave a stale hash behind and break duplicate detection.
    merged[CONTENT_HASH_KEY] = compute_memory_hash(content)
    if parent_memory_id is not None:
        # An explicitly requested parent wins over whatever the caller's
        # metadata happened to carry.
        merged[PARENT_MEMORY_ID_KEY] = parent_memory_id
    else:
        merged.setdefault(PARENT_MEMORY_ID_KEY, None)
    # The remaining fields are defaults only: values already present in the
    # caller metadata are preserved.
    merged.setdefault(VERSION_KEY, version)
    merged.setdefault(IS_CURRENT_KEY, is_current)
    merged.setdefault(FORGOTTEN_AT_KEY, None)
    merged.setdefault(FORGET_REASON_KEY, None)
    return merged
57+
58+
59+
def is_retrievable_memory(metadata: Optional[Dict[str, Any]]) -> bool:
    """Return False for superseded or soft-forgotten memory records.

    Missing or empty metadata counts as retrievable. Only a literal
    ``False`` current flag marks a record as superseded, and any truthy
    forgotten-at value marks it as forgotten.
    """

    record = metadata or {}
    if record.get(IS_CURRENT_KEY, True) is False:
        return False
    return not record.get(FORGOTTEN_AT_KEY)
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from __future__ import annotations
2+
3+
from src.storage.local import SQLiteVectorStore
4+
from src.storage.memory_lifecycle import (
5+
CONTENT_HASH_KEY,
6+
FORGET_REASON_KEY,
7+
FORGOTTEN_AT_KEY,
8+
IS_CURRENT_KEY,
9+
PARENT_MEMORY_ID_KEY,
10+
VERSION_KEY,
11+
compute_memory_hash,
12+
)
13+
14+
15+
def _store(tmp_path):
    """Build a 3-dimensional SQLiteVectorStore backed by a file under tmp_path."""
    database_file = tmp_path / "vectors.sqlite3"
    return SQLiteVectorStore(path=str(database_file), namespace="test", dimension=3)
21+
22+
23+
def test_sqlite_add_reuses_current_memory_with_same_normalized_hash(tmp_path):
    """Adding text that differs only in case/spacing returns the existing ID."""
    vector_store = _store(tmp_path)

    created = vector_store.add(
        ["Remember that Alice likes XMem."],
        [[1.0, 0.0, 0.0]],
        ids=["memory-1"],
        metadata=[{"user_id": "alice"}],
    )
    reused = vector_store.add(
        [" remember THAT alice likes xmem. "],
        [[0.0, 1.0, 0.0]],
        ids=["memory-duplicate"],
        metadata=[{"user_id": "alice"}],
    )

    assert created == ["memory-1"]
    assert reused == ["memory-1"]
    hits = vector_store.search_by_metadata({"user_id": "alice"}, top_k=10)
    assert hits[0].id == "memory-1"

    saved_meta = vector_store.get(["memory-1"])[0]["metadata"]
    expected_hash = compute_memory_hash("Remember that Alice likes XMem.")
    assert saved_meta[CONTENT_HASH_KEY] == expected_hash
    assert saved_meta[VERSION_KEY] == 1
    assert saved_meta[IS_CURRENT_KEY] is True
47+
48+
49+
def test_sqlite_add_version_supersedes_parent_but_keeps_history(tmp_path):
    """A new version hides the parent from search while both rows stay stored."""
    vector_store = _store(tmp_path)
    vector_store.add(
        ["Alice works at XMem."],
        [[1.0, 0.0, 0.0]],
        ids=["profile-1"],
        metadata=[{"user_id": "alice", "domain": "profile"}],
    )

    new_version_id = vector_store.add_version(
        "profile-1",
        "Alice works at XortexAI.",
        [0.0, 1.0, 0.0],
        id="profile-2",
        metadata={"user_id": "alice", "domain": "profile"},
    )

    assert new_version_id == "profile-2"
    old_meta = vector_store.get(["profile-1"])[0]["metadata"]
    new_meta = vector_store.get(["profile-2"])[0]["metadata"]
    assert old_meta[IS_CURRENT_KEY] is False
    assert new_meta[PARENT_MEMORY_ID_KEY] == "profile-1"
    assert new_meta[VERSION_KEY] == 2

    # Only the newest version should be returned by retrieval.
    visible_ids = [
        hit.id for hit in vector_store.search_by_metadata({"user_id": "alice"}, top_k=10)
    ]
    assert visible_ids == ["profile-2"]
75+
76+
77+
def test_sqlite_forget_soft_deletes_memory_from_retrieval(tmp_path):
    """A forgotten memory disappears from both searches but is still stored."""
    vector_store = _store(tmp_path)
    vector_store.add(
        ["Alice's temporary preference."],
        [[1.0, 0.0, 0.0]],
        ids=["temp-1"],
        metadata=[{"user_id": "alice"}],
    )

    outcome = vector_store.forget(["temp-1"], reason="user requested deletion")
    assert outcome is True

    assert vector_store.search_by_metadata({"user_id": "alice"}, top_k=10) == []
    assert vector_store.search([1.0, 0.0, 0.0], top_k=10) == []

    # The row itself remains, carrying the soft-forget audit fields.
    saved_meta = vector_store.get(["temp-1"])[0]["metadata"]
    assert saved_meta[IS_CURRENT_KEY] is False
    assert saved_meta[FORGOTTEN_AT_KEY]
    assert saved_meta[FORGET_REASON_KEY] == "user requested deletion"

0 commit comments

Comments
 (0)