1313import sqlite3
1414import uuid
1515from pathlib import Path
16- from typing import Any , Dict , List , Optional , Sequence , Tuple
16+ from typing import Any , Dict , List , Optional , Sequence
1717
1818from src .config import settings
1919from src .storage .base import BaseVectorStore , IndexStats , SearchResult
20+ from src .storage .memory_lifecycle import (
21+ CONTENT_HASH_KEY ,
22+ FORGET_REASON_KEY ,
23+ FORGOTTEN_AT_KEY ,
24+ IS_CURRENT_KEY ,
25+ PARENT_MEMORY_ID_KEY ,
26+ VERSION_KEY ,
27+ build_lifecycle_metadata ,
28+ compute_memory_hash ,
29+ is_retrievable_memory ,
30+ utc_now_iso ,
31+ )
2032from src .utils .exceptions import VectorStoreValidationError
2133
2234
@@ -101,16 +113,26 @@ def add(
101113
102114 ids = ids or [str (uuid .uuid4 ()) for _ in texts ]
103115 metadata = metadata or [{} for _ in texts ]
104- rows = [
105- (
106- self ._namespace ,
107- vec_id ,
108- text ,
109- json .dumps ([float (v ) for v in embedding ]),
110- json .dumps (meta or {}),
116+ output_ids : List [str ] = []
117+ rows = []
118+ for text , embedding , vec_id , meta in zip (texts , embeddings , ids , metadata ):
119+ lifecycle_meta = build_lifecycle_metadata (text , meta )
120+ existing_id = self ._find_current_by_hash (lifecycle_meta [CONTENT_HASH_KEY ])
121+ if existing_id :
122+ output_ids .append (existing_id )
123+ continue
124+ output_ids .append (vec_id )
125+ rows .append (
126+ (
127+ self ._namespace ,
128+ vec_id ,
129+ text ,
130+ json .dumps ([float (v ) for v in embedding ]),
131+ json .dumps (lifecycle_meta ),
132+ )
111133 )
112- for text , embedding , vec_id , meta in zip ( texts , embeddings , ids , metadata )
113- ]
134+ if not rows :
135+ return output_ids
114136 self ._conn .executemany (
115137 """
116138 INSERT INTO xmem_vectors(namespace, id, content, embedding, metadata)
@@ -124,7 +146,7 @@ def add(
124146 rows ,
125147 )
126148 self ._conn .commit ()
127- return ids
149+ return output_ids
128150
129151 def search (
130152 self ,
@@ -145,7 +167,7 @@ def search(
145167 results : List [SearchResult ] = []
146168 for row in rows :
147169 meta = json .loads (row ["metadata" ] or "{}" )
148- if not _metadata_matches (meta , filters ):
170+ if not is_retrievable_memory ( meta ) or not _metadata_matches (meta , filters ):
149171 continue
150172 embedding = json .loads (row ["embedding" ])
151173 results .append (
@@ -175,6 +197,8 @@ def update(
175197 return False
176198 current_meta = json .loads (row ["metadata" ] or "{}" )
177199 current_meta .update (metadata or {})
200+ new_text = text if text is not None else row ["content" ]
201+ current_meta [CONTENT_HASH_KEY ] = compute_memory_hash (new_text )
178202 new_embedding = embedding if embedding is not None else json .loads (row ["embedding" ])
179203 if len (new_embedding ) != self ._dimension :
180204 raise VectorStoreValidationError (
@@ -188,7 +212,7 @@ def update(
188212 WHERE namespace = ? AND id = ?
189213 """ ,
190214 (
191- text if text is not None else row [ "content" ] ,
215+ new_text ,
192216 json .dumps ([float (v ) for v in new_embedding ]),
193217 json .dumps (current_meta ),
194218 self ._namespace ,
@@ -198,6 +222,103 @@ def update(
198222 self ._conn .commit ()
199223 return True
200224
def add_version(
    self,
    parent_id: str,
    text: str,
    embedding: List[float],
    id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Store ``text`` as the new current version of an existing memory.

    The parent row is not removed: only its "is current" metadata flag is
    cleared, so it remains in the table as history.

    Args:
        parent_id: Id of the memory being superseded.
        text: Content of the new version.
        embedding: Vector for the new version; its length must equal the
            store dimension.
        id: Optional id for the new row. A random UUID4 string is used when
            omitted or falsy.
        metadata: Optional caller metadata, passed through
            ``build_lifecycle_metadata``.

    Returns:
        ``None`` when ``self.get`` finds no parent; the id reported by
        ``_find_current_by_hash`` when one exists for the new content hash
        (nothing is written in that case); otherwise the id of the newly
        inserted row.

    Raises:
        VectorStoreValidationError: If ``embedding`` has the wrong length.
    """

    parent_rows = self.get([parent_id])
    if not parent_rows:
        return None

    if len(embedding) != self._dimension:
        raise VectorStoreValidationError(
            f"Embedding dimension {len(embedding)} doesn't match {self._dimension}",
            operation="add_version",
        )

    # Work on a copy so the object returned by ``get`` is not mutated.
    ancestor_meta = dict(parent_rows[0]["metadata"] or {})
    # Every version points at the first memory of the chain: reuse the
    # parent's own parent pointer when it has one.
    lineage_root = ancestor_meta.get(PARENT_MEMORY_ID_KEY) or parent_id
    # A parent without a recorded version is counted as version 1.
    version_number = int(ancestor_meta.get(VERSION_KEY) or 1) + 1
    new_id = id or str(uuid.uuid4())

    version_meta = build_lifecycle_metadata(
        text,
        metadata,
        parent_memory_id=lineage_root,
        version=version_number,
        is_current=True,
    )

    duplicate_id = self._find_current_by_hash(version_meta[CONTENT_HASH_KEY])
    if duplicate_id:
        # Identical content is already stored as a current memory.
        return duplicate_id

    ancestor_meta[IS_CURRENT_KEY] = False

    # NOTE(review): assumes self._conn is a sqlite3.Connection, whose context
    # manager commits on success and rolls back on error - confirm.
    with self._conn as conn:
        conn.execute(
            """
            UPDATE xmem_vectors
            SET metadata = ?, updated_at = CURRENT_TIMESTAMP
            WHERE namespace = ? AND id = ?
            """,
            (json.dumps(ancestor_meta), self._namespace, parent_id),
        )
        conn.execute(
            """
            INSERT INTO xmem_vectors(namespace, id, content, embedding, metadata)
            VALUES (?, ?, ?, ?, ?)
            """,
            (
                self._namespace,
                new_id,
                text,
                json.dumps([float(component) for component in embedding]),
                json.dumps(version_meta),
            ),
        )
    return new_id
283+
def forget(
    self,
    ids: List[str],
    reason: Optional[str] = None,
    hard_delete: bool = False,
) -> bool:
    """Soft-forget memories by default, preserving audit history.

    A soft forget keeps each row and only rewrites its metadata: the
    "is current" flag is set to ``False`` and the forgotten-at timestamp and
    forget reason are recorded. Ids with no row in this namespace are
    ignored.

    Args:
        ids: Ids of the memories to forget.
        reason: Optional reason stored under ``FORGET_REASON_KEY``
            (``None`` is stored when omitted).
        hard_delete: When true, delegate to ``delete`` and remove the rows
            instead of marking them.

    Returns:
        The result of ``delete(ids)`` when ``hard_delete`` is true,
        otherwise ``True``.
    """

    if hard_delete:
        return self.delete(ids)
    if not ids:
        return True

    # Only the "?" placeholders are interpolated; the ids themselves are
    # always bound as parameters.
    placeholders = ",".join("?" for _ in ids)
    rows = self._conn.execute(
        f"SELECT id, metadata FROM xmem_vectors "
        f"WHERE namespace = ? AND id IN ({placeholders})",
        [self._namespace, *ids],
    ).fetchall()

    # One timestamp for the whole batch so every row forgotten by this call
    # carries the same value.
    now = utc_now_iso()
    updates = []
    for row in rows:
        meta = json.loads(row["metadata"] or "{}")
        meta[IS_CURRENT_KEY] = False
        meta[FORGOTTEN_AT_KEY] = now
        meta[FORGET_REASON_KEY] = reason
        updates.append((json.dumps(meta), self._namespace, row["id"]))

    if updates:
        # Use the connection as a context manager (as add_version does) so a
        # failure part-way through the batch rolls back instead of leaving
        # partial writes pending for a later, unrelated commit.
        with self._conn:
            self._conn.executemany(
                "UPDATE xmem_vectors SET metadata = ?, updated_at = CURRENT_TIMESTAMP "
                "WHERE namespace = ? AND id = ?",
                updates,
            )
    return True
321+
201322 def delete (self , ids : List [str ]) -> bool :
202323 if not ids :
203324 return True
@@ -239,10 +360,22 @@ def search_by_metadata(
239360 results : List [SearchResult ] = []
240361 for row in rows :
241362 meta = json .loads (row ["metadata" ] or "{}" )
242- if _metadata_matches (meta , filters ):
363+ if is_retrievable_memory ( meta ) and _metadata_matches (meta , filters ):
243364 results .append (SearchResult (id = row ["id" ], content = row ["content" ], score = 1.0 , metadata = meta ))
244365 return results [:top_k ]
245366
def _find_current_by_hash(self, content_hash: str) -> Optional[str]:
    """Look up one live row in this namespace by its content hash.

    A row matches when its metadata content hash equals ``content_hash``,
    its "is current" flag is not 0, and it has no forgotten-at value.

    Args:
        content_hash: Hash compared against the metadata content-hash key.

    Returns:
        The id of one matching row (the query has no ORDER BY, so which one
        is unspecified when several match), or ``None`` when none matches.
    """
    # ``IS NOT 0`` is null-safe, so rows whose metadata has no "is current"
    # key at all are still treated as current.
    query = (
        "SELECT id FROM xmem_vectors "
        "WHERE namespace = ? "
        f"AND json_extract(metadata, '$.{CONTENT_HASH_KEY}') = ? "
        f"AND json_extract(metadata, '$.{IS_CURRENT_KEY}') IS NOT 0 "
        f"AND json_extract(metadata, '$.{FORGOTTEN_AT_KEY}') IS NULL "
        "LIMIT 1"
    )
    match = self._conn.execute(query, (self._namespace, content_hash)).fetchone()
    if not match:
        return None
    return match["id"]
378+
246379 async def search_by_text (
247380 self ,
248381 query_text : str ,
0 commit comments