1313import sqlite3
1414import uuid
1515from pathlib import Path
16- from typing import Any , Dict , List , Optional , Sequence , Tuple
16+ from typing import Any , Dict , List , Optional , Sequence
1717
1818from src .config import settings
1919from src .storage .base import BaseVectorStore , IndexStats , SearchResult
20+ from src .storage .memory_lifecycle import (
21+ CONTENT_HASH_KEY ,
22+ FORGET_REASON_KEY ,
23+ FORGOTTEN_AT_KEY ,
24+ IS_CURRENT_KEY ,
25+ PARENT_MEMORY_ID_KEY ,
26+ VERSION_KEY ,
27+ build_lifecycle_metadata ,
28+ compute_memory_hash ,
29+ is_retrievable_memory ,
30+ utc_now_iso ,
31+ )
2032from src .utils .exceptions import VectorStoreValidationError
2133
2234
@@ -43,6 +55,9 @@ def _cosine_similarity(a: Sequence[float], b: Sequence[float]) -> float:
4355 return max (0.0 , min (1.0 , (dot / (norm_a * norm_b ) + 1.0 ) / 2.0 ))
4456
4557
58+ _DEDUP_SCOPE_KEYS = ("user_id" , "tenant_id" , "org_id" , "workspace_id" , "project_id" )
59+
60+
4661class SQLiteVectorStore (BaseVectorStore ):
4762 """Small embedded vector store for single-user local testing.
4863
@@ -101,16 +116,29 @@ def add(
101116
102117 ids = ids or [str (uuid .uuid4 ()) for _ in texts ]
103118 metadata = metadata or [{} for _ in texts ]
104- rows = [
105- (
106- self . _namespace ,
107- vec_id ,
108- text ,
109- json . dumps ([ float ( v ) for v in embedding ]) ,
110- json . dumps ( meta or {}) ,
119+ output_ids : List [ str ] = []
120+ rows = []
121+ for text , embedding , vec_id , meta in zip ( texts , embeddings , ids , metadata ):
122+ lifecycle_meta = build_lifecycle_metadata ( text , meta )
123+ existing_id = self . _find_current_by_hash (
124+ lifecycle_meta [ CONTENT_HASH_KEY ] ,
125+ lifecycle_meta ,
111126 )
112- for text , embedding , vec_id , meta in zip (texts , embeddings , ids , metadata )
113- ]
127+ if existing_id :
128+ output_ids .append (existing_id )
129+ continue
130+ output_ids .append (vec_id )
131+ rows .append (
132+ (
133+ self ._namespace ,
134+ vec_id ,
135+ text ,
136+ json .dumps ([float (v ) for v in embedding ]),
137+ json .dumps (lifecycle_meta ),
138+ )
139+ )
140+ if not rows :
141+ return output_ids
114142 self ._conn .executemany (
115143 """
116144 INSERT INTO xmem_vectors(namespace, id, content, embedding, metadata)
@@ -124,7 +152,7 @@ def add(
124152 rows ,
125153 )
126154 self ._conn .commit ()
127- return ids
155+ return output_ids
128156
129157 def search (
130158 self ,
@@ -145,7 +173,7 @@ def search(
145173 results : List [SearchResult ] = []
146174 for row in rows :
147175 meta = json .loads (row ["metadata" ] or "{}" )
148- if not _metadata_matches (meta , filters ):
176+ if not is_retrievable_memory ( meta ) or not _metadata_matches (meta , filters ):
149177 continue
150178 embedding = json .loads (row ["embedding" ])
151179 results .append (
@@ -175,6 +203,8 @@ def update(
175203 return False
176204 current_meta = json .loads (row ["metadata" ] or "{}" )
177205 current_meta .update (metadata or {})
206+ new_text = text if text is not None else row ["content" ]
207+ current_meta [CONTENT_HASH_KEY ] = compute_memory_hash (new_text )
178208 new_embedding = embedding if embedding is not None else json .loads (row ["embedding" ])
179209 if len (new_embedding ) != self ._dimension :
180210 raise VectorStoreValidationError (
@@ -188,7 +218,7 @@ def update(
188218 WHERE namespace = ? AND id = ?
189219 """ ,
190220 (
191- text if text is not None else row [ "content" ] ,
221+ new_text ,
192222 json .dumps ([float (v ) for v in new_embedding ]),
193223 json .dumps (current_meta ),
194224 self ._namespace ,
@@ -198,6 +228,115 @@ def update(
198228 self ._conn .commit ()
199229 return True
200230
def add_version(
    self,
    parent_id: str,
    text: str,
    embedding: List[float],
    id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Supersede ``parent_id`` with a new current version, keeping the parent row.

    Args:
        parent_id: Id of the memory being superseded.
        text: Content of the new version.
        embedding: Vector for the new version; its length must equal the
            store dimension.
        id: Optional explicit id for the new row; a UUID4 string is
            generated when omitted.
        metadata: Extra metadata passed to ``build_lifecycle_metadata``.

    Returns:
        ``None`` when the parent cannot be fetched; the id of an existing
        current row with the same content hash when one is found (the
        parent is flagged non-current unless that row *is* the parent);
        otherwise the id of the newly inserted row.

    Raises:
        VectorStoreValidationError: If ``embedding`` has the wrong length.
    """

    fetched = self.get([parent_id])
    if not fetched:
        return None
    if len(embedding) != self._dimension:
        raise VectorStoreValidationError(
            f"Embedding dimension {len(embedding)} doesn't match {self._dimension}",
            operation="add_version",
        )

    parent_meta = dict(fetched[0]["metadata"] or {})
    # Point at the parent's own recorded parent id when it has one, else at
    # the parent itself.
    lineage_root = parent_meta.get(PARENT_MEMORY_ID_KEY) or parent_id
    new_id = id or str(uuid.uuid4())
    new_meta = build_lifecycle_metadata(
        text,
        metadata,
        parent_memory_id=lineage_root,
        version=int(parent_meta.get(VERSION_KEY) or 1) + 1,
        is_current=True,
    )

    duplicate_id = self._find_current_by_hash(new_meta[CONTENT_HASH_KEY], new_meta)
    if duplicate_id and duplicate_id == parent_id:
        # The parent already holds exactly this content: nothing to version.
        return duplicate_id

    parent_meta[IS_CURRENT_KEY] = False
    # One transaction: the parent is retired and, unless an equivalent
    # current row already exists, the new version is inserted alongside.
    with self._conn:
        self._conn.execute(
            """
            UPDATE xmem_vectors
            SET metadata = ?, updated_at = CURRENT_TIMESTAMP
            WHERE namespace = ? AND id = ?
            """,
            (json.dumps(parent_meta), self._namespace, parent_id),
        )
        if not duplicate_id:
            self._conn.execute(
                """
                INSERT INTO xmem_vectors(namespace, id, content, embedding, metadata)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    self._namespace,
                    new_id,
                    text,
                    json.dumps([float(component) for component in embedding]),
                    json.dumps(new_meta),
                ),
            )
    return duplicate_id if duplicate_id else new_id
301+
def forget(
    self,
    ids: List[str],
    reason: Optional[str] = None,
    hard_delete: bool = False,
) -> bool:
    """Mark memories as forgotten, or delete them when ``hard_delete`` is set.

    Args:
        ids: Ids of the memories to forget, looked up in this store's
            namespace.
        reason: Optional text stored under ``FORGET_REASON_KEY``.
        hard_delete: When true, delegate to ``delete`` and return its result.

    Returns:
        ``True`` on the soft-forget path (including when ``ids`` is empty or
        none of them match a row); otherwise whatever ``delete`` returns.
    """

    if hard_delete:
        return self.delete(ids)
    if not ids:
        return True

    id_marks = ",".join("?" for _ in ids)
    matched = self._conn.execute(
        f"SELECT id, metadata FROM xmem_vectors "
        f"WHERE namespace = ? AND id IN ({id_marks})",
        [self._namespace, *ids],
    ).fetchall()

    # A single timestamp is shared by every row forgotten in this call.
    forgotten_at = utc_now_iso()
    pending = [
        (
            json.dumps(
                {
                    **json.loads(record["metadata"] or "{}"),
                    IS_CURRENT_KEY: False,
                    FORGOTTEN_AT_KEY: forgotten_at,
                    FORGET_REASON_KEY: reason,
                }
            ),
            self._namespace,
            record["id"],
        )
        for record in matched
    ]
    if not pending:
        return True

    self._conn.executemany(
        "UPDATE xmem_vectors SET metadata = ?, updated_at = CURRENT_TIMESTAMP "
        "WHERE namespace = ? AND id = ?",
        pending,
    )
    self._conn.commit()
    return True
339+
201340 def delete (self , ids : List [str ]) -> bool :
202341 if not ids :
203342 return True
@@ -239,10 +378,41 @@ def search_by_metadata(
239378 results : List [SearchResult ] = []
240379 for row in rows :
241380 meta = json .loads (row ["metadata" ] or "{}" )
242- if _metadata_matches (meta , filters ):
381+ if is_retrievable_memory ( meta ) and _metadata_matches (meta , filters ):
243382 results .append (SearchResult (id = row ["id" ], content = row ["content" ], score = 1.0 , metadata = meta ))
244383 return results [:top_k ]
245384
def _find_current_by_hash(
    self,
    content_hash: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> Optional[str]:
    """Return the id of a current, non-forgotten row with this content hash.

    The lookup is limited to this store's namespace. Scoping uses the keys
    in ``_DEDUP_SCOPE_KEYS``:

    * If ``metadata`` carries a non-``None`` value for at least one scope
      key, the row must have equal values for exactly those keys.
    * Otherwise the row must have no value for *any* scope key.

    Args:
        content_hash: Hash compared against the row's ``CONTENT_HASH_KEY``.
        metadata: Metadata of the candidate memory, used only for scoping.

    Returns:
        The id of one matching row, or ``None`` when there is none.
    """
    meta = metadata or {}
    clauses = [
        "namespace = ?",
        f"json_extract(metadata, '$.{CONTENT_HASH_KEY}') = ?",
        f"json_extract(metadata, '$.{IS_CURRENT_KEY}') = 1",
        f"json_extract(metadata, '$.{FORGOTTEN_AT_KEY}') IS NULL",
    ]
    params: List[Any] = [self._namespace, content_hash]

    # A scope key whose value is None counts as "not scoped by that key".
    scope = {
        key: meta[key]
        for key in _DEDUP_SCOPE_KEYS
        if meta.get(key) is not None
    }
    if scope:
        for key, value in scope.items():
            clauses.append(f"json_extract(metadata, '$.{key}') = ?")
            params.append(value)
    else:
        # json_extract yields SQL NULL both for a missing key and for a
        # stored JSON null, mirroring the None-is-unscoped rule above.
        # json_type would return the text 'null' for a stored null, so such
        # rows could never be matched by an unscoped lookup.
        clauses.extend(
            f"json_extract(metadata, '$.{key}') IS NULL"
            for key in _DEDUP_SCOPE_KEYS
        )

    row = self._conn.execute(
        f"SELECT id FROM xmem_vectors WHERE {' AND '.join(clauses)} LIMIT 1",
        params,
    ).fetchone()
    return row["id"] if row else None
415+
246416 async def search_by_text (
247417 self ,
248418 query_text : str ,
0 commit comments