11"""Pytest configuration and fixtures for vector store tests."""
22
3+ import math
34import os
5+ from typing import Any , Dict , List , Optional , Set
46
57import pytest
68from dotenv import load_dotenv
79
10+ from crossvector .engine import VectorEngine
11+ from crossvector .querydsl .q import Q
12+ from crossvector .schema import VectorDocument
13+
814# Load environment variables
915load_dotenv ()
1016
@@ -31,6 +37,189 @@ def sample_documents(sample_texts):
3137 }
3238
3339
40+ # In-memory mock adapter for Query DSL testing
class InMemoryAdapter:
    """Simple in-memory adapter to test Query DSL without external backends.

    Documents are stored in a dict keyed by ``doc.id``. ``search`` supports
    metadata filtering, cosine-similarity ranking and offset/limit paging.

    Filter format (``where``):
        * ``{"field": value}`` is shorthand for ``{"field": {"$eq": value}}``.
        * ``{"field": {"$op": value, ...}}`` with operators ``$eq``, ``$ne``,
          ``$gt``, ``$gte``, ``$lt``, ``$lte``, ``$in``, ``$nin``. Every
          operator listed for a field must hold (implicit AND).
        * ``{"$and": [...]}`` / ``{"$or": [...]}`` combine sub-filters and are
          themselves AND-ed with any sibling keys of the same dict.
        * Nested metadata is addressed with ``a__b`` or ``a.b`` keys.

    Project types are quoted in annotations so they are not evaluated when the
    class is defined.
    """

    name = "inmemory"
    supports_metadata_only = True

    # Operators that are well defined for any value, including a missing one.
    _EQUALITY_OPS = {
        "$eq": lambda value, expected: value == expected,
        "$ne": lambda value, expected: value != expected,
        "$in": lambda value, expected: value in expected,
        "$nin": lambda value, expected: value not in expected,
    }

    # Operators that need an ordering between the stored and expected value.
    _ORDERING_OPS = {
        "$gt": lambda value, expected: value > expected,
        "$gte": lambda value, expected: value >= expected,
        "$lt": lambda value, expected: value < expected,
        "$lte": lambda value, expected: value <= expected,
    }

    def __init__(self) -> None:
        self._docs: "Dict[str, VectorDocument]" = {}

    def create(
        self, docs: "List[VectorDocument] | VectorDocument | Dict[str, Any]"
    ) -> "List[VectorDocument]":
        """Store one or more documents, overwriting any existing id.

        Args:
            docs: A list of documents, a single document, or a dict of keyword
                arguments passed to ``VectorDocument.from_kwargs``.

        Returns:
            The stored documents as a new list.

        Raises:
            TypeError: If ``docs`` is none of the supported shapes.
        """
        if isinstance(docs, list):
            # Copy so the returned list is not an alias of the caller's list.
            normalized = list(docs)
        elif isinstance(docs, VectorDocument):
            normalized = [docs]
        elif isinstance(docs, dict):
            normalized = [VectorDocument.from_kwargs(**docs)]
        else:
            raise TypeError("Unsupported document type for create")
        for doc in normalized:
            self._docs[doc.id] = doc
        return normalized

    def delete(self, ids: List[str]) -> int:
        """Remove the given ids and return how many were actually present."""
        removed = 0
        for _id in ids:
            if self._docs.pop(_id, None) is not None:
                removed += 1
        return removed

    def get(self, _id: str) -> "Optional[VectorDocument]":
        """Return the document stored under ``_id``, or ``None``."""
        return self._docs.get(_id)

    def count(self) -> int:
        """Return the number of stored documents."""
        return len(self._docs)

    @staticmethod
    def _resolve(metadata: Dict[str, Any], key: str) -> Any:
        """Look up a possibly nested key (``a__b`` or ``a.b``) in ``metadata``."""
        parts = key.split("__") if "__" in key else key.split(".")
        value: Any = metadata
        for part in parts:
            if not isinstance(value, dict):
                # The path runs through a non-dict value: treat as missing.
                return None
            value = value.get(part)
        return value

    @classmethod
    def _matches_condition(cls, metadata: Dict[str, Any], key: str, condition: Dict[str, Any]) -> bool:
        """Return True when every known operator in ``condition`` holds for ``key``."""
        value = cls._resolve(metadata, key)
        for op, expected in condition.items():
            if op in cls._ORDERING_OPS:
                if value is None:
                    return False
                try:
                    satisfied = cls._ORDERING_OPS[op](value, expected)
                except TypeError:
                    # Values of incomparable types simply do not match.
                    satisfied = False
            elif op in cls._EQUALITY_OPS:
                satisfied = cls._EQUALITY_OPS[op](value, expected)
            else:
                # Unknown operators are ignored, as before.
                continue
            if not satisfied:
                return False
        return True

    @classmethod
    def _matches_where(cls, metadata: Dict[str, Any], where: Dict[str, Any]) -> bool:
        """Evaluate a filter dict against ``metadata``; all entries must hold."""
        for key, spec in where.items():
            if key == "$and":
                satisfied = all(cls._matches_where(metadata, sub) for sub in spec)
            elif key == "$or":
                satisfied = any(cls._matches_where(metadata, sub) for sub in spec)
            else:
                condition = spec if isinstance(spec, dict) else {"$eq": spec}
                satisfied = cls._matches_condition(metadata, key, condition)
            if not satisfied:
                return False
        return True

    @staticmethod
    def _cosine(a: List[float], b: List[float]) -> float:
        """Cosine similarity; 0.0 for empty, mismatched-length or zero vectors."""
        if not a or not b or len(a) != len(b):
            return 0.0
        dot = sum(x * y for x, y in zip(a, b))
        norm_a = math.sqrt(sum(x * x for x in a))
        norm_b = math.sqrt(sum(y * y for y in b))
        if norm_a == 0 or norm_b == 0:
            return 0.0
        return dot / (norm_a * norm_b)

    def search(
        self,
        vector: Optional[List[float]] = None,
        limit: Optional[int] = None,
        offset: int = 0,
        where: Optional[Dict[str, Any]] = None,
        fields: Optional[Set[str]] = None,
    ) -> "List[VectorDocument]":
        """Filter, rank and page the stored documents.

        Args:
            vector: Query vector; when given, results are ordered by descending
                cosine similarity. Otherwise insertion order is kept.
            limit: Maximum number of results; ``None`` means no limit.
            offset: Number of leading results to skip.
            where: Filter dict (see class docstring), or any object exposing
                ``to_dict()`` that returns one (e.g. a ``Q`` expression).
            fields: Accepted for interface compatibility; not used here.

        Returns:
            The matching documents for the requested page.
        """
        items = list(self._docs.values())

        if where:
            # Duck-typed so Q-like objects are accepted without an isinstance check.
            to_dict = getattr(where, "to_dict", None)
            if callable(to_dict):
                where = to_dict()
            if where:
                items = [doc for doc in items if self._matches_where(doc.metadata or {}, where)]

        if vector is not None:
            # list.sort is stable, so ties keep insertion order.
            items.sort(key=lambda doc: self._cosine(vector, doc.vector or []), reverse=True)

        start = offset
        end = start + (limit if limit is not None else len(items))
        return items[start:end]
148+
149+
class FixedEmbedding:
    """Deterministic embedding for testing without external API calls."""

    def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Return one 4-dimensional vector per input text.

        Each component is one of the first four bytes of the text's SHA-256
        digest scaled to ``[0.0, 1.0]``. A cryptographic digest is used rather
        than the built-in ``hash()`` because string hashes are salted per
        interpreter process, which would make the vectors change between runs.

        Args:
            texts: Texts to embed.

        Returns:
            A list with one 4-float vector per text, in input order.
        """
        # Local import keeps this block self-contained.
        import hashlib

        return [
            [byte / 255.0 for byte in hashlib.sha256(text.encode("utf-8")).digest()[:4]]
            for text in texts
        ]
165+
166+
@pytest.fixture(scope="module")
def mock_engine():
    """Provide a VectorEngine backed by InMemoryAdapter and FixedEmbedding.

    The engine's adapter is pre-populated with five documents (``doc1`` to
    ``doc5``) carrying ``category``, ``year`` and ``score`` metadata.
    """
    engine = VectorEngine(db=InMemoryAdapter(), embedding=FixedEmbedding())

    # Seed rows: (id, text, vector, metadata)
    seed_rows = [
        ("doc1", "AI in 2024", [0.1, 0.2, 0.3, 0.4], {"category": "tech", "year": 2024, "score": 91}),
        ("doc2", "Cooking tips", [0.0, 0.1, 0.0, 0.2], {"category": "food", "year": 2023, "score": 85}),
        ("doc3", "Travel guide", [0.2, 0.0, 0.1, 0.0], {"category": "travel", "year": 2022, "score": 78}),
        ("doc4", "Tech gadgets", [0.3, 0.2, 0.1, 0.0], {"category": "tech", "year": 2024, "score": 88}),
        ("doc5", "Healthy recipes", [0.05, 0.05, 0.05, 0.05], {"category": "food", "year": 2024, "score": 92}),
    ]
    seeded = [
        VectorDocument(id=doc_id, text=text, vector=vector, metadata=metadata)
        for doc_id, text, vector, metadata in seed_rows
    ]
    engine.db.create(seeded)
    return engine
209+
210+
@pytest.fixture(scope="module")
def sample_docs(mock_engine):
    """Fetch the five seeded documents (doc1..doc5) from the mock engine, in id order."""
    seeded_ids = ("doc1", "doc2", "doc3", "doc4", "doc5")
    return [mock_engine.get(doc_id) for doc_id in seeded_ids]
221+
222+
34223@pytest .fixture
35224def mock_embeddings (sample_texts ):
36225 """Mock embeddings for testing without API calls."""
0 commit comments