@@ -89,64 +89,9 @@ def test_from_dict(_mock_boto3):
8989 assert store .create_bucket_and_index is False
9090
9191
92- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
93- def test_count_documents_empty (mock_boto3 ):
94- client = MagicMock ()
95- client .get_vector_bucket .return_value = {}
96- client .get_index .return_value = {}
97- client .list_vectors .return_value = {"vectors" : []}
98- mock_boto3 .client .return_value = client
99-
100- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
101- assert store .count_documents () == 0
102-
103-
104- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
105- def test_count_documents_pagination (mock_boto3 ):
106- client = MagicMock ()
107- client .get_vector_bucket .return_value = {}
108- client .get_index .return_value = {}
109- client .list_vectors .side_effect = [
110- {"vectors" : [{"key" : "1" }, {"key" : "2" }], "nextToken" : "tok" },
111- {"vectors" : [{"key" : "3" }]},
112- ]
113- mock_boto3 .client .return_value = client
114-
115- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
116- assert store .count_documents () == 3
117-
118-
119- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
120- def test_write_documents (mock_boto3 ):
121- client = MagicMock ()
122- client .get_vector_bucket .return_value = {}
123- client .get_index .return_value = {}
124- client .put_vectors .return_value = {}
125- mock_boto3 .client .return_value = client
126-
127- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
128- docs = [
129- Document (id = "1" , content = "Hello" , embedding = [0.1 ] * 4 ),
130- Document (id = "2" , content = "World" , embedding = [0.2 ] * 4 ),
131- ]
132- assert store .write_documents (docs ) == 2
133- client .put_vectors .assert_called_once ()
134-
135-
136- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
137- def test_write_documents_empty (mock_boto3 ):
138- client = MagicMock ()
139- client .get_vector_bucket .return_value = {}
140- client .get_index .return_value = {}
141- mock_boto3 .client .return_value = client
142-
143- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
144- assert store .write_documents ([]) == 0
145- client .put_vectors .assert_not_called ()
146-
147-
14892@patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
14993def test_write_documents_no_embedding_raises (mock_boto3 ):
94+ """S3 Vectors requires embeddings — this tests our validation, not the store."""
15095 client = MagicMock ()
15196 client .get_vector_bucket .return_value = {}
15297 client .get_index .return_value = {}
@@ -159,98 +104,82 @@ def test_write_documents_no_embedding_raises(mock_boto3):
159104
160105@patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
161106def test_write_documents_skip_existing (mock_boto3 ):
107+ """Tests our batch existence check logic for SKIP policy."""
162108 client = MagicMock ()
163109 client .get_vector_bucket .return_value = {}
164110 client .get_index .return_value = {}
165111 client .get_vectors .return_value = {"vectors" : [{"key" : "1" }]}
166112 mock_boto3 .client .return_value = client
167113
168114 store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
169- docs = [Document (id = "1" , content = "Hello" , embedding = [0.1 ] * 4 )]
170- assert store .write_documents (docs , policy = DuplicatePolicy .SKIP ) == 0
115+ result = store .write_documents (
116+ [Document (id = "1" , content = "Hello" , embedding = [0.1 ] * 4 )],
117+ policy = DuplicatePolicy .SKIP ,
118+ )
119+ assert result == 0
120+ client .put_vectors .assert_not_called ()
171121
172122
173123@patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
174124def test_write_documents_none_policy_raises (mock_boto3 ):
125+ """Tests our batch existence check logic for NONE policy."""
175126 client = MagicMock ()
176127 client .get_vector_bucket .return_value = {}
177128 client .get_index .return_value = {}
178129 client .get_vectors .return_value = {"vectors" : [{"key" : "1" }]}
179130 mock_boto3 .client .return_value = client
180131
181132 store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
182- docs = [Document (id = "1" , content = "Hello" , embedding = [0.1 ] * 4 )]
183133 with pytest .raises (DocumentStoreError , match = "already exist" ):
184- store .write_documents (docs , policy = DuplicatePolicy .NONE )
134+ store .write_documents (
135+ [Document (id = "1" , content = "Hello" , embedding = [0.1 ] * 4 )],
136+ policy = DuplicatePolicy .NONE ,
137+ )
185138
186139
187140@patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
188- def test_write_documents_metadata (mock_boto3 ):
189- client = MagicMock ()
190- client .get_vector_bucket .return_value = {}
191- client .get_index .return_value = {}
192- client .put_vectors .return_value = {}
193- mock_boto3 .client .return_value = client
194-
195- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
196- docs = [Document (id = "1" , content = "Hello" , embedding = [0.1 ] * 4 , meta = {"category" : "test" , "year" : 2024 })]
197- store .write_documents (docs )
198-
199- vectors = client .put_vectors .call_args [1 ]["vectors" ]
200- assert len (vectors ) == 1
201- assert vectors [0 ]["key" ] == "1"
202- assert vectors [0 ]["metadata" ]["_content" ] == "Hello"
203- assert vectors [0 ]["metadata" ]["category" ] == "test"
204- assert vectors [0 ]["metadata" ]["year" ] == 2024
205-
206-
207- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
208- def test_delete_documents (mock_boto3 ):
209- client = MagicMock ()
210- client .get_vector_bucket .return_value = {}
211- client .get_index .return_value = {}
212- client .delete_vectors .return_value = {}
213- mock_boto3 .client .return_value = client
214-
215- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
216- store .delete_documents (["1" , "2" ])
217- client .delete_vectors .assert_called_once_with (vectorBucketName = "b" , indexName = "i" , keys = ["1" , "2" ])
218-
219-
220- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
221- def test_delete_documents_empty (mock_boto3 ):
141+ def test_embedding_retrieval_score_conversion (mock_boto3 ):
142+ """Tests our distance-to-score conversion logic — the only non-trivial transform in retrieval."""
222143 client = MagicMock ()
223144 client .get_vector_bucket .return_value = {}
224145 client .get_index .return_value = {}
146+ client .query_vectors .return_value = {
147+ "vectors" : [{"key" : "1" , "distance" : 0.05 , "metadata" : {"_content" : "Hello" , "category" : "news" }}],
148+ "distanceMetric" : "cosine" ,
149+ }
225150 mock_boto3 .client .return_value = client
226151
227152 store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
228- store .delete_documents ([])
229- client .delete_vectors .assert_not_called ()
153+ docs = store ._embedding_retrieval (query_embedding = [0.1 ] * 4 , top_k = 5 )
154+ assert len (docs ) == 1
155+ assert docs [0 ].id == "1"
156+ assert docs [0 ].content == "Hello"
157+ assert docs [0 ].score == pytest .approx (0.95 ) # cosine: 1.0 - 0.05
158+ assert docs [0 ].meta == {"category" : "news" }
230159
231160
232161@patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
233- def test_embedding_retrieval (mock_boto3 ):
162+ def test_embedding_retrieval_euclidean_score (mock_boto3 ):
163+ """Tests euclidean distance-to-score conversion (negated)."""
234164 client = MagicMock ()
235165 client .get_vector_bucket .return_value = {}
236166 client .get_index .return_value = {}
237167 client .query_vectors .return_value = {
238- "vectors" : [{"key" : "1" , "distance" : 0.05 , "metadata" : {"_content" : "Hello" , "category" : "news" }}],
239- "distanceMetric" : "cosine " ,
168+ "vectors" : [{"key" : "1" , "distance" : 1.5 , "metadata" : {}}],
169+ "distanceMetric" : "euclidean " ,
240170 }
241171 mock_boto3 .client .return_value = client
242172
243- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
173+ store = S3VectorsDocumentStore (
174+ vector_bucket_name = "b" , index_name = "i" , dimension = 4 , distance_metric = "euclidean" , region_name = "us-east-1"
175+ )
244176 docs = store ._embedding_retrieval (query_embedding = [0.1 ] * 4 , top_k = 5 )
245- assert len (docs ) == 1
246- assert docs [0 ].id == "1"
247- assert docs [0 ].content == "Hello"
248- assert docs [0 ].score == pytest .approx (0.95 ) # 1 - 0.05
249- assert docs [0 ].meta == {"category" : "news" }
177+ assert docs [0 ].score == pytest .approx (- 1.5 ) # euclidean: negated
250178
251179
252180@patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
253- def test_embedding_retrieval_with_filters (mock_boto3 ):
181+ def test_embedding_retrieval_passes_filters (mock_boto3 ):
182+ """Tests that Haystack filters are converted and passed to query_vectors."""
254183 client = MagicMock ()
255184 client .get_vector_bucket .return_value = {}
256185 client .get_index .return_value = {}
@@ -265,19 +194,15 @@ def test_embedding_retrieval_with_filters(mock_boto3):
265194 assert call_args ["filter" ] == {"$and" : [{"category" : {"$eq" : "news" }}]}
266195
267196
268- @patch ("haystack_integrations.document_stores.amazon_s3_vectors.document_store.boto3" )
269- def test_embedding_retrieval_empty_embedding_raises (mock_boto3 ):
270- client = MagicMock ()
271- client .get_vector_bucket .return_value = {}
272- client .get_index .return_value = {}
273- mock_boto3 .client .return_value = client
274-
275- store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , region_name = "us-east-1" )
197+ def test_embedding_retrieval_empty_embedding_raises ():
198+ """Tests our input validation — no mocking needed."""
199+ store = S3VectorsDocumentStore (vector_bucket_name = "b" , index_name = "i" , dimension = 4 , create_bucket_and_index = False )
276200 with pytest .raises (ValueError , match = "non-empty" ):
277201 store ._embedding_retrieval (query_embedding = [])
278202
279203
280204def test_document_to_s3_vector ():
205+ """Tests our Document → S3 vector conversion (pure function)."""
281206 doc = Document (
282207 id = "test-1" , content = "Hello world" , embedding = [0.1 , 0.2 , 0.3 ], meta = {"category" : "test" , "year" : 2024 }
283208 )
@@ -290,6 +215,7 @@ def test_document_to_s3_vector():
290215
291216
292217def test_s3_vector_to_document ():
218+ """Tests our S3 vector → Document conversion (pure function)."""
293219 vector = {
294220 "key" : "test-1" ,
295221 "data" : {"float32" : [0.1 , 0.2 , 0.3 ]},
@@ -303,6 +229,7 @@ def test_s3_vector_to_document():
303229
304230
305231def test_document_roundtrip ():
232+ """Tests Document → S3 vector → Document is lossless."""
306233 doc = Document (
307234 id = "test-1" , content = "Hello world" , embedding = [0.1 , 0.2 , 0.3 ], meta = {"category" : "test" , "year" : 2024 }
308235 )
0 commit comments