22#
33# SPDX-License-Identifier: Apache-2.0
44
5+ import dataclasses
56import logging
67from collections .abc import AsyncGenerator
7- from dataclasses import replace
88from pathlib import Path
99
1010import pytest
1111import pytest_asyncio
1212from haystack .dataclasses .byte_stream import ByteStream
1313from haystack .dataclasses .document import Document
1414from haystack .document_stores .errors import DocumentStoreError
15+ from haystack .testing .document_store_async import (
16+ CountDocumentsAsyncTest ,
17+ CountDocumentsByFilterAsyncTest ,
18+ CountUniqueMetadataByFilterAsyncTest ,
19+ DeleteAllAsyncTest ,
20+ DeleteByFilterAsyncTest ,
21+ DeleteDocumentsAsyncTest ,
22+ FilterDocumentsAsyncTest ,
23+ GetMetadataFieldMinMaxAsyncTest ,
24+ GetMetadataFieldsInfoAsyncTest ,
25+ GetMetadataFieldUniqueValuesAsyncTest ,
26+ UpdateByFilterAsyncTest ,
27+ WriteDocumentsAsyncTest ,
28+ )
1529
1630from haystack_integrations .document_stores .weaviate import WeaviateDocumentStore
1731from haystack_integrations .document_stores .weaviate .document_store import DOCUMENT_COLLECTION_PROPERTIES
1832
1933
2034@pytest .mark .integration
21- class TestWeaviateDocumentStoreAsync :
35+ class TestWeaviateDocumentStoreAsync (
36+ CountDocumentsAsyncTest ,
37+ WriteDocumentsAsyncTest ,
38+ DeleteDocumentsAsyncTest ,
39+ DeleteAllAsyncTest ,
40+ DeleteByFilterAsyncTest ,
41+ FilterDocumentsAsyncTest ,
42+ UpdateByFilterAsyncTest ,
43+ CountDocumentsByFilterAsyncTest ,
44+ CountUniqueMetadataByFilterAsyncTest ,
45+ GetMetadataFieldsInfoAsyncTest ,
46+ GetMetadataFieldMinMaxAsyncTest ,
47+ GetMetadataFieldUniqueValuesAsyncTest ,
48+ ):
2249 @pytest_asyncio .fixture
2350 async def document_store (self , request ) -> AsyncGenerator [WeaviateDocumentStore , None , None ]:
2451 collection_settings = {
@@ -39,6 +66,11 @@ async def document_store(self, request) -> AsyncGenerator[WeaviateDocumentStore,
3966 await (await store .async_client ).collections .delete (collection_settings ["class" ])
4067 await store .close_async ()
4168
69+ def assert_documents_are_equal (self , received : list [Document ], expected : list [Document ]):
70+ # filter_documents_async() returns Documents with score populated; strip it before comparing
71+ received = [dataclasses .replace (doc , score = None ) for doc in received ]
72+ super ().assert_documents_are_equal (received , expected )
73+
4274 @pytest .mark .asyncio
4375 async def test_close_async (self , document_store : WeaviateDocumentStore ) -> None :
4476 # Initialise client and collection
@@ -69,18 +101,6 @@ async def test_filter_documents_with_blob_data_async(
69101 assert len (docs ) == 1
70102 assert docs [0 ].blob == image
71103
72- @pytest .mark .asyncio
73- async def test_filter_documents_below_default_limit_async (self , document_store : WeaviateDocumentStore ) -> None :
74- docs = []
75- for index in range (9998 ):
76- docs .append (Document (content = "This is some content" , meta = {"index" : index }))
77- await document_store .write_documents_async (docs )
78- result = await document_store .filter_documents_async (
79- {"field" : "content" , "operator" : "==" , "value" : "This is some content" }
80- )
81-
82- assert len (result ) == 9998
83-
84104 @pytest .mark .asyncio
85105 async def test_filter_documents_over_default_limit (self , document_store : WeaviateDocumentStore ) -> None :
86106 docs = []
@@ -92,19 +112,6 @@ async def test_filter_documents_over_default_limit(self, document_store: Weaviat
92112 {"field" : "content" , "operator" : "==" , "value" : "This is some content" }
93113 )
94114
95- @pytest .mark .asyncio
96- async def test_write_documents_async (self , document_store : WeaviateDocumentStore ) -> None :
97- """
98- Test write_documents() with default policy overwrites existing documents.
99- """
100- doc = Document (content = "test doc" )
101- assert await document_store .write_documents_async ([doc ]) == 1
102- assert await document_store .count_documents_async () == 1
103-
104- doc = replace (doc , content = "test doc 2" )
105- assert await document_store .write_documents_async ([doc ]) == 1
106- assert await document_store .count_documents_async () == 1
107-
108115 @pytest .mark .asyncio
109116 async def test_write_documents_with_blob_data_async (
110117 self , document_store : WeaviateDocumentStore , test_files_path : Path
@@ -250,52 +257,6 @@ async def test_hybrid_retrieval_async_with_alpha(self, document_store):
250257 assert len (result_vector ) > 0
251258 assert result_vector [0 ].score > 0.0
252259
253- @pytest .mark .asyncio
254- async def test_delete_by_filter_async (self , document_store ):
255- docs = [
256- Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
257- Document (content = "Doc 2" , meta = {"category" : "TypeB" }),
258- Document (content = "Doc 3" , meta = {"category" : "TypeA" }),
259- ]
260- await document_store .write_documents_async (docs )
261-
262- # delete documents with category="TypeA"
263- deleted_count = await document_store .delete_by_filter_async (
264- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" }
265- )
266- assert deleted_count == 2
267- assert document_store .count_documents () == 1
268-
269- # verify only category TypeB remains
270- remaining_docs = await document_store .filter_documents_async ()
271- assert len (remaining_docs ) == 1
272- assert remaining_docs [0 ].meta ["category" ] == "TypeB"
273-
274- @pytest .mark .asyncio
275- async def test_update_by_filter_async (self , document_store ):
276- docs = [
277- Document (content = "Doc 1" , meta = {"category" : "TypeA" , "status" : "draft" }),
278- Document (content = "Doc 2" , meta = {"category" : "TypeB" , "status" : "draft" }),
279- Document (content = "Doc 3" , meta = {"category" : "TypeA" , "status" : "draft" }),
280- ]
281- await document_store .write_documents_async (docs )
282- assert document_store .count_documents () == 3
283-
284- # update status for category="TypeA" documents
285- updated_count = await document_store .update_by_filter_async (
286- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" }, meta = {"status" : "published" }
287- )
288- assert updated_count == 2
289-
290- # Verify the updates
291- published_docs = await document_store .filter_documents_async (
292- filters = {"field" : "meta.status" , "operator" : "==" , "value" : "published" }
293- )
294- assert len (published_docs ) == 2
295- for doc in published_docs :
296- assert doc .meta ["category" ] == "TypeA"
297- assert doc .meta ["status" ] == "published"
298-
299260 @pytest .mark .asyncio
300261 async def test_update_by_filter_async_with_pagination (self , document_store , monkeypatch ):
301262 # Reduce DEFAULT_QUERY_LIMIT to test pagination without creating 10000+ documents
@@ -328,84 +289,6 @@ async def test_update_by_filter_async_with_pagination(self, document_store, monk
328289 assert "index" in doc .meta
329290 assert 0 <= doc .meta ["index" ] < 250
330291
331- @pytest .mark .asyncio
332- async def test_count_documents_async (self , document_store : WeaviateDocumentStore ) -> None :
333- docs = [
334- Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
335- Document (content = "Doc 2" , meta = {"category" : "TypeB" }),
336- Document (content = "Doc 3" , meta = {"category" : "TypeA" }),
337- Document (content = "Doc 4" , meta = {"category" : "TypeA" }),
338- ]
339- await document_store .write_documents_async (docs )
340- assert await document_store .count_documents_async () == 4
341-
342- @pytest .mark .asyncio
343- async def test_count_documents_by_filter_async (self , document_store ):
344- docs = [
345- Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
346- Document (content = "Doc 2" , meta = {"category" : "TypeB" }),
347- Document (content = "Doc 3" , meta = {"category" : "TypeA" }),
348- Document (content = "Doc 4" , meta = {"category" : "TypeA" }),
349- ]
350- await document_store .write_documents_async (docs )
351- assert await document_store .count_documents_async () == 4
352-
353- count = await document_store .count_documents_by_filter_async (
354- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" }
355- )
356- assert count == 3
357-
358- count = await document_store .count_documents_by_filter_async (
359- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeB" }
360- )
361- assert count == 1
362-
363- count = await document_store .count_documents_by_filter_async (
364- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeC" }
365- )
366- assert count == 0
367-
368- @pytest .mark .asyncio
369- async def test_get_metadata_fields_info_async (self , document_store ):
370- fields_info = await document_store .get_metadata_fields_info_async ()
371-
372- assert "_original_id" not in fields_info
373- assert "content" not in fields_info
374- assert "blob_data" not in fields_info
375- assert "blob_mime_type" not in fields_info
376- assert "score" not in fields_info
377-
378- assert "category" in fields_info
379- assert fields_info ["category" ]["type" ] == "text"
380- assert "status" in fields_info
381- assert fields_info ["status" ]["type" ] == "text"
382-
383- @pytest .mark .asyncio
384- async def test_get_metadata_field_min_max_async (self , document_store ):
385- docs = [
386- Document (content = "Doc 1" , meta = {"number" : 10 }),
387- Document (content = "Doc 2" , meta = {"number" : 5 }),
388- Document (content = "Doc 3" , meta = {"number" : 20 }),
389- Document (content = "Doc 4" , meta = {"number" : 15 }),
390- ]
391- await document_store .write_documents_async (docs )
392-
393- result = await document_store .get_metadata_field_min_max_async ("number" )
394- assert result ["min" ] == 5
395- assert result ["max" ] == 20
396-
397- @pytest .mark .asyncio
398- async def test_get_metadata_field_min_max_async_with_meta_prefix (self , document_store ):
399- docs = [
400- Document (content = "Doc 1" , meta = {"number" : 100 }),
401- Document (content = "Doc 2" , meta = {"number" : 200 }),
402- ]
403- await document_store .write_documents_async (docs )
404-
405- result = await document_store .get_metadata_field_min_max_async ("meta.number" )
406- assert result ["min" ] == 100
407- assert result ["max" ] == 200
408-
409292 @pytest .mark .asyncio
410293 async def test_get_metadata_field_min_max_async_unsupported_type (self , document_store ):
411294 with pytest .raises (ValueError , match = "doesn't support min/max aggregation" ):
@@ -416,35 +299,6 @@ async def test_get_metadata_field_min_max_async_field_not_found(self, document_s
416299 with pytest .raises (ValueError , match = "not found in collection schema" ):
417300 await document_store .get_metadata_field_min_max_async ("nonexistent_field" )
418301
419- @pytest .mark .asyncio
420- async def test_count_unique_metadata_by_filter_async (self , document_store ):
421- docs = [
422- Document (content = "Doc 1" , meta = {"category" : "TypeA" , "status" : "draft" }),
423- Document (content = "Doc 2" , meta = {"category" : "TypeB" , "status" : "published" }),
424- Document (content = "Doc 3" , meta = {"category" : "TypeA" , "status" : "draft" }),
425- Document (content = "Doc 4" , meta = {"category" : "TypeC" , "status" : "published" }),
426- Document (content = "Doc 5" , meta = {"category" : "TypeA" , "status" : "archived" }),
427- ]
428- await document_store .write_documents_async (docs )
429-
430- result = await document_store .count_unique_metadata_by_filter_async (
431- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" }, metadata_fields = ["status" ]
432- )
433- assert result ["status" ] == 2
434-
435- result = await document_store .count_unique_metadata_by_filter_async (
436- filters = {
437- "operator" : "OR" ,
438- "conditions" : [
439- {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" },
440- {"field" : "meta.category" , "operator" : "==" , "value" : "TypeB" },
441- ],
442- },
443- metadata_fields = ["category" , "status" ],
444- )
445- assert result ["category" ] == 2
446- assert result ["status" ] == 3
447-
448302 @pytest .mark .asyncio
449303 async def test_count_unique_metadata_by_filter_async_with_meta_prefix (self , document_store ):
450304 docs = [
@@ -480,21 +334,6 @@ async def test_count_unique_metadata_by_filter_async_field_not_found(self, docum
480334 metadata_fields = ["nonexistent_field" ],
481335 )
482336
483- @pytest .mark .asyncio
484- async def test_get_metadata_field_unique_values_async (self , document_store ):
485- docs = [
486- Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
487- Document (content = "Doc 2" , meta = {"category" : "TypeB" }),
488- Document (content = "Doc 3" , meta = {"category" : "TypeA" }),
489- Document (content = "Doc 4" , meta = {"category" : "TypeC" }),
490- Document (content = "Doc 5" , meta = {"category" : "TypeB" }),
491- ]
492- await document_store .write_documents_async (docs )
493-
494- values , total_count = await document_store .get_metadata_field_unique_values_async ("category" )
495- assert total_count == 3
496- assert set (values ) == {"TypeA" , "TypeB" , "TypeC" }
497-
498337 @pytest .mark .asyncio
499338 async def test_get_metadata_field_unique_values_async_with_meta_prefix (self , document_store ):
500339 docs = [
@@ -555,29 +394,6 @@ async def test_get_metadata_field_unique_values_async_empty_result(self, documen
555394 assert total_count == 0
556395 assert values == []
557396
558- @pytest .mark .asyncio
559- async def test_delete_all_documents_recreate_async (self , document_store : WeaviateDocumentStore ) -> None :
560- docs = [Document (content = "test doc 1" ), Document (content = "test doc 2" )]
561- assert await document_store .write_documents_async (docs ) == 2
562- assert await document_store .count_documents_async () == 2
563-
564- cls = document_store ._collection_settings ["class" ]
565- collection = (await document_store .async_client ).collections .get (cls )
566- previous_config = (await collection .config .get ()).to_dict ()
567-
568- await document_store .delete_all_documents_async (recreate_index = True )
569- assert await document_store .count_documents_async () == 0
570-
571- new_config = (await (await document_store .async_client ).collections .get (cls ).config .get ()).to_dict ()
572- assert previous_config == new_config
573-
574- @pytest .mark .asyncio
575- async def test_delete_all_documents_batch_size_async (self , document_store : WeaviateDocumentStore ) -> None :
576- docs = [Document (content = str (i )) for i in range (0 , 5 )]
577- assert await document_store .write_documents_async (docs ) == 5
578- await document_store .delete_all_documents_async (batch_size = 2 )
579- assert await document_store .count_documents_async () == 0
580-
581397 @pytest .mark .asyncio
582398 async def test_delete_all_documents_excessive_batch_size_async (
583399 self , document_store : WeaviateDocumentStore , caplog : pytest .LogCaptureFixture
0 commit comments