Skip to content

Commit 559550b

Browse files
refactor(weaviate): use async DocumentStore mixin tests
Inherits all standard async mixin test classes from `haystack.testing.document_store_async`, removing 13 tests that are now covered by the mixins:

- test_write_documents_async
- test_count_documents_async
- test_filter_documents_below_default_limit_async
- test_delete_by_filter_async
- test_update_by_filter_async
- test_count_documents_by_filter_async
- test_get_metadata_fields_info_async
- test_get_metadata_field_min_max_async
- test_get_metadata_field_min_max_async_with_meta_prefix
- test_count_unique_metadata_by_filter_async
- test_get_metadata_field_unique_values_async
- test_delete_all_documents_recreate_async
- test_delete_all_documents_batch_size_async

Weaviate-specific tests are preserved: blob data handling, the over-default-limit error, all retrieval tests (BM25, embedding, hybrid), pagination in update_by_filter, Weaviate-specific error cases, and the excessive batch size warning test.

Adds an `assert_documents_are_equal` override that strips scores before comparison, since `filter_documents_async` populates the score field.

Closes #3054
1 parent fe71e97 commit 559550b

1 file changed

Lines changed: 34 additions & 218 deletions

File tree

integrations/weaviate/tests/test_document_store_async.py

Lines changed: 34 additions & 218 deletions
Original file line number | Diff line number | Diff line change
@@ -2,23 +2,50 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import dataclasses
56
import logging
67
from collections.abc import AsyncGenerator
7-
from dataclasses import replace
88
from pathlib import Path
99

1010
import pytest
1111
import pytest_asyncio
1212
from haystack.dataclasses.byte_stream import ByteStream
1313
from haystack.dataclasses.document import Document
1414
from haystack.document_stores.errors import DocumentStoreError
15+
from haystack.testing.document_store_async import (
16+
CountDocumentsAsyncTest,
17+
CountDocumentsByFilterAsyncTest,
18+
CountUniqueMetadataByFilterAsyncTest,
19+
DeleteAllAsyncTest,
20+
DeleteByFilterAsyncTest,
21+
DeleteDocumentsAsyncTest,
22+
FilterDocumentsAsyncTest,
23+
GetMetadataFieldMinMaxAsyncTest,
24+
GetMetadataFieldsInfoAsyncTest,
25+
GetMetadataFieldUniqueValuesAsyncTest,
26+
UpdateByFilterAsyncTest,
27+
WriteDocumentsAsyncTest,
28+
)
1529

1630
from haystack_integrations.document_stores.weaviate import WeaviateDocumentStore
1731
from haystack_integrations.document_stores.weaviate.document_store import DOCUMENT_COLLECTION_PROPERTIES
1832

1933

2034
@pytest.mark.integration
21-
class TestWeaviateDocumentStoreAsync:
35+
class TestWeaviateDocumentStoreAsync(
36+
CountDocumentsAsyncTest,
37+
WriteDocumentsAsyncTest,
38+
DeleteDocumentsAsyncTest,
39+
DeleteAllAsyncTest,
40+
DeleteByFilterAsyncTest,
41+
FilterDocumentsAsyncTest,
42+
UpdateByFilterAsyncTest,
43+
CountDocumentsByFilterAsyncTest,
44+
CountUniqueMetadataByFilterAsyncTest,
45+
GetMetadataFieldsInfoAsyncTest,
46+
GetMetadataFieldMinMaxAsyncTest,
47+
GetMetadataFieldUniqueValuesAsyncTest,
48+
):
2249
@pytest_asyncio.fixture
2350
async def document_store(self, request) -> AsyncGenerator[WeaviateDocumentStore, None, None]:
2451
collection_settings = {
@@ -39,6 +66,11 @@ async def document_store(self, request) -> AsyncGenerator[WeaviateDocumentStore,
3966
await (await store.async_client).collections.delete(collection_settings["class"])
4067
await store.close_async()
4168

69+
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
70+
# filter_documents_async() returns Documents with score populated; strip it before comparing
71+
received = [dataclasses.replace(doc, score=None) for doc in received]
72+
super().assert_documents_are_equal(received, expected)
73+
4274
@pytest.mark.asyncio
4375
async def test_close_async(self, document_store: WeaviateDocumentStore) -> None:
4476
# Initialise client and collection
@@ -69,18 +101,6 @@ async def test_filter_documents_with_blob_data_async(
69101
assert len(docs) == 1
70102
assert docs[0].blob == image
71103

72-
@pytest.mark.asyncio
73-
async def test_filter_documents_below_default_limit_async(self, document_store: WeaviateDocumentStore) -> None:
74-
docs = []
75-
for index in range(9998):
76-
docs.append(Document(content="This is some content", meta={"index": index}))
77-
await document_store.write_documents_async(docs)
78-
result = await document_store.filter_documents_async(
79-
{"field": "content", "operator": "==", "value": "This is some content"}
80-
)
81-
82-
assert len(result) == 9998
83-
84104
@pytest.mark.asyncio
85105
async def test_filter_documents_over_default_limit(self, document_store: WeaviateDocumentStore) -> None:
86106
docs = []
@@ -92,19 +112,6 @@ async def test_filter_documents_over_default_limit(self, document_store: Weaviat
92112
{"field": "content", "operator": "==", "value": "This is some content"}
93113
)
94114

95-
@pytest.mark.asyncio
96-
async def test_write_documents_async(self, document_store: WeaviateDocumentStore) -> None:
97-
"""
98-
Test write_documents() with default policy overwrites existing documents.
99-
"""
100-
doc = Document(content="test doc")
101-
assert await document_store.write_documents_async([doc]) == 1
102-
assert await document_store.count_documents_async() == 1
103-
104-
doc = replace(doc, content="test doc 2")
105-
assert await document_store.write_documents_async([doc]) == 1
106-
assert await document_store.count_documents_async() == 1
107-
108115
@pytest.mark.asyncio
109116
async def test_write_documents_with_blob_data_async(
110117
self, document_store: WeaviateDocumentStore, test_files_path: Path
@@ -250,52 +257,6 @@ async def test_hybrid_retrieval_async_with_alpha(self, document_store):
250257
assert len(result_vector) > 0
251258
assert result_vector[0].score > 0.0
252259

253-
@pytest.mark.asyncio
254-
async def test_delete_by_filter_async(self, document_store):
255-
docs = [
256-
Document(content="Doc 1", meta={"category": "TypeA"}),
257-
Document(content="Doc 2", meta={"category": "TypeB"}),
258-
Document(content="Doc 3", meta={"category": "TypeA"}),
259-
]
260-
await document_store.write_documents_async(docs)
261-
262-
# delete documents with category="TypeA"
263-
deleted_count = await document_store.delete_by_filter_async(
264-
filters={"field": "meta.category", "operator": "==", "value": "TypeA"}
265-
)
266-
assert deleted_count == 2
267-
assert document_store.count_documents() == 1
268-
269-
# verify only category TypeB remains
270-
remaining_docs = await document_store.filter_documents_async()
271-
assert len(remaining_docs) == 1
272-
assert remaining_docs[0].meta["category"] == "TypeB"
273-
274-
@pytest.mark.asyncio
275-
async def test_update_by_filter_async(self, document_store):
276-
docs = [
277-
Document(content="Doc 1", meta={"category": "TypeA", "status": "draft"}),
278-
Document(content="Doc 2", meta={"category": "TypeB", "status": "draft"}),
279-
Document(content="Doc 3", meta={"category": "TypeA", "status": "draft"}),
280-
]
281-
await document_store.write_documents_async(docs)
282-
assert document_store.count_documents() == 3
283-
284-
# update status for category="TypeA" documents
285-
updated_count = await document_store.update_by_filter_async(
286-
filters={"field": "meta.category", "operator": "==", "value": "TypeA"}, meta={"status": "published"}
287-
)
288-
assert updated_count == 2
289-
290-
# Verify the updates
291-
published_docs = await document_store.filter_documents_async(
292-
filters={"field": "meta.status", "operator": "==", "value": "published"}
293-
)
294-
assert len(published_docs) == 2
295-
for doc in published_docs:
296-
assert doc.meta["category"] == "TypeA"
297-
assert doc.meta["status"] == "published"
298-
299260
@pytest.mark.asyncio
300261
async def test_update_by_filter_async_with_pagination(self, document_store, monkeypatch):
301262
# Reduce DEFAULT_QUERY_LIMIT to test pagination without creating 10000+ documents
@@ -328,84 +289,6 @@ async def test_update_by_filter_async_with_pagination(self, document_store, monk
328289
assert "index" in doc.meta
329290
assert 0 <= doc.meta["index"] < 250
330291

331-
@pytest.mark.asyncio
332-
async def test_count_documents_async(self, document_store: WeaviateDocumentStore) -> None:
333-
docs = [
334-
Document(content="Doc 1", meta={"category": "TypeA"}),
335-
Document(content="Doc 2", meta={"category": "TypeB"}),
336-
Document(content="Doc 3", meta={"category": "TypeA"}),
337-
Document(content="Doc 4", meta={"category": "TypeA"}),
338-
]
339-
await document_store.write_documents_async(docs)
340-
assert await document_store.count_documents_async() == 4
341-
342-
@pytest.mark.asyncio
343-
async def test_count_documents_by_filter_async(self, document_store):
344-
docs = [
345-
Document(content="Doc 1", meta={"category": "TypeA"}),
346-
Document(content="Doc 2", meta={"category": "TypeB"}),
347-
Document(content="Doc 3", meta={"category": "TypeA"}),
348-
Document(content="Doc 4", meta={"category": "TypeA"}),
349-
]
350-
await document_store.write_documents_async(docs)
351-
assert await document_store.count_documents_async() == 4
352-
353-
count = await document_store.count_documents_by_filter_async(
354-
filters={"field": "meta.category", "operator": "==", "value": "TypeA"}
355-
)
356-
assert count == 3
357-
358-
count = await document_store.count_documents_by_filter_async(
359-
filters={"field": "meta.category", "operator": "==", "value": "TypeB"}
360-
)
361-
assert count == 1
362-
363-
count = await document_store.count_documents_by_filter_async(
364-
filters={"field": "meta.category", "operator": "==", "value": "TypeC"}
365-
)
366-
assert count == 0
367-
368-
@pytest.mark.asyncio
369-
async def test_get_metadata_fields_info_async(self, document_store):
370-
fields_info = await document_store.get_metadata_fields_info_async()
371-
372-
assert "_original_id" not in fields_info
373-
assert "content" not in fields_info
374-
assert "blob_data" not in fields_info
375-
assert "blob_mime_type" not in fields_info
376-
assert "score" not in fields_info
377-
378-
assert "category" in fields_info
379-
assert fields_info["category"]["type"] == "text"
380-
assert "status" in fields_info
381-
assert fields_info["status"]["type"] == "text"
382-
383-
@pytest.mark.asyncio
384-
async def test_get_metadata_field_min_max_async(self, document_store):
385-
docs = [
386-
Document(content="Doc 1", meta={"number": 10}),
387-
Document(content="Doc 2", meta={"number": 5}),
388-
Document(content="Doc 3", meta={"number": 20}),
389-
Document(content="Doc 4", meta={"number": 15}),
390-
]
391-
await document_store.write_documents_async(docs)
392-
393-
result = await document_store.get_metadata_field_min_max_async("number")
394-
assert result["min"] == 5
395-
assert result["max"] == 20
396-
397-
@pytest.mark.asyncio
398-
async def test_get_metadata_field_min_max_async_with_meta_prefix(self, document_store):
399-
docs = [
400-
Document(content="Doc 1", meta={"number": 100}),
401-
Document(content="Doc 2", meta={"number": 200}),
402-
]
403-
await document_store.write_documents_async(docs)
404-
405-
result = await document_store.get_metadata_field_min_max_async("meta.number")
406-
assert result["min"] == 100
407-
assert result["max"] == 200
408-
409292
@pytest.mark.asyncio
410293
async def test_get_metadata_field_min_max_async_unsupported_type(self, document_store):
411294
with pytest.raises(ValueError, match="doesn't support min/max aggregation"):
@@ -416,35 +299,6 @@ async def test_get_metadata_field_min_max_async_field_not_found(self, document_s
416299
with pytest.raises(ValueError, match="not found in collection schema"):
417300
await document_store.get_metadata_field_min_max_async("nonexistent_field")
418301

419-
@pytest.mark.asyncio
420-
async def test_count_unique_metadata_by_filter_async(self, document_store):
421-
docs = [
422-
Document(content="Doc 1", meta={"category": "TypeA", "status": "draft"}),
423-
Document(content="Doc 2", meta={"category": "TypeB", "status": "published"}),
424-
Document(content="Doc 3", meta={"category": "TypeA", "status": "draft"}),
425-
Document(content="Doc 4", meta={"category": "TypeC", "status": "published"}),
426-
Document(content="Doc 5", meta={"category": "TypeA", "status": "archived"}),
427-
]
428-
await document_store.write_documents_async(docs)
429-
430-
result = await document_store.count_unique_metadata_by_filter_async(
431-
filters={"field": "meta.category", "operator": "==", "value": "TypeA"}, metadata_fields=["status"]
432-
)
433-
assert result["status"] == 2
434-
435-
result = await document_store.count_unique_metadata_by_filter_async(
436-
filters={
437-
"operator": "OR",
438-
"conditions": [
439-
{"field": "meta.category", "operator": "==", "value": "TypeA"},
440-
{"field": "meta.category", "operator": "==", "value": "TypeB"},
441-
],
442-
},
443-
metadata_fields=["category", "status"],
444-
)
445-
assert result["category"] == 2
446-
assert result["status"] == 3
447-
448302
@pytest.mark.asyncio
449303
async def test_count_unique_metadata_by_filter_async_with_meta_prefix(self, document_store):
450304
docs = [
@@ -480,21 +334,6 @@ async def test_count_unique_metadata_by_filter_async_field_not_found(self, docum
480334
metadata_fields=["nonexistent_field"],
481335
)
482336

483-
@pytest.mark.asyncio
484-
async def test_get_metadata_field_unique_values_async(self, document_store):
485-
docs = [
486-
Document(content="Doc 1", meta={"category": "TypeA"}),
487-
Document(content="Doc 2", meta={"category": "TypeB"}),
488-
Document(content="Doc 3", meta={"category": "TypeA"}),
489-
Document(content="Doc 4", meta={"category": "TypeC"}),
490-
Document(content="Doc 5", meta={"category": "TypeB"}),
491-
]
492-
await document_store.write_documents_async(docs)
493-
494-
values, total_count = await document_store.get_metadata_field_unique_values_async("category")
495-
assert total_count == 3
496-
assert set(values) == {"TypeA", "TypeB", "TypeC"}
497-
498337
@pytest.mark.asyncio
499338
async def test_get_metadata_field_unique_values_async_with_meta_prefix(self, document_store):
500339
docs = [
@@ -555,29 +394,6 @@ async def test_get_metadata_field_unique_values_async_empty_result(self, documen
555394
assert total_count == 0
556395
assert values == []
557396

558-
@pytest.mark.asyncio
559-
async def test_delete_all_documents_recreate_async(self, document_store: WeaviateDocumentStore) -> None:
560-
docs = [Document(content="test doc 1"), Document(content="test doc 2")]
561-
assert await document_store.write_documents_async(docs) == 2
562-
assert await document_store.count_documents_async() == 2
563-
564-
cls = document_store._collection_settings["class"]
565-
collection = (await document_store.async_client).collections.get(cls)
566-
previous_config = (await collection.config.get()).to_dict()
567-
568-
await document_store.delete_all_documents_async(recreate_index=True)
569-
assert await document_store.count_documents_async() == 0
570-
571-
new_config = (await (await document_store.async_client).collections.get(cls).config.get()).to_dict()
572-
assert previous_config == new_config
573-
574-
@pytest.mark.asyncio
575-
async def test_delete_all_documents_batch_size_async(self, document_store: WeaviateDocumentStore) -> None:
576-
docs = [Document(content=str(i)) for i in range(0, 5)]
577-
assert await document_store.write_documents_async(docs) == 5
578-
await document_store.delete_all_documents_async(batch_size=2)
579-
assert await document_store.count_documents_async() == 0
580-
581397
@pytest.mark.asyncio
582398
async def test_delete_all_documents_excessive_batch_size_async(
583399
self, document_store: WeaviateDocumentStore, caplog: pytest.LogCaptureFixture

0 commit comments

Comments
 (0)