diff --git a/integrations/pgvector/pyproject.toml b/integrations/pgvector/pyproject.toml index a011115755..299672cee8 100644 --- a/integrations/pgvector/pyproject.toml +++ b/integrations/pgvector/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai>=2.26.1", + "haystack-ai>=2.27.0", "pgvector>=0.3.0", "psycopg[binary]" ] diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index edbb3fae87..0a32bbbf3c 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -1669,7 +1669,7 @@ def _analyze_metadata_fields_from_records(records: list[dict[str, Any]]) -> dict :param records: List of database records containing 'meta' field. :returns: A dictionary mapping field names to their type information. """ - fields_info: dict[str, dict[str, str]] = {"content": {"type": "text"}} + fields_info: dict[str, dict[str, str]] = {} # Analyze metadata from all documents for record in records: @@ -1846,15 +1846,14 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]: :returns: A dictionary with 'min' and 'max' keys containing the minimum and maximum values. For numeric fields (integer, real), returns numeric min/max. For text fields, returns lexicographic min/max based on database collation. - :raises ValueError: If the field doesn't exist or has no values. + Returns ``{"min": None, "max": None}`` when the field has no values or the store is empty. """ normalized_field = PgvectorDocumentStore._normalize_metadata_field_name(metadata_field) # Get field type information from metadata fields info fields_info = self.get_metadata_fields_info() if normalized_field not in fields_info: - msg = f"Metadata field '{metadata_field}' not found in document store" - raise ValueError(msg) + return {"min": None, "max": None} field_type = fields_info[normalized_field]["type"] sql_query = self._build_min_max_query(normalized_field, field_type) @@ -1879,15 +1878,14 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st :returns: A dictionary with 'min' and 'max' keys containing the minimum and maximum values. For numeric fields (integer, real), returns numeric min/max. For text fields, returns lexicographic min/max based on database collation. - :raises ValueError: If the field doesn't exist or has no values. + Returns ``{"min": None, "max": None}`` when the field has no values or the store is empty. """ normalized_field = PgvectorDocumentStore._normalize_metadata_field_name(metadata_field) # Get field type information from metadata fields info fields_info = await self.get_metadata_fields_info_async() if normalized_field not in fields_info: - msg = f"Metadata field '{metadata_field}' not found in document store" - raise ValueError(msg) + return {"min": None, "max": None} field_type = fields_info[normalized_field]["type"] sql_query = self._build_min_max_query(normalized_field, field_type) diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 2b7e9613eb..d6cdfdb14f 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -44,18 +44,19 @@ class TestDocumentStore( GetMetadataFieldUniqueValuesTest, ): def test_get_metadata_fields_info_empty_collection(self, document_store: PgvectorDocumentStore): - """PgvectorDocumentStore always includes 'content' in fields info, even for empty stores.""" + """Returns empty dict when the store has no documents.""" assert document_store.count_documents() == 0 fields_info = document_store.get_metadata_fields_info() - assert fields_info == {"content": {"type": "text"}} + assert fields_info == {} def test_get_metadata_field_min_max_empty_collection(self, document_store: PgvectorDocumentStore): - """PgvectorDocumentStore raises ValueError when the field doesn't exist in the store.""" + """Returns None min/max when the field doesn't exist in the store.""" assert document_store.count_documents() == 0 - with pytest.raises(ValueError, match="not found in document store"): - document_store.get_metadata_field_min_max("priority") + result = document_store.get_metadata_field_min_max("priority") + assert result["min"] is None + assert result["max"] is None def test_write_documents(self, document_store: PgvectorDocumentStore): docs = [Document(id="1")] diff --git a/integrations/pgvector/tests/test_document_store_async.py b/integrations/pgvector/tests/test_document_store_async.py index 64b4a49bbb..bd8896a798 100644 --- a/integrations/pgvector/tests/test_document_store_async.py +++ b/integrations/pgvector/tests/test_document_store_async.py @@ -9,6 +9,22 @@ from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy +from haystack.testing.document_store import ( + CountDocumentsByFilterAsyncTest, + CountUniqueMetadataByFilterAsyncTest, + UpdateByFilterAsyncTest, +) +from haystack.testing.document_store_async import ( + CountDocumentsAsyncTest, + DeleteAllAsyncTest, + DeleteByFilterAsyncTest, + DeleteDocumentsAsyncTest, + FilterDocumentsAsyncTest, + GetMetadataFieldMinMaxAsyncTest, + GetMetadataFieldsInfoAsyncTest, + GetMetadataFieldUniqueValuesAsyncTest, + WriteDocumentsAsyncTest, +) from haystack.utils import Secret from psycopg.sql import SQL @@ -17,13 +33,51 @@ @pytest.mark.integration @pytest.mark.asyncio -class TestDocumentStoreAsync: - async def test_write_documents(self, document_store: PgvectorDocumentStore): +class TestDocumentStoreAsync( + CountDocumentsAsyncTest, + WriteDocumentsAsyncTest, + DeleteDocumentsAsyncTest, + DeleteAllAsyncTest, + DeleteByFilterAsyncTest, + FilterDocumentsAsyncTest, + UpdateByFilterAsyncTest, + CountDocumentsByFilterAsyncTest, + CountUniqueMetadataByFilterAsyncTest, + GetMetadataFieldsInfoAsyncTest, + GetMetadataFieldMinMaxAsyncTest, + GetMetadataFieldUniqueValuesAsyncTest, +): + @staticmethod + def assert_documents_are_equal(received: list[Document], expected: list[Document]): + """ + Embeddings lose float32 precision when round-tripped through pgvector, so we + compare them approximately and then do an exact equality check on the rest. + """ + assert len(received) == len(expected) + received.sort(key=lambda x: x.id) + expected.sort(key=lambda x: x.id) + for received_doc, expected_doc in zip(received, expected, strict=True): + if received_doc.embedding is None: + assert expected_doc.embedding is None + else: + assert received_doc.embedding == pytest.approx(expected_doc.embedding) + received_doc.embedding, expected_doc.embedding = None, None + assert received_doc == expected_doc + + async def test_write_documents_async(self, document_store: PgvectorDocumentStore): + """pgvector default policy raises DuplicateDocumentError on duplicate writes.""" docs = [Document(id="1")] assert await document_store.write_documents_async(docs) == 1 with pytest.raises(DuplicateDocumentError): await document_store.write_documents_async(docs, DuplicatePolicy.FAIL) + async def test_count_not_empty_async(self, document_store: PgvectorDocumentStore): + """Override: mixin method is missing 'self', causing fixture injection to fail.""" + await document_store.write_documents_async( + [Document(content="test doc 1"), Document(content="test doc 2"), Document(content="test doc 3")] + ) + assert await document_store.count_documents_async() == 3 + async def test_write_blob(self, document_store: PgvectorDocumentStore): bytestream = ByteStream(b"test", meta={"meta_key": "meta_value"}, mime_type="mime_type") docs = [Document(id="1", blob=bytestream)] @@ -32,46 +86,6 @@ async def test_write_blob(self, document_store: PgvectorDocumentStore): retrieved_docs = await document_store.filter_documents_async() assert retrieved_docs == docs - async def test_count_documents(self, document_store: PgvectorDocumentStore): - await document_store.write_documents_async( - [ - Document(content="test doc 1"), - Document(content="test doc 2"), - Document(content="test doc 3"), - ] - ) - assert await document_store.count_documents_async() == 3 - - async def test_filter_documents(self, document_store: PgvectorDocumentStore): - filterable_docs = [ - Document( - content="1", - meta={ - "number": -10, - }, - ), - Document( - content="2", - meta={ - "number": 100, - }, - ), - ] - await document_store.write_documents_async(filterable_docs) - result = await document_store.filter_documents_async( - filters={"field": "meta.number", "operator": "==", "value": 100} - ) - - assert result == [d for d in filterable_docs if d.meta.get("number") == 100] - - async def test_delete_documents(self, document_store: PgvectorDocumentStore): - doc = Document(content="test doc") - await document_store.write_documents_async([doc]) - assert await document_store.count_documents_async() == 1 - - await document_store.delete_documents_async([doc.id]) - assert await document_store.count_documents_async() == 0 - async def test_connection_check_and_recreation(self, document_store: PgvectorDocumentStore): await document_store._ensure_db_setup_async() original_connection = document_store._async_connection @@ -94,13 +108,6 @@ async def test_connection_check_and_recreation(self, document_store: PgvectorDoc same_connection = document_store._async_connection assert same_connection is document_store._async_connection - async def test_delete_all_documents_async(self, document_store: PgvectorDocumentStore) -> None: - document_store.write_documents([Document(id=str(i)) for i in range(10)]) - await document_store.delete_all_documents_async() - assert document_store.count_documents() == 0 - document_store.write_documents([Document(id="1")]) - assert document_store.count_documents() == 1 - async def test_invalid_connection_string(self, monkeypatch): monkeypatch.setenv("PG_CONN_STR", "invalid_connection_string") document_store = PgvectorDocumentStore() @@ -108,215 +115,6 @@ async def test_invalid_connection_string(self, monkeypatch): await document_store._ensure_db_setup_async() assert "Failed to connect to PostgreSQL database" in str(e) - async def test_delete_by_filter_async(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "year": 2023}), - Document(content="Doc 2", meta={"category": "B", "year": 2023}), - Document(content="Doc 3", meta={"category": "A", "year": 2024}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 3 - - deleted_count = await document_store.delete_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "A"} - ) - assert deleted_count == 2 - assert await document_store.count_documents_async() == 1 - - remaining_docs = await document_store.filter_documents_async() - assert len(remaining_docs) == 1 - assert remaining_docs[0].meta["category"] == "B" - - deleted_count = await document_store.delete_by_filter_async( - filters={"field": "meta.year", "operator": "==", "value": 2023} - ) - assert deleted_count == 1 - assert await document_store.count_documents_async() == 0 - - async def test_delete_by_filter_async_no_matches(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A"}), - Document(content="Doc 2", meta={"category": "B"}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 2 - - deleted_count = await document_store.delete_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "C"} - ) - assert deleted_count == 0 - assert await document_store.count_documents_async() == 2 - - async def test_delete_by_filter_async_advanced_filters(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "year": 2023, "status": "draft"}), - Document(content="Doc 2", meta={"category": "A", "year": 2024, "status": "published"}), - Document(content="Doc 3", meta={"category": "B", "year": 2023, "status": "draft"}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 3 - - # AND condition - deleted_count = await document_store.delete_by_filter_async( - filters={ - "operator": "AND", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "A"}, - {"field": "meta.year", "operator": "==", "value": 2023}, - ], - } - ) - assert deleted_count == 1 - assert await document_store.count_documents_async() == 2 - - # OR condition - deleted_count = await document_store.delete_by_filter_async( - filters={ - "operator": "OR", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "B"}, - {"field": "meta.status", "operator": "==", "value": "published"}, - ], - } - ) - assert deleted_count == 2 - assert await document_store.count_documents_async() == 0 - - async def test_update_by_filter_async(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "draft"}), - Document(content="Doc 2", meta={"category": "B", "status": "draft"}), - Document(content="Doc 3", meta={"category": "A", "status": "draft"}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 3 - - # Update status for category="A" documents - updated_count = await document_store.update_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "A"}, meta={"status": "published"} - ) - assert updated_count == 2 - - # Verify the updates - published_docs = await document_store.filter_documents_async( - filters={"field": "meta.status", "operator": "==", "value": "published"} - ) - assert len(published_docs) == 2 - for doc in published_docs: - assert doc.meta["category"] == "A" - assert doc.meta["status"] == "published" - - # Verify category B still has draft status - draft_docs = await document_store.filter_documents_async( - filters={"field": "meta.status", "operator": "==", "value": "draft"} - ) - assert len(draft_docs) == 1 - assert draft_docs[0].meta["category"] == "B" - - async def test_update_by_filter_async_multiple_fields(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "year": 2023}), - Document(content="Doc 2", meta={"category": "A", "year": 2023}), - Document(content="Doc 3", meta={"category": "B", "year": 2024}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 3 - - # update multiple fields for category="A" documents - updated_count = await document_store.update_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "A"}, - meta={"status": "published", "priority": "high", "reviewed": True}, - ) - assert updated_count == 2 - - # verify - published_docs = await document_store.filter_documents_async( - filters={"field": "meta.category", "operator": "==", "value": "A"} - ) - assert len(published_docs) == 2 - for doc in published_docs: - assert doc.meta["status"] == "published" - assert doc.meta["priority"] == "high" - assert doc.meta["reviewed"] is True - assert doc.meta["year"] == 2023 # Original field should still be present - - # verify category B was not updated - b_docs = await document_store.filter_documents_async( - filters={"field": "meta.category", "operator": "==", "value": "B"} - ) - assert len(b_docs) == 1 - assert "status" not in b_docs[0].meta - assert "priority" not in b_docs[0].meta - - async def test_update_by_filter_async_no_matches(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A"}), - Document(content="Doc 2", meta={"category": "B"}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 2 - - # update documents with category="C" (no matches) - updated_count = await document_store.update_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "C"}, meta={"status": "published"} - ) - assert updated_count == 0 - assert await document_store.count_documents_async() == 2 - - # verify no documents were updated - published_docs = await document_store.filter_documents_async( - filters={"field": "meta.status", "operator": "==", "value": "published"} - ) - assert len(published_docs) == 0 - - async def test_update_by_filter_async_advanced_filters(self, document_store: PgvectorDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "year": 2023, "status": "draft"}), - Document(content="Doc 2", meta={"category": "A", "year": 2024, "status": "draft"}), - Document(content="Doc 3", meta={"category": "B", "year": 2023, "status": "draft"}), - ] - await document_store.write_documents_async(docs) - assert await document_store.count_documents_async() == 3 - - # AND condition - updated_count = await document_store.update_by_filter_async( - filters={ - "operator": "AND", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "A"}, - {"field": "meta.year", "operator": "==", "value": 2023}, - ], - }, - meta={"status": "published"}, - ) - assert updated_count == 1 - - # verify - published_docs = await document_store.filter_documents_async( - filters={"field": "meta.status", "operator": "==", "value": "published"} - ) - assert len(published_docs) == 1 - assert published_docs[0].meta["category"] == "A" - assert published_docs[0].meta["year"] == 2023 - - # OR condition - updated_count = await document_store.update_by_filter_async( - filters={ - "operator": "OR", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "B"}, - {"field": "meta.year", "operator": "==", "value": 2024}, - ], - }, - meta={"featured": True}, - ) - assert updated_count == 2 - - featured_docs = await document_store.filter_documents_async( - filters={"field": "meta.featured", "operator": "==", "value": True} - ) - assert len(featured_docs) == 2 - async def test_update_by_filter_async_empty_meta_raises_error(self, document_store: PgvectorDocumentStore): docs = [Document(content="Doc 1", meta={"category": "A"})] await document_store.write_documents_async(docs) @@ -448,352 +246,3 @@ async def test_delete_table_async_first_call(document_store): without triggering errors due to an uninitialized state. """ await document_store.delete_table_async() # if throw error, test fails - - -@pytest.mark.integration -@pytest.mark.asyncio -async def test_count_documents_by_filter_async(document_store: PgvectorDocumentStore): - filterable_docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active"}), - Document(content="Doc 2", meta={"category": "B", "status": "active"}), - Document(content="Doc 3", meta={"category": "A", "status": "inactive"}), - Document(content="Doc 4", meta={"category": "A", "status": "active"}), - ] - await document_store.write_documents_async(filterable_docs) - assert await document_store.count_documents_async() == 4 - - count_a = await document_store.count_documents_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "A"} - ) - assert count_a == 3 - - count_active = await document_store.count_documents_by_filter_async( - filters={"field": "meta.status", "operator": "==", "value": "active"} - ) - assert count_active == 3 - - count_a_active = await document_store.count_documents_by_filter_async( - filters={ - "operator": "AND", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "A"}, - {"field": "meta.status", "operator": "==", "value": "active"}, - ], - } - ) - assert count_a_active == 2 - - -@pytest.mark.integration -@pytest.mark.asyncio -async def test_count_unique_metadata_by_filter_async(document_store: PgvectorDocumentStore): - filterable_docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), - Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}), - Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}), - Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}), - Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}), - ] - await document_store.write_documents_async(filterable_docs) - assert await document_store.count_documents_async() == 5 - - # count distinct values for all documents - distinct_counts = await document_store.count_unique_metadata_by_filter_async( - filters={}, metadata_fields=["category", "status", "priority"] - ) - assert distinct_counts["category"] == 3 # A, B, C - assert distinct_counts["status"] == 2 # active, inactive - assert distinct_counts["priority"] == 3 # 1, 2, 3 - - # count distinct values for documents with category="A" - distinct_counts_a = await document_store.count_unique_metadata_by_filter_async( - filters={"field": "meta.category", "operator": "==", "value": "A"}, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_a["category"] == 1 # Only A - assert distinct_counts_a["status"] == 2 # active, inactive - assert distinct_counts_a["priority"] == 2 # 1, 3 - - # count distinct values for documents with status="active" - distinct_counts_active = await document_store.count_unique_metadata_by_filter_async( - filters={"field": "meta.status", "operator": "==", "value": "active"}, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_active["category"] == 3 # A, B, C - assert distinct_counts_active["status"] == 1 # Only active - assert distinct_counts_active["priority"] == 3 # 1, 2, 3 - - # count distinct values with complex filter (category="A" AND status="active") - distinct_counts_a_active = await document_store.count_unique_metadata_by_filter_async( - filters={ - "operator": "AND", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "A"}, - {"field": "meta.status", "operator": "==", "value": "active"}, - ], - }, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_a_active["category"] == 1 # Only A - assert distinct_counts_a_active["status"] == 1 # Only active - assert distinct_counts_a_active["priority"] == 2 # 1, 3 - - # Test with only a subset of fields - distinct_counts_subset = await document_store.count_unique_metadata_by_filter_async( - filters={}, metadata_fields=["category", "status"] - ) - assert distinct_counts_subset["category"] == 3 - assert distinct_counts_subset["status"] == 2 - assert "priority" not in distinct_counts_subset - - # Test field name normalization (with "meta." prefix) - distinct_counts_normalized = await document_store.count_unique_metadata_by_filter_async( - filters={}, metadata_fields=["meta.category", "status", "meta.priority"] - ) - assert distinct_counts_normalized["category"] == 3 - assert distinct_counts_normalized["status"] == 2 - assert distinct_counts_normalized["priority"] == 3 - - -@pytest.mark.integration -@pytest.mark.asyncio -async def test_get_metadata_fields_info_async(document_store: PgvectorDocumentStore): - filterable_docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), - Document(content="Doc 2", meta={"category": "B", "status": "inactive"}), - ] - await document_store.write_documents_async(filterable_docs) - - fields_info = await document_store.get_metadata_fields_info_async() - - # Verify that fields_info contains expected fields - assert "content" in fields_info - assert "category" in fields_info - assert "status" in fields_info - assert "priority" in fields_info - - assert fields_info["content"]["type"] == "text" - assert fields_info["category"]["type"] == "text" - assert fields_info["status"]["type"] == "text" - assert fields_info["priority"]["type"] == "integer" - - -@pytest.mark.integration -@pytest.mark.asyncio -async def test_get_metadata_field_min_max_async(document_store: PgvectorDocumentStore): - # Test with integer values - docs = [ - Document(content="Doc 1", meta={"priority": 1, "age": 10}), - Document(content="Doc 2", meta={"priority": 5, "age": 20}), - Document(content="Doc 3", meta={"priority": 3, "age": 15}), - Document(content="Doc 4", meta={"priority": 10, "age": 5}), - Document(content="Doc 6", meta={"rating": 10.5}), - Document(content="Doc 7", meta={"rating": 20.3}), - Document(content="Doc 8", meta={"rating": 15.7}), - Document(content="Doc 9", meta={"rating": 5.2}), - ] - await document_store.write_documents_async(docs) - - # Test with "meta." prefix for integer field - min_max_priority = await document_store.get_metadata_field_min_max_async("meta.priority") - assert min_max_priority["min"] == 1 - assert min_max_priority["max"] == 10 - - # Test with "meta." prefix for another integer field - min_max_age = await document_store.get_metadata_field_min_max_async("meta.age") - assert min_max_age["min"] == 5 - assert min_max_age["max"] == 20 - - # Test with single value - single_doc = [Document(content="Doc 5", meta={"single_value": 42})] - await document_store.write_documents_async(single_doc) - min_max_single = await document_store.get_metadata_field_min_max_async("meta.single_value") - assert min_max_single["min"] == 42 - assert min_max_single["max"] == 42 - - # Test with float values - min_max_rating = await document_store.get_metadata_field_min_max_async("meta.rating") - assert min_max_rating["min"] == pytest.approx(5.2) - assert min_max_rating["max"] == pytest.approx(20.3) - - # Test with text/string values - lexicographic comparison - text_docs = [ - Document(content="Doc 1", meta={"category": "Zebra", "status": "active"}), - Document(content="Doc 2", meta={"category": "Apple", "status": "pending"}), - Document(content="Doc 3", meta={"category": "Banana", "status": "inactive"}), - Document(content="Doc 4", meta={"category": "apple", "status": "active"}), - ] - await document_store.write_documents_async(text_docs) - - # Test lexicographic min/max for text fields (case-sensitive) - min_max_category = await document_store.get_metadata_field_min_max_async("meta.category") - assert min_max_category["min"] == "Apple" # 'A' comes before 'B' and 'Z' and 'a' - assert min_max_category["max"] == "apple" # 'a' comes after 'A', 'B', 'Z' in ASCII - - min_max_status = await document_store.get_metadata_field_min_max_async("meta.status") - assert min_max_status["min"] == "active" # 'a' comes before 'i' and 'p' - assert min_max_status["max"] == "pending" # 'p' comes after 'a' and 'i' - - # Test with empty strings - empty_string_docs = [ - Document(content="Doc 1", meta={"tag": ""}), - Document(content="Doc 2", meta={"tag": "A"}), - Document(content="Doc 3", meta={"tag": "B"}), - ] - await document_store.write_documents_async(empty_string_docs) - min_max_tag = await document_store.get_metadata_field_min_max_async("meta.tag") - assert min_max_tag["min"] == "" # Empty string is typically minimum - assert min_max_tag["max"] == "B" # 'B' is maximum - - # Test with special characters - special_char_docs = [ - Document(content="Doc 1", meta={"code": "!@#"}), - Document(content="Doc 2", meta={"code": "$%^"}), - Document(content="Doc 3", meta={"code": "&*()"}), - ] - await document_store.write_documents_async(special_char_docs) - min_max_code = await document_store.get_metadata_field_min_max_async("meta.code") - # Special characters have specific ASCII ordering - assert min_max_code["min"] in ["!@#", "$%^", "&*()"] - assert min_max_code["max"] in ["!@#", "$%^", "&*()"] - - # Test with Unicode characters - unicode_docs = [ - Document(content="Doc 1", meta={"name": "Ángel"}), - Document(content="Doc 2", meta={"name": "Zebra"}), - Document(content="Doc 3", meta={"name": "Alpha"}), - ] - await document_store.write_documents_async(unicode_docs) - min_max_name = await document_store.get_metadata_field_min_max_async("meta.name") - # With COLLATE "C", comparison is byte-order based - # "Alpha" should be minimum (A comes first in ASCII) - # "Ángel" or "Zebra" will be maximum depending on byte encoding - assert min_max_name["min"] == "Alpha" # 'A' comes first in ASCII - assert min_max_name["max"] in ["Ángel", "Zebra"] # Depends on UTF-8 byte encoding - - -@pytest.mark.integration -@pytest.mark.asyncio -async def test_get_metadata_field_unique_values_async(document_store: PgvectorDocumentStore): - # Test with string values - docs = [ - Document(content="Python programming", meta={"category": "A", "language": "Python"}), - Document(content="Java programming", meta={"category": "B", "language": "Java"}), - Document(content="Python scripting", meta={"category": "A", "language": "Python"}), - Document(content="JavaScript development", meta={"category": "C", "language": "JavaScript"}), - Document(content="Python data science", meta={"category": "A", "language": "Python"}), - Document(content="Java backend", meta={"category": "B", "language": "Java"}), - ] - await document_store.write_documents_async(docs) - - # Test getting all unique values without search term - unique_values, total_count = await document_store.get_metadata_field_unique_values_async( - "meta.category", None, 0, 10 - ) - assert set(unique_values) == {"A", "B", "C"} - assert total_count == 3 - - # Test with "meta." prefix - unique_languages, total_languages = await document_store.get_metadata_field_unique_values_async( - "meta.language", None, 0, 10 - ) - assert set(unique_languages) == {"Python", "Java", "JavaScript"} - assert total_languages == 3 - - # Test pagination - first page - unique_values_page1, total_count_page1 = await document_store.get_metadata_field_unique_values_async( - "meta.category", None, 0, 2 - ) - assert len(unique_values_page1) == 2 - assert all(val in ["A", "B", "C"] for val in unique_values_page1) - assert total_count_page1 == 3 - - # Test pagination - second page - unique_values_page2, total_count_page2 = await document_store.get_metadata_field_unique_values_async( - "meta.category", None, 2, 2 - ) - assert len(unique_values_page2) == 1 - assert unique_values_page2[0] in ["A", "B", "C"] - assert total_count_page2 == 3 - - # Test pagination - verify pages don't overlap - assert not set(unique_values_page1).intersection(set(unique_values_page2)) - - # Test pagination - verify all values are covered - all_values = set(unique_values_page1) | set(unique_values_page2) - assert all_values == {"A", "B", "C"} - - # Test pagination - size larger than total count - unique_values_large, total_large = await document_store.get_metadata_field_unique_values_async( - "meta.category", None, 0, 100 - ) - assert len(unique_values_large) == 3 - assert set(unique_values_large) == {"A", "B", "C"} - assert total_large == 3 - - # Test pagination - from_ beyond total count (should return empty) - unique_values_beyond, total_beyond = await document_store.get_metadata_field_unique_values_async( - "meta.category", None, 10, 10 - ) - assert len(unique_values_beyond) == 0 - assert total_beyond == 3 - - # Test pagination - single item per page - unique_values_single1, _ = await document_store.get_metadata_field_unique_values_async("meta.category", None, 0, 1) - unique_values_single2, _ = await document_store.get_metadata_field_unique_values_async("meta.category", None, 1, 1) - unique_values_single3, _ = await document_store.get_metadata_field_unique_values_async("meta.category", None, 2, 1) - assert len(unique_values_single1) == 1 - assert len(unique_values_single2) == 1 - assert len(unique_values_single3) == 1 - # All three pages should be different - assert len(set(unique_values_single1 + unique_values_single2 + unique_values_single3)) == 3 - - # Test with search term - filter by content matching "Python" - unique_values_filtered, total_filtered = await document_store.get_metadata_field_unique_values_async( - "meta.category", "Python", 0, 10 - ) - assert set(unique_values_filtered) == {"A"} # Only category A has documents with "Python" in content - assert total_filtered == 1 - - # Test with search term - filter by content matching "Java" - unique_values_java, total_java = await document_store.get_metadata_field_unique_values_async( - "meta.category", "Java", 0, 10 - ) - assert set(unique_values_java) == {"B"} # Only category B has documents with "Java" in content - assert total_java == 1 - - # Test pagination with search term - unique_values_search_page1, total_search = await document_store.get_metadata_field_unique_values_async( - "meta.language", "Python", 0, 1 - ) - assert len(unique_values_search_page1) == 1 - assert unique_values_search_page1[0] == "Python" - assert total_search == 1 - - # Test pagination with search term - beyond results - unique_values_search_empty, total_search_empty = await document_store.get_metadata_field_unique_values_async( - "meta.language", "Python", 10, 10 - ) - assert len(unique_values_search_empty) == 0 - assert total_search_empty == 1 - - # Test with integer values - int_docs = [ - Document(content="Doc 1", meta={"priority": 1}), - Document(content="Doc 2", meta={"priority": 2}), - Document(content="Doc 3", meta={"priority": 1}), - Document(content="Doc 4", meta={"priority": 3}), - ] - await document_store.write_documents_async(int_docs) - unique_priorities, total_priorities = await document_store.get_metadata_field_unique_values_async( - "meta.priority", None, 0, 10 - ) - assert set(unique_priorities) == {"1", "2", "3"} - assert total_priorities == 3 - - # Test with search term on integer field - unique_priorities_filtered, total_priorities_filtered = await document_store.get_metadata_field_unique_values_async( - "meta.priority", "Doc 1", 0, 10 - ) - assert set(unique_priorities_filtered) == {"1"} - assert total_priorities_filtered == 1 diff --git a/integrations/qdrant/tests/test_document_store_async.py b/integrations/qdrant/tests/test_document_store_async.py index dd6467b657..ce5f7a5c87 100644 --- a/integrations/qdrant/tests/test_document_store_async.py +++ b/integrations/qdrant/tests/test_document_store_async.py @@ -18,7 +18,102 @@ ) -class TestQdrantDocumentStore: +@pytest.mark.asyncio +class TestQdrantDocumentStoreAsyncUnit: + async def test_query_hybrid_search_batch_failure_async(self): + document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) + await document_store._initialize_async_client() + sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33]) + embedding = [0.1] * 768 + + with patch.object(document_store._async_client, "query_points", side_effect=Exception("query_points")): + with pytest.raises(QdrantStoreError): + await document_store._query_hybrid_async( + query_sparse_embedding=sparse_embedding, query_embedding=embedding + ) + + async def test_set_up_collection_with_dimension_mismatch_async(self): + document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine") + await document_store._initialize_async_client() + # Mock collection info with different vector size + mock_collection_info = MagicMock() + mock_collection_info.config.params.vectors = MagicMock() + mock_collection_info.config.params.vectors.distance = rest.Distance.COSINE + mock_collection_info.config.params.vectors.size = 512 + + with ( + patch.object(document_store._async_client, "collection_exists", return_value=True), + patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), + ): + with pytest.raises(ValueError, match="different vector size"): + await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False) + + async def test_set_up_collection_with_existing_incompatible_collection_async(self): + document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) + await document_store._initialize_async_client() + # Mock collection info with named vectors but missing DENSE_VECTORS_NAME + mock_collection_info = MagicMock() + mock_collection_info.config.params.vectors = {"some_other_vector": MagicMock()} + + with ( + patch.object(document_store._async_client, "collection_exists", return_value=True), + patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), + ): + with pytest.raises(QdrantStoreError, match="created outside of Haystack"): + await document_store._set_up_collection_async("test_collection", 768, False, "cosine", True, False) + + async def test_set_up_collection_use_sparse_embeddings_true_without_named_vectors_async(self): + """Test that an error is raised when use_sparse_embeddings is True but collection doesn't have named vectors""" + document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) + await document_store._initialize_async_client() + + # Mock collection info without named vectors + mock_collection_info = MagicMock() + mock_collection_info.config.params.vectors = MagicMock(spec=rest.VectorsConfig) + + with ( + patch.object(document_store._async_client, "collection_exists", return_value=True), + patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), + ): + with pytest.raises(QdrantStoreError, match="without sparse embedding vectors"): + await document_store._set_up_collection_async("test_collection", 768, False, "cosine", True, False) + + async def test_set_up_collection_use_sparse_embeddings_false_with_named_vectors_async(self): + """Test that an error is raised when use_sparse_embeddings is False but collection has named vectors""" + document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False) + await document_store._initialize_async_client() + # Mock collection info with named vectors + mock_collection_info = MagicMock() + mock_collection_info.config.params.vectors = {DENSE_VECTORS_NAME: MagicMock()} + + with ( + patch.object(document_store._async_client, "collection_exists", return_value=True), + patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), + ): + with pytest.raises(QdrantStoreError, match="with sparse embedding vectors"): + await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False) + + async def test_set_up_collection_with_distance_mismatch_async(self): + document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine") + await document_store._initialize_async_client() + + # Mock collection info with different distance + mock_collection_info = MagicMock() + mock_collection_info.config.params.vectors = MagicMock() + mock_collection_info.config.params.vectors.distance = rest.Distance.DOT + mock_collection_info.config.params.vectors.size = 768 + + with ( + patch.object(document_store._async_client, "collection_exists", return_value=True), + patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), + ): + with pytest.raises(ValueError, match="different similarity"): + await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False) + + +@pytest.mark.integration +@pytest.mark.asyncio +class TestQdrantDocumentStoreAsync: @pytest.fixture def document_store(self) -> QdrantDocumentStore: return QdrantDocumentStore( @@ -30,7 +125,6 @@ def document_store(self) -> QdrantDocumentStore: progress_bar=False, ) - @pytest.mark.asyncio async def test_write_documents_async(self, document_store: QdrantDocumentStore): docs = [Document(id="1")] @@ -39,7 +133,6 @@ async def test_write_documents_async(self, document_store: QdrantDocumentStore): with pytest.raises(DuplicateDocumentError): await document_store.write_documents_async(docs, DuplicatePolicy.FAIL) - @pytest.mark.asyncio async def test_sparse_configuration_async(self): document_store = QdrantDocumentStore( ":memory:", @@ -58,7 +151,6 @@ async def test_sparse_configuration_async(self): assert hasattr(sparse_config[SPARSE_VECTORS_NAME], "modifier") assert sparse_config[SPARSE_VECTORS_NAME].modifier == rest.Modifier.IDF - @pytest.mark.asyncio async def test_query_hybrid_async(self, generate_sparse_embedding): document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False) @@ -83,7 +175,6 @@ async def test_query_hybrid_async(self, generate_sparse_embedding): assert document.sparse_embedding assert document.embedding - @pytest.mark.asyncio async def test_query_hybrid_with_group_by_async(self, generate_sparse_embedding): document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True, progress_bar=False) @@ -117,7 +208,6 @@ async def test_query_hybrid_with_group_by_async(self, generate_sparse_embedding) assert document.sparse_embedding assert document.embedding - @pytest.mark.asyncio async def test_query_hybrid_fail_without_sparse_embedding_async(self, document_store): sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33]) embedding = [0.1] * 768 @@ -128,103 +218,6 @@ async def test_query_hybrid_fail_without_sparse_embedding_async(self, document_s query_embedding=embedding, ) - @pytest.mark.asyncio - async def test_query_hybrid_search_batch_failure_async(self): - document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) - await document_store._initialize_async_client() - sparse_embedding = SparseEmbedding(indices=[0, 1, 2, 3], values=[0.1, 0.8, 0.05, 0.33]) - embedding = [0.1] * 768 - - with patch.object(document_store._async_client, "query_points", side_effect=Exception("query_points")): - with pytest.raises(QdrantStoreError): - await document_store._query_hybrid_async( - query_sparse_embedding=sparse_embedding, query_embedding=embedding - ) - - @pytest.mark.asyncio - async def test_set_up_collection_with_dimension_mismatch_async(self): - document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine") - await document_store._initialize_async_client() - # Mock collection info with different vector size - mock_collection_info = MagicMock() - mock_collection_info.config.params.vectors = MagicMock() - mock_collection_info.config.params.vectors.distance = rest.Distance.COSINE - mock_collection_info.config.params.vectors.size = 512 - - with ( - patch.object(document_store._async_client, "collection_exists", return_value=True), - patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), - ): - with pytest.raises(ValueError, match="different vector size"): - await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False) - - @pytest.mark.asyncio - async def test_set_up_collection_with_existing_incompatible_collection_async(self): - document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) - await document_store._initialize_async_client() - # Mock collection info with named vectors but missing DENSE_VECTORS_NAME - mock_collection_info = MagicMock() - mock_collection_info.config.params.vectors = {"some_other_vector": MagicMock()} - - with ( - patch.object(document_store._async_client, "collection_exists", return_value=True), - patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), - ): - with pytest.raises(QdrantStoreError, match="created outside of Haystack"): - await document_store._set_up_collection_async("test_collection", 768, False, "cosine", True, False) - - @pytest.mark.asyncio - async def test_set_up_collection_use_sparse_embeddings_true_without_named_vectors_async(self): - """Test that an error is raised when use_sparse_embeddings is True but collection doesn't have named vectors""" - document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=True) - await document_store._initialize_async_client() - - # Mock collection info without named vectors - mock_collection_info = MagicMock() - mock_collection_info.config.params.vectors = MagicMock(spec=rest.VectorsConfig) - - with ( - patch.object(document_store._async_client, "collection_exists", return_value=True), - patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), - ): - with pytest.raises(QdrantStoreError, match="without sparse embedding vectors"): - await document_store._set_up_collection_async("test_collection", 768, False, "cosine", True, False) - - @pytest.mark.asyncio - async def test_set_up_collection_use_sparse_embeddings_false_with_named_vectors_async(self): - """Test that an error is raised when use_sparse_embeddings is False but collection has named vectors""" - document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False) - await document_store._initialize_async_client() - # Mock collection info with named vectors - mock_collection_info = MagicMock() - mock_collection_info.config.params.vectors = {DENSE_VECTORS_NAME: MagicMock()} - - with ( - patch.object(document_store._async_client, "collection_exists", return_value=True), - patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), - ): - with pytest.raises(QdrantStoreError, match="with sparse embedding vectors"): - await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False) - - @pytest.mark.asyncio - async def test_set_up_collection_with_distance_mismatch_async(self): - document_store = QdrantDocumentStore(location=":memory:", use_sparse_embeddings=False, similarity="cosine") - await document_store._initialize_async_client() - - # Mock collection info with different distance - mock_collection_info = MagicMock() - mock_collection_info.config.params.vectors = MagicMock() - mock_collection_info.config.params.vectors.distance = rest.Distance.DOT - mock_collection_info.config.params.vectors.size = 768 - - with ( - patch.object(document_store._async_client, "collection_exists", return_value=True), - patch.object(document_store._async_client, "get_collection", return_value=mock_collection_info), - ): - with pytest.raises(ValueError, match="different similarity"): - await document_store._set_up_collection_async("test_collection", 768, False, "cosine", False, False) - - @pytest.mark.asyncio async def test_delete_all_documents_async_no_index_recreation(self, document_store): await document_store._initialize_async_client() @@ -240,7 +233,6 @@ async def test_delete_all_documents_async_no_index_recreation(self, document_sto await document_store.write_documents_async(docs) assert await document_store.count_documents_async() == 5 - @pytest.mark.asyncio async def test_delete_all_documents_async_index_recreation(self, document_store): await document_store._initialize_async_client() @@ -264,7 +256,6 @@ async def test_delete_all_documents_async_index_recreation(self, document_store) await document_store.write_documents_async(docs) assert await document_store.count_documents_async() == 5 - @pytest.mark.asyncio async def test_delete_by_filter_async(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A", "year": 2023}), @@ -295,7 +286,6 @@ async def test_delete_by_filter_async(self, document_store: QdrantDocumentStore) assert deleted_count == 1 assert await document_store.count_documents_async() == 0 - @pytest.mark.asyncio async def test_delete_by_filter_async_no_matches(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A"}), @@ -311,7 +301,6 @@ async def test_delete_by_filter_async_no_matches(self, document_store: QdrantDoc assert deleted_count == 0 assert await document_store.count_documents_async() == 2 - @pytest.mark.asyncio async def test_delete_by_filter_async_advanced_filters(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A", "year": 2023, "status": "draft"}), @@ -347,7 +336,6 @@ async def test_delete_by_filter_async_advanced_filters(self, document_store: Qdr assert deleted_count == 2 assert await document_store.count_documents_async() == 0 - @pytest.mark.asyncio async def test_update_by_filter_async(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A", "status": "draft"}), @@ -383,7 +371,6 @@ async def test_update_by_filter_async(self, document_store: QdrantDocumentStore) assert len(draft_docs) == 1 assert draft_docs[0].meta["category"] == "B" - @pytest.mark.asyncio async def test_update_by_filter_async_multiple_fields(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A", "year": 2023}), @@ -413,7 +400,6 @@ async def test_update_by_filter_async_multiple_fields(self, document_store: Qdra assert doc.meta["category"] == "A" assert doc.meta["year"] == 2023 # Existing field preserved - @pytest.mark.asyncio async def test_update_by_filter_async_no_matches(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A"}), @@ -429,7 +415,6 @@ async def test_update_by_filter_async_no_matches(self, document_store: QdrantDoc assert updated_count == 0 assert await document_store.count_documents_async() == 2 - @pytest.mark.asyncio async def test_update_by_filter_async_advanced_filters(self, document_store: QdrantDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A", "year": 2023, "status": "draft"}), @@ -462,7 +447,6 @@ async def test_update_by_filter_async_advanced_filters(self, document_store: Qdr assert published_docs[0].meta["category"] == "A" assert published_docs[0].meta["year"] == 2023 - @pytest.mark.asyncio async def test_update_by_filter_async_preserves_vectors(self, document_store: QdrantDocumentStore): """Test that update_by_filter_async preserves document embeddings.""" docs = [ @@ -487,7 +471,6 @@ async def test_update_by_filter_async_preserves_vectors(self, document_store: Qd assert updated_docs[0].embedding is not None assert len(updated_docs[0].embedding) == 768 - @pytest.mark.asyncio async def test_count_documents_by_filter_async(self, document_store: QdrantDocumentStore): """Test counting documents with filters (async).""" docs = [ @@ -520,7 +503,6 @@ async def test_count_documents_by_filter_async(self, document_store: QdrantDocum ) assert count == 1 - @pytest.mark.asyncio async def test_get_metadata_fields_info_async(self, document_store: QdrantDocumentStore): """Test getting metadata field information (async).""" docs = [ @@ -533,7 +515,6 @@ async def test_get_metadata_fields_info_async(self, document_store: QdrantDocume # Should return empty dict or field info depending on Qdrant collection setup assert isinstance(fields_info, dict) - @pytest.mark.asyncio async def test_get_metadata_field_min_max_async(self, document_store: QdrantDocumentStore): """Test getting min/max values for a metadata field (async).""" docs = [ @@ -547,7 +528,6 @@ async def test_get_metadata_field_min_max_async(self, document_store: QdrantDocu assert result.get("min") == 0.3 assert result.get("max") == 0.8 - @pytest.mark.asyncio async def test_count_unique_metadata_by_filter_async(self, document_store: QdrantDocumentStore): """Test counting unique metadata field values (async).""" docs = [ @@ -561,7 +541,6 @@ async def test_count_unique_metadata_by_filter_async(self, document_store: Qdran result = await document_store.count_unique_metadata_by_filter_async(filters={}, metadata_fields=["category"]) assert result == {"category": 3} - @pytest.mark.asyncio async def test_count_unique_metadata_by_filter_async_multiple_fields(self, document_store: QdrantDocumentStore): """Test counting unique values for multiple metadata fields (async).""" docs = [ @@ -576,7 +555,6 @@ async def test_count_unique_metadata_by_filter_async_multiple_fields(self, docum ) assert result == {"category": 2, "status": 2} - @pytest.mark.asyncio async def test_count_unique_metadata_by_filter_async_with_filter(self, document_store: QdrantDocumentStore): """Test counting unique metadata field values with filtering (async).""" docs = [ @@ -592,7 +570,6 @@ async def test_count_unique_metadata_by_filter_async_with_filter(self, document_ ) assert result == {"category": 2} - @pytest.mark.asyncio async def test_get_metadata_field_unique_values_async(self, document_store: QdrantDocumentStore): """Test getting unique metadata field values (async).""" docs = [ @@ -607,7 +584,6 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Qdra assert len(values) == 3 assert set(values) == {"A", "B", "C"} - @pytest.mark.asyncio async def test_get_metadata_field_unique_values_async_pagination(self, document_store: QdrantDocumentStore): """Test getting unique metadata field values with pagination (async).""" docs = [Document(content=f"Doc {i}", meta={"value": i % 5}) for i in range(10)] @@ -624,7 +600,6 @@ async def test_get_metadata_field_unique_values_async_pagination(self, document_ # Values should not overlap assert set(values_page_1) != set(values_page_2) - @pytest.mark.asyncio async def test_get_metadata_field_unique_values_async_with_filter(self, document_store: QdrantDocumentStore): """Test getting unique metadata field values with filtering (async).""" docs = [