diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index ffdb831dc2..6af9236d3e 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai>=2.26.0", + "haystack-ai>=2.26.1", "elasticsearch>=8,<9", "aiohttp>=3.9.0" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch ] diff --git a/integrations/elasticsearch/tests/conftest.py b/integrations/elasticsearch/tests/conftest.py index d3c6c4063d..de192a7c19 100644 --- a/integrations/elasticsearch/tests/conftest.py +++ b/integrations/elasticsearch/tests/conftest.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import asyncio import uuid import pytest @@ -37,6 +38,8 @@ def document_store(): store._ensure_initialized() store.client.options(ignore_status=[400, 404]).indices.delete(index=index) store.client.close() + if store._async_client is not None: + asyncio.run(store._async_client.close()) @pytest.fixture @@ -58,3 +61,5 @@ def document_store_2(): store._ensure_initialized() store.client.options(ignore_status=[400, 404]).indices.delete(index=index) store.client.close() + if store._async_client is not None: + asyncio.run(store._async_client.close()) diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index 1966c341b9..59c93a0dce 100644 --- a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -10,7 +10,14 @@ from haystack.dataclasses.document import Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy -from haystack.testing.document_store import DocumentStoreBaseExtendedTests +from haystack.testing.document_store import ( + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + DocumentStoreBaseExtendedTests, + GetMetadataFieldMinMaxTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldUniqueValuesTest, +) from haystack.utils import Secret from haystack.utils.auth import TokenSecret @@ -226,7 +233,14 @@ def test_client_initialization_with_api_key_string(_mock_async_es, _mock_es): @pytest.mark.integration -class TestDocumentStore(DocumentStoreBaseExtendedTests): +class TestDocumentStore( + DocumentStoreBaseExtendedTests, + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldUniqueValuesTest, +): """ Common test cases will be provided by `DocumentStoreBaseExtendedTests` but you can add more to this class. @@ -528,31 +542,6 @@ def test_delete_all_documents_index_recreation(self, document_store: Elasticsear assert len(results) == 1 assert results[0].content == "New document after delete all" - def test_count_documents_by_filter(self, document_store: ElasticsearchDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active"}), - Document(content="Doc 2", meta={"category": "B", "status": "active"}), - Document(content="Doc 3", meta={"category": "A", "status": "inactive"}), - Document(content="Doc 4", meta={"category": "A", "status": "active"}), - ] - document_store.write_documents(docs) - - count_a = document_store.count_documents_by_filter( - filters={"field": "category", "operator": "==", "value": "A"} - ) - assert count_a == 3 - - count_a_active = document_store.count_documents_by_filter( - filters={ - "operator": "AND", - "conditions": [ - {"field": "category", "operator": "==", "value": "A"}, - {"field": "status", "operator": "==", "value": "active"}, - ], - } - ) - assert count_a_active == 2 - def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocumentStore): docs = [ Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), @@ -563,47 +552,6 @@ def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocu ] document_store.write_documents(docs) - # count distinct values for all documents - distinct_counts = document_store.count_unique_metadata_by_filter( - filters={}, metadata_fields=["category", "status", "priority"] - ) - assert distinct_counts["category"] == 3 # A, B, C - assert distinct_counts["status"] == 2 # active, inactive - assert distinct_counts["priority"] == 3 # 1, 2, 3 - - # distinct values for documents with category="A" - distinct_counts_a = document_store.count_unique_metadata_by_filter( - filters={"field": "category", "operator": "==", "value": "A"}, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_a["category"] == 1 # Only A - assert distinct_counts_a["status"] == 2 # active, inactive - assert distinct_counts_a["priority"] == 2 # 1, 3 - - # distinct values for documents with status="active" - distinct_counts_active = document_store.count_unique_metadata_by_filter( - filters={"field": "status", "operator": "==", "value": "active"}, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_active["category"] == 3 # A, B, C - assert distinct_counts_active["status"] == 1 # Only active - assert distinct_counts_active["priority"] == 3 # 1, 2, 3 - - # distinct values with complex filter (category="A" AND status="active") - distinct_counts_a_active = document_store.count_unique_metadata_by_filter( - filters={ - "operator": "AND", - "conditions": [ - {"field": "category", "operator": "==", "value": "A"}, - {"field": "status", "operator": "==", "value": "active"}, - ], - }, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_a_active["category"] == 1 # Only A - assert distinct_counts_a_active["status"] == 1 # Only active - assert distinct_counts_a_active["priority"] == 2 # 1, 3 - # test with only a subset of fields distinct_counts_subset = document_store.count_unique_metadata_by_filter( filters={}, metadata_fields=["category", "status"] @@ -642,36 +590,6 @@ def test_get_metadata_fields_info(self, document_store: ElasticsearchDocumentSto assert fields_info["status"]["type"] == "keyword" assert fields_info["priority"]["type"] == "long" - def test_get_metadata_field_min_max(self, document_store: ElasticsearchDocumentStore): - docs = [ - Document(content="Doc 1", meta={"priority": 1, "age": 10}), - Document(content="Doc 2", meta={"priority": 5, "age": 20}), - Document(content="Doc 3", meta={"priority": 3, "age": 15}), - Document(content="Doc 4", meta={"priority": 10, "age": 5}), - Document(content="Doc 6", meta={"rating": 10.5}), - Document(content="Doc 7", meta={"rating": 20.3}), - Document(content="Doc 8", meta={"rating": 15.7}), - Document(content="Doc 9", meta={"rating": 5.2}), - ] - document_store.write_documents(docs) - - # test with "meta." prefix for integer field - min_max_priority = document_store.get_metadata_field_min_max("meta.priority") - assert min_max_priority["min"] == 1 - assert min_max_priority["max"] == 10 - - # test with single value - single_doc = [Document(content="Doc 5", meta={"single_value": 42})] - document_store.write_documents(single_doc) - min_max_single = document_store.get_metadata_field_min_max("meta.single_value") - assert min_max_single["min"] == 42 - assert min_max_single["max"] == 42 - - # test with float values - min_max_score = document_store.get_metadata_field_min_max("meta.rating") - assert min_max_score["min"] == pytest.approx(5.2) - assert min_max_score["max"] == pytest.approx(20.3) - def test_get_metadata_field_unique_values(self, document_store: ElasticsearchDocumentStore): docs = [ Document(content="Python programming", meta={"category": "A", "language": "Python"}),