Skip to content

Commit 545492e

Browse files
test: ElasticsearchDocumentStore relying on Mixin tests (#2995)
* initial import * isort * reducing the tests and relying on Mixin * increasing lowest haystack dependency * close async fixtures * Apply suggestion from @anakin87 Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> --------- Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
1 parent d46837e commit 545492e

3 files changed

Lines changed: 22 additions & 99 deletions

File tree

integrations/elasticsearch/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
2525
dependencies = [
26-
"haystack-ai>=2.26.0",
26+
"haystack-ai>=2.26.1",
2727
"elasticsearch>=8,<9",
2828
"aiohttp>=3.9.0" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch
2929
]

integrations/elasticsearch/tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import asyncio
56
import uuid
67

78
import pytest
@@ -37,6 +38,8 @@ def document_store():
3738
store._ensure_initialized()
3839
store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
3940
store.client.close()
41+
if store._async_client is not None:
42+
asyncio.run(store._async_client.close())
4043

4144

4245
@pytest.fixture
@@ -58,3 +61,5 @@ def document_store_2():
5861
store._ensure_initialized()
5962
store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
6063
store.client.close()
64+
if store._async_client is not None:
65+
asyncio.run(store._async_client.close())

integrations/elasticsearch/tests/test_document_store.py

Lines changed: 16 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,14 @@
1010
from haystack.dataclasses.document import Document
1111
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1212
from haystack.document_stores.types import DuplicatePolicy
13-
from haystack.testing.document_store import DocumentStoreBaseExtendedTests
13+
from haystack.testing.document_store import (
14+
CountDocumentsByFilterTest,
15+
CountUniqueMetadataByFilterTest,
16+
DocumentStoreBaseExtendedTests,
17+
GetMetadataFieldMinMaxTest,
18+
GetMetadataFieldsInfoTest,
19+
GetMetadataFieldUniqueValuesTest,
20+
)
1421
from haystack.utils import Secret
1522
from haystack.utils.auth import TokenSecret
1623

@@ -226,7 +233,14 @@ def test_client_initialization_with_api_key_string(_mock_async_es, _mock_es):
226233

227234

228235
@pytest.mark.integration
229-
class TestDocumentStore(DocumentStoreBaseExtendedTests):
236+
class TestDocumentStore(
237+
DocumentStoreBaseExtendedTests,
238+
CountDocumentsByFilterTest,
239+
CountUniqueMetadataByFilterTest,
240+
GetMetadataFieldsInfoTest,
241+
GetMetadataFieldMinMaxTest,
242+
GetMetadataFieldUniqueValuesTest,
243+
):
230244
"""
231245
Common test cases will be provided by `DocumentStoreBaseExtendedTests` but
232246
you can add more to this class.
@@ -528,31 +542,6 @@ def test_delete_all_documents_index_recreation(self, document_store: Elasticsear
528542
assert len(results) == 1
529543
assert results[0].content == "New document after delete all"
530544

531-
def test_count_documents_by_filter(self, document_store: ElasticsearchDocumentStore):
532-
docs = [
533-
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
534-
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
535-
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
536-
Document(content="Doc 4", meta={"category": "A", "status": "active"}),
537-
]
538-
document_store.write_documents(docs)
539-
540-
count_a = document_store.count_documents_by_filter(
541-
filters={"field": "category", "operator": "==", "value": "A"}
542-
)
543-
assert count_a == 3
544-
545-
count_a_active = document_store.count_documents_by_filter(
546-
filters={
547-
"operator": "AND",
548-
"conditions": [
549-
{"field": "category", "operator": "==", "value": "A"},
550-
{"field": "status", "operator": "==", "value": "active"},
551-
],
552-
}
553-
)
554-
assert count_a_active == 2
555-
556545
def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocumentStore):
557546
docs = [
558547
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
@@ -563,47 +552,6 @@ def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocu
563552
]
564553
document_store.write_documents(docs)
565554

566-
# count distinct values for all documents
567-
distinct_counts = document_store.count_unique_metadata_by_filter(
568-
filters={}, metadata_fields=["category", "status", "priority"]
569-
)
570-
assert distinct_counts["category"] == 3 # A, B, C
571-
assert distinct_counts["status"] == 2 # active, inactive
572-
assert distinct_counts["priority"] == 3 # 1, 2, 3
573-
574-
# distinct values for documents with category="A"
575-
distinct_counts_a = document_store.count_unique_metadata_by_filter(
576-
filters={"field": "category", "operator": "==", "value": "A"},
577-
metadata_fields=["category", "status", "priority"],
578-
)
579-
assert distinct_counts_a["category"] == 1 # Only A
580-
assert distinct_counts_a["status"] == 2 # active, inactive
581-
assert distinct_counts_a["priority"] == 2 # 1, 3
582-
583-
# distinct values for documents with status="active"
584-
distinct_counts_active = document_store.count_unique_metadata_by_filter(
585-
filters={"field": "status", "operator": "==", "value": "active"},
586-
metadata_fields=["category", "status", "priority"],
587-
)
588-
assert distinct_counts_active["category"] == 3 # A, B, C
589-
assert distinct_counts_active["status"] == 1 # Only active
590-
assert distinct_counts_active["priority"] == 3 # 1, 2, 3
591-
592-
# distinct values with complex filter (category="A" AND status="active")
593-
distinct_counts_a_active = document_store.count_unique_metadata_by_filter(
594-
filters={
595-
"operator": "AND",
596-
"conditions": [
597-
{"field": "category", "operator": "==", "value": "A"},
598-
{"field": "status", "operator": "==", "value": "active"},
599-
],
600-
},
601-
metadata_fields=["category", "status", "priority"],
602-
)
603-
assert distinct_counts_a_active["category"] == 1 # Only A
604-
assert distinct_counts_a_active["status"] == 1 # Only active
605-
assert distinct_counts_a_active["priority"] == 2 # 1, 3
606-
607555
# test with only a subset of fields
608556
distinct_counts_subset = document_store.count_unique_metadata_by_filter(
609557
filters={}, metadata_fields=["category", "status"]
@@ -642,36 +590,6 @@ def test_get_metadata_fields_info(self, document_store: ElasticsearchDocumentSto
642590
assert fields_info["status"]["type"] == "keyword"
643591
assert fields_info["priority"]["type"] == "long"
644592

645-
def test_get_metadata_field_min_max(self, document_store: ElasticsearchDocumentStore):
646-
docs = [
647-
Document(content="Doc 1", meta={"priority": 1, "age": 10}),
648-
Document(content="Doc 2", meta={"priority": 5, "age": 20}),
649-
Document(content="Doc 3", meta={"priority": 3, "age": 15}),
650-
Document(content="Doc 4", meta={"priority": 10, "age": 5}),
651-
Document(content="Doc 6", meta={"rating": 10.5}),
652-
Document(content="Doc 7", meta={"rating": 20.3}),
653-
Document(content="Doc 8", meta={"rating": 15.7}),
654-
Document(content="Doc 9", meta={"rating": 5.2}),
655-
]
656-
document_store.write_documents(docs)
657-
658-
# test with "meta." prefix for integer field
659-
min_max_priority = document_store.get_metadata_field_min_max("meta.priority")
660-
assert min_max_priority["min"] == 1
661-
assert min_max_priority["max"] == 10
662-
663-
# test with single value
664-
single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
665-
document_store.write_documents(single_doc)
666-
min_max_single = document_store.get_metadata_field_min_max("meta.single_value")
667-
assert min_max_single["min"] == 42
668-
assert min_max_single["max"] == 42
669-
670-
# test with float values
671-
min_max_score = document_store.get_metadata_field_min_max("meta.rating")
672-
assert min_max_score["min"] == pytest.approx(5.2)
673-
assert min_max_score["max"] == pytest.approx(20.3)
674-
675593
def test_get_metadata_field_unique_values(self, document_store: ElasticsearchDocumentStore):
676594
docs = [
677595
Document(content="Python programming", meta={"category": "A", "language": "Python"}),

0 commit comments

Comments
 (0)