Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/elasticsearch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"haystack-ai>=2.26.0",
"haystack-ai>=2.26.1",
"elasticsearch>=8,<9",
"aiohttp>=3.9.0" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch
]
Expand Down
5 changes: 5 additions & 0 deletions integrations/elasticsearch/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import asyncio
import uuid

import pytest
Expand Down Expand Up @@ -37,6 +38,8 @@ def document_store():
store._ensure_initialized()
store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
store.client.close()
if store._async_client is not None:
asyncio.run(store._async_client.close())


@pytest.fixture
Expand All @@ -58,3 +61,5 @@ def document_store_2():
store._ensure_initialized()
store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
store.client.close()
if store._async_client is not None:
asyncio.run(store._async_client.close())
114 changes: 16 additions & 98 deletions integrations/elasticsearch/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@
from haystack.dataclasses.document import Document
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
from haystack.document_stores.types import DuplicatePolicy
from haystack.testing.document_store import DocumentStoreBaseExtendedTests
from haystack.testing.document_store import (
CountDocumentsByFilterTest,
CountUniqueMetadataByFilterTest,
DocumentStoreBaseExtendedTests,
GetMetadataFieldMinMaxTest,
GetMetadataFieldsInfoTest,
GetMetadataFieldUniqueValuesTest,
)
from haystack.utils import Secret
from haystack.utils.auth import TokenSecret

Expand Down Expand Up @@ -226,7 +233,14 @@ def test_client_initialization_with_api_key_string(_mock_async_es, _mock_es):


@pytest.mark.integration
class TestDocumentStore(DocumentStoreBaseExtendedTests):
class TestDocumentStore(
DocumentStoreBaseExtendedTests,
CountDocumentsByFilterTest,
CountUniqueMetadataByFilterTest,
GetMetadataFieldsInfoTest,
GetMetadataFieldMinMaxTest,
GetMetadataFieldUniqueValuesTest,
):
"""
Common test cases will be provided by `DocumentStoreBaseExtendedTests` but
you can add more to this class.
Expand Down Expand Up @@ -528,31 +542,6 @@ def test_delete_all_documents_index_recreation(self, document_store: Elasticsear
assert len(results) == 1
assert results[0].content == "New document after delete all"

def test_count_documents_by_filter(self, document_store: ElasticsearchDocumentStore):
docs = [
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
Document(content="Doc 4", meta={"category": "A", "status": "active"}),
]
document_store.write_documents(docs)

count_a = document_store.count_documents_by_filter(
filters={"field": "category", "operator": "==", "value": "A"}
)
assert count_a == 3

count_a_active = document_store.count_documents_by_filter(
filters={
"operator": "AND",
"conditions": [
{"field": "category", "operator": "==", "value": "A"},
{"field": "status", "operator": "==", "value": "active"},
],
}
)
assert count_a_active == 2

def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocumentStore):
docs = [
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
Expand All @@ -563,47 +552,6 @@ def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocu
]
document_store.write_documents(docs)

# count distinct values for all documents
distinct_counts = document_store.count_unique_metadata_by_filter(
filters={}, metadata_fields=["category", "status", "priority"]
)
assert distinct_counts["category"] == 3 # A, B, C
assert distinct_counts["status"] == 2 # active, inactive
assert distinct_counts["priority"] == 3 # 1, 2, 3

# distinct values for documents with category="A"
distinct_counts_a = document_store.count_unique_metadata_by_filter(
filters={"field": "category", "operator": "==", "value": "A"},
metadata_fields=["category", "status", "priority"],
)
assert distinct_counts_a["category"] == 1 # Only A
assert distinct_counts_a["status"] == 2 # active, inactive
assert distinct_counts_a["priority"] == 2 # 1, 3

# distinct values for documents with status="active"
distinct_counts_active = document_store.count_unique_metadata_by_filter(
filters={"field": "status", "operator": "==", "value": "active"},
metadata_fields=["category", "status", "priority"],
)
assert distinct_counts_active["category"] == 3 # A, B, C
assert distinct_counts_active["status"] == 1 # Only active
assert distinct_counts_active["priority"] == 3 # 1, 2, 3

# distinct values with complex filter (category="A" AND status="active")
distinct_counts_a_active = document_store.count_unique_metadata_by_filter(
filters={
"operator": "AND",
"conditions": [
{"field": "category", "operator": "==", "value": "A"},
{"field": "status", "operator": "==", "value": "active"},
],
},
metadata_fields=["category", "status", "priority"],
)
assert distinct_counts_a_active["category"] == 1 # Only A
assert distinct_counts_a_active["status"] == 1 # Only active
assert distinct_counts_a_active["priority"] == 2 # 1, 3

# test with only a subset of fields
distinct_counts_subset = document_store.count_unique_metadata_by_filter(
filters={}, metadata_fields=["category", "status"]
Expand Down Expand Up @@ -642,36 +590,6 @@ def test_get_metadata_fields_info(self, document_store: ElasticsearchDocumentSto
assert fields_info["status"]["type"] == "keyword"
assert fields_info["priority"]["type"] == "long"

def test_get_metadata_field_min_max(self, document_store: ElasticsearchDocumentStore):
docs = [
Document(content="Doc 1", meta={"priority": 1, "age": 10}),
Document(content="Doc 2", meta={"priority": 5, "age": 20}),
Document(content="Doc 3", meta={"priority": 3, "age": 15}),
Document(content="Doc 4", meta={"priority": 10, "age": 5}),
Document(content="Doc 6", meta={"rating": 10.5}),
Document(content="Doc 7", meta={"rating": 20.3}),
Document(content="Doc 8", meta={"rating": 15.7}),
Document(content="Doc 9", meta={"rating": 5.2}),
]
document_store.write_documents(docs)

# test with "meta." prefix for integer field
min_max_priority = document_store.get_metadata_field_min_max("meta.priority")
assert min_max_priority["min"] == 1
assert min_max_priority["max"] == 10

# test with single value
single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
document_store.write_documents(single_doc)
min_max_single = document_store.get_metadata_field_min_max("meta.single_value")
assert min_max_single["min"] == 42
assert min_max_single["max"] == 42

# test with float values
min_max_score = document_store.get_metadata_field_min_max("meta.rating")
assert min_max_score["min"] == pytest.approx(5.2)
assert min_max_score["max"] == pytest.approx(20.3)

def test_get_metadata_field_unique_values(self, document_store: ElasticsearchDocumentStore):
docs = [
Document(content="Python programming", meta={"category": "A", "language": "Python"}),
Expand Down
Loading