Skip to content

Commit 9eabba4

Browse files
test: OpensearchDocumentStore use more Mixin tests (#3002)
* adding Mixin tests + removing repeated tests * fix formatting
1 parent 545492e commit 9eabba4

2 files changed

Lines changed: 18 additions & 157 deletions

File tree

integrations/opensearch/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
]
2525

2626
dependencies = [
27-
"haystack-ai>=2.26.0",
27+
"haystack-ai>=2.26.1",
2828
"opensearch-py[async]>=3.0.0"
2929
]
3030

integrations/opensearch/tests/test_document_store.py

Lines changed: 17 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
from haystack.dataclasses.document import Document
1010
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1111
from haystack.document_stores.types import DuplicatePolicy
12-
from haystack.testing.document_store import DocumentStoreBaseExtendedTests
12+
from haystack.testing.document_store import (
13+
CountDocumentsByFilterTest,
14+
CountUniqueMetadataByFilterTest,
15+
DocumentStoreBaseExtendedTests,
16+
GetMetadataFieldMinMaxTest,
17+
GetMetadataFieldsInfoTest,
18+
GetMetadataFieldUniqueValuesTest,
19+
)
1320
from opensearchpy.exceptions import RequestError
1421

1522
from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
@@ -218,10 +225,16 @@ def test_routing_in_delete(mock_bulk, _mock_opensearch_client):
218225

219226

220227
@pytest.mark.integration
221-
class TestDocumentStore(DocumentStoreBaseExtendedTests):
228+
class TestDocumentStore(
229+
CountDocumentsByFilterTest,
230+
CountUniqueMetadataByFilterTest,
231+
DocumentStoreBaseExtendedTests,
232+
GetMetadataFieldsInfoTest,
233+
GetMetadataFieldMinMaxTest,
234+
GetMetadataFieldUniqueValuesTest,
235+
):
222236
"""
223-
Common test cases will be provided by `DocumentStoreBaseExtendedTests` but
224-
you can add more to this class.
237+
Common test cases will be provided by `DocumentStoreBaseExtendedTests` but you can add more to this class.
225238
"""
226239

227240
@pytest.fixture
@@ -607,158 +620,6 @@ def test_delete_all_documents_index_recreation(self, document_store: OpenSearchD
607620
assert len(results) == 1
608621
assert results[0].content == "New document after delete all"
609622

610-
def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore):
611-
docs = [
612-
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
613-
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
614-
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
615-
Document(content="Doc 4", meta={"category": "A", "status": "active"}),
616-
]
617-
document_store.write_documents(docs)
618-
assert document_store.count_documents() == 4
619-
620-
count_a = document_store.count_documents_by_filter(
621-
filters={"field": "meta.category", "operator": "==", "value": "A"}
622-
)
623-
assert count_a == 3
624-
625-
count_a_active = document_store.count_documents_by_filter(
626-
filters={
627-
"operator": "AND",
628-
"conditions": [
629-
{"field": "meta.category", "operator": "==", "value": "A"},
630-
{"field": "meta.status", "operator": "==", "value": "active"},
631-
],
632-
}
633-
)
634-
assert count_a_active == 2
635-
636-
def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumentStore):
637-
docs = [
638-
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
639-
Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
640-
Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
641-
Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
642-
Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}),
643-
]
644-
document_store.write_documents(docs)
645-
assert document_store.count_documents() == 5
646-
647-
# Count distinct values for all documents
648-
distinct_counts = document_store.count_unique_metadata_by_filter(
649-
filters={}, metadata_fields=["category", "status", "priority"]
650-
)
651-
assert distinct_counts["category"] == 3 # A, B, C
652-
assert distinct_counts["status"] == 2 # active, inactive
653-
assert distinct_counts["priority"] == 3 # 1, 2, 3
654-
655-
# Count distinct values for documents with category="A"
656-
distinct_counts_a = document_store.count_unique_metadata_by_filter(
657-
filters={"field": "meta.category", "operator": "==", "value": "A"},
658-
metadata_fields=["category", "status", "priority"],
659-
)
660-
assert distinct_counts_a["category"] == 1 # Only A
661-
assert distinct_counts_a["status"] == 2 # active, inactive
662-
assert distinct_counts_a["priority"] == 2 # 1, 3
663-
664-
# Count distinct values for documents with status="active"
665-
distinct_counts_active = document_store.count_unique_metadata_by_filter(
666-
filters={"field": "meta.status", "operator": "==", "value": "active"},
667-
metadata_fields=["category", "status", "priority"],
668-
)
669-
assert distinct_counts_active["category"] == 3 # A, B, C
670-
assert distinct_counts_active["status"] == 1 # Only active
671-
assert distinct_counts_active["priority"] == 3 # 1, 2, 3
672-
673-
# Count distinct values with complex filter (category="A" AND status="active")
674-
distinct_counts_a_active = document_store.count_unique_metadata_by_filter(
675-
filters={
676-
"operator": "AND",
677-
"conditions": [
678-
{"field": "meta.category", "operator": "==", "value": "A"},
679-
{"field": "meta.status", "operator": "==", "value": "active"},
680-
],
681-
},
682-
metadata_fields=["category", "status", "priority"],
683-
)
684-
assert distinct_counts_a_active["category"] == 1 # Only A
685-
assert distinct_counts_a_active["status"] == 1 # Only active
686-
assert distinct_counts_a_active["priority"] == 2 # 1, 3
687-
688-
# Test with only a subset of fields
689-
distinct_counts_subset = document_store.count_unique_metadata_by_filter(
690-
filters={}, metadata_fields=["category", "status"]
691-
)
692-
assert distinct_counts_subset["category"] == 3
693-
assert distinct_counts_subset["status"] == 2
694-
assert "priority" not in distinct_counts_subset
695-
696-
# Test field name normalization (with "meta." prefix)
697-
distinct_counts_normalized = document_store.count_unique_metadata_by_filter(
698-
filters={}, metadata_fields=["meta.category", "status", "meta.priority"]
699-
)
700-
assert distinct_counts_normalized["category"] == 3
701-
assert distinct_counts_normalized["status"] == 2
702-
assert distinct_counts_normalized["priority"] == 3
703-
704-
# Test error handling when field doesn't exist
705-
with pytest.raises(ValueError, match="Fields not found in index mapping"):
706-
document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["nonexistent_field"])
707-
708-
def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore):
709-
docs = [
710-
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
711-
Document(content="Doc 2", meta={"category": "B", "status": "inactive"}),
712-
]
713-
document_store.write_documents(docs)
714-
715-
fields_info = document_store.get_metadata_fields_info()
716-
717-
# Verify that fields_info contains expected fields
718-
assert "category" in fields_info
719-
assert "status" in fields_info
720-
assert "priority" in fields_info
721-
722-
assert fields_info["category"]["type"] == "keyword"
723-
assert fields_info["status"]["type"] == "keyword"
724-
assert fields_info["priority"]["type"] == "long"
725-
726-
def test_get_metadata_field_min_max(self, document_store: OpenSearchDocumentStore):
727-
# Test with integer values
728-
docs = [
729-
Document(content="Doc 1", meta={"priority": 1, "age": 10}),
730-
Document(content="Doc 2", meta={"priority": 5, "age": 20}),
731-
Document(content="Doc 3", meta={"priority": 3, "age": 15}),
732-
Document(content="Doc 4", meta={"priority": 10, "age": 5}),
733-
Document(content="Doc 6", meta={"rating": 10.5}),
734-
Document(content="Doc 7", meta={"rating": 20.3}),
735-
Document(content="Doc 8", meta={"rating": 15.7}),
736-
Document(content="Doc 9", meta={"rating": 5.2}),
737-
]
738-
document_store.write_documents(docs)
739-
740-
# Test with "meta." prefix for integer field
741-
min_max_priority = document_store.get_metadata_field_min_max("meta.priority")
742-
assert min_max_priority["min"] == 1
743-
assert min_max_priority["max"] == 10
744-
745-
# Test with "meta." prefix for another integer field
746-
min_max_rating = document_store.get_metadata_field_min_max("meta.age")
747-
assert min_max_rating["min"] == 5
748-
assert min_max_rating["max"] == 20
749-
750-
# Test with single value
751-
single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
752-
document_store.write_documents(single_doc)
753-
min_max_single = document_store.get_metadata_field_min_max("meta.single_value")
754-
assert min_max_single["min"] == 42
755-
assert min_max_single["max"] == 42
756-
757-
# Test with float values
758-
min_max_score = document_store.get_metadata_field_min_max("meta.rating")
759-
assert min_max_score["min"] == pytest.approx(5.2)
760-
assert min_max_score["max"] == pytest.approx(20.3)
761-
762623
def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocumentStore):
763624
# Test with string values
764625
docs = [

0 commit comments

Comments
 (0)