diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml index 22547cfd9e..53276952a2 100644 --- a/integrations/opensearch/pyproject.toml +++ b/integrations/opensearch/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ ] dependencies = [ - "haystack-ai>=2.26.0", + "haystack-ai>=2.26.1", "opensearch-py[async]>=3.0.0" ] diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py index fac751035f..37a83884ac 100644 --- a/integrations/opensearch/tests/test_document_store.py +++ b/integrations/opensearch/tests/test_document_store.py @@ -9,7 +9,14 @@ from haystack.dataclasses.document import Document from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy -from haystack.testing.document_store import DocumentStoreBaseExtendedTests +from haystack.testing.document_store import ( + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + DocumentStoreBaseExtendedTests, + GetMetadataFieldMinMaxTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldUniqueValuesTest, +) from opensearchpy.exceptions import RequestError from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore @@ -218,10 +225,16 @@ def test_routing_in_delete(mock_bulk, _mock_opensearch_client): @pytest.mark.integration -class TestDocumentStore(DocumentStoreBaseExtendedTests): +class TestDocumentStore( + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + DocumentStoreBaseExtendedTests, + GetMetadataFieldsInfoTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldUniqueValuesTest, +): """ - Common test cases will be provided by `DocumentStoreBaseExtendedTests` but - you can add more to this class. + Common test cases will be provided by `DocumentStoreBaseExtendedTests` but you can add more to this class. """ @pytest.fixture @@ -607,158 +620,6 @@ def test_delete_all_documents_index_recreation(self, document_store: OpenSearchD assert len(results) == 1 assert results[0].content == "New document after delete all" - def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active"}), - Document(content="Doc 2", meta={"category": "B", "status": "active"}), - Document(content="Doc 3", meta={"category": "A", "status": "inactive"}), - Document(content="Doc 4", meta={"category": "A", "status": "active"}), - ] - document_store.write_documents(docs) - assert document_store.count_documents() == 4 - - count_a = document_store.count_documents_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "A"} - ) - assert count_a == 3 - - count_a_active = document_store.count_documents_by_filter( - filters={ - "operator": "AND", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "A"}, - {"field": "meta.status", "operator": "==", "value": "active"}, - ], - } - ) - assert count_a_active == 2 - - def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), - Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}), - Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}), - Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}), - Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}), - ] - document_store.write_documents(docs) - assert document_store.count_documents() == 5 - - # Count distinct values for all documents - distinct_counts = document_store.count_unique_metadata_by_filter( - filters={}, metadata_fields=["category", "status", "priority"] - ) - assert distinct_counts["category"] == 3 # A, B, C - assert distinct_counts["status"] == 2 # active, inactive - assert distinct_counts["priority"] == 3 # 1, 2, 3 - - # Count distinct values for documents with category="A" - distinct_counts_a = document_store.count_unique_metadata_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "A"}, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_a["category"] == 1 # Only A - assert distinct_counts_a["status"] == 2 # active, inactive - assert distinct_counts_a["priority"] == 2 # 1, 3 - - # Count distinct values for documents with status="active" - distinct_counts_active = document_store.count_unique_metadata_by_filter( - filters={"field": "meta.status", "operator": "==", "value": "active"}, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_active["category"] == 3 # A, B, C - assert distinct_counts_active["status"] == 1 # Only active - assert distinct_counts_active["priority"] == 3 # 1, 2, 3 - - # Count distinct values with complex filter (category="A" AND status="active") - distinct_counts_a_active = document_store.count_unique_metadata_by_filter( - filters={ - "operator": "AND", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "A"}, - {"field": "meta.status", "operator": "==", "value": "active"}, - ], - }, - metadata_fields=["category", "status", "priority"], - ) - assert distinct_counts_a_active["category"] == 1 # Only A - assert distinct_counts_a_active["status"] == 1 # Only active - assert distinct_counts_a_active["priority"] == 2 # 1, 3 - - # Test with only a subset of fields - distinct_counts_subset = document_store.count_unique_metadata_by_filter( - filters={}, metadata_fields=["category", "status"] - ) - assert distinct_counts_subset["category"] == 3 - assert distinct_counts_subset["status"] == 2 - assert "priority" not in distinct_counts_subset - - # Test field name normalization (with "meta." prefix) - distinct_counts_normalized = document_store.count_unique_metadata_by_filter( - filters={}, metadata_fields=["meta.category", "status", "meta.priority"] - ) - assert distinct_counts_normalized["category"] == 3 - assert distinct_counts_normalized["status"] == 2 - assert distinct_counts_normalized["priority"] == 3 - - # Test error handling when field doesn't exist - with pytest.raises(ValueError, match="Fields not found in index mapping"): - document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["nonexistent_field"]) - - def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore): - docs = [ - Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), - Document(content="Doc 2", meta={"category": "B", "status": "inactive"}), - ] - document_store.write_documents(docs) - - fields_info = document_store.get_metadata_fields_info() - - # Verify that fields_info contains expected fields - assert "category" in fields_info - assert "status" in fields_info - assert "priority" in fields_info - - assert fields_info["category"]["type"] == "keyword" - assert fields_info["status"]["type"] == "keyword" - assert fields_info["priority"]["type"] == "long" - - def test_get_metadata_field_min_max(self, document_store: OpenSearchDocumentStore): - # Test with integer values - docs = [ - Document(content="Doc 1", meta={"priority": 1, "age": 10}), - Document(content="Doc 2", meta={"priority": 5, "age": 20}), - Document(content="Doc 3", meta={"priority": 3, "age": 15}), - Document(content="Doc 4", meta={"priority": 10, "age": 5}), - Document(content="Doc 6", meta={"rating": 10.5}), - Document(content="Doc 7", meta={"rating": 20.3}), - Document(content="Doc 8", meta={"rating": 15.7}), - Document(content="Doc 9", meta={"rating": 5.2}), - ] - document_store.write_documents(docs) - - # Test with "meta." prefix for integer field - min_max_priority = document_store.get_metadata_field_min_max("meta.priority") - assert min_max_priority["min"] == 1 - assert min_max_priority["max"] == 10 - - # Test with "meta." prefix for another integer field - min_max_rating = document_store.get_metadata_field_min_max("meta.age") - assert min_max_rating["min"] == 5 - assert min_max_rating["max"] == 20 - - # Test with single value - single_doc = [Document(content="Doc 5", meta={"single_value": 42})] - document_store.write_documents(single_doc) - min_max_single = document_store.get_metadata_field_min_max("meta.single_value") - assert min_max_single["min"] == 42 - assert min_max_single["max"] == 42 - - # Test with float values - min_max_score = document_store.get_metadata_field_min_max("meta.rating") - assert min_max_score["min"] == pytest.approx(5.2) - assert min_max_score["max"] == pytest.approx(20.3) - def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocumentStore): # Test with string values docs = [