diff --git a/integrations/azure_ai_search/pyproject.toml b/integrations/azure_ai_search/pyproject.toml index c69eba8b28..066011bd31 100644 --- a/integrations/azure_ai_search/pyproject.toml +++ b/integrations/azure_ai_search/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai>=2.26.0", + "haystack-ai>=2.26.1", "azure-search-documents>=11.5", "azure-identity" ] diff --git a/integrations/azure_ai_search/tests/test_document_store.py b/integrations/azure_ai_search/tests/test_document_store.py index 98f014502c..1c693f7943 100644 --- a/integrations/azure_ai_search/tests/test_document_store.py +++ b/integrations/azure_ai_search/tests/test_document_store.py @@ -19,12 +19,17 @@ from haystack.dataclasses.document import Document from haystack.errors import FilterError from haystack.testing.document_store import ( + CountDocumentsByFilterTest, CountDocumentsTest, + CountUniqueMetadataByFilterTest, DeleteAllTest, DeleteByFilterTest, DeleteDocumentsTest, FilterableDocsFixtureMixin, FilterDocumentsTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldUniqueValuesTest, UpdateByFilterTest, WriteDocumentsTest, ) @@ -256,80 +261,6 @@ def _build_mock_document_store_with_schema(index_fields): return store, search_client, index_client -def test_count_documents_by_filter(): - index_fields = [ - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), - SearchableField(name="content", type=SearchFieldDataType.String), - SimpleField(name="category", type=SearchFieldDataType.String, filterable=True), - ] - document_store, search_client, _ = _build_mock_document_store_with_schema(index_fields) - count_result = Mock() - count_result.get_count.return_value = 3 - search_client.search.return_value = count_result - - count = document_store.count_documents_by_filter({"field": "meta.category", "operator": "==", "value": "news"}) - - assert count == 3 - search_client.search.assert_called_once() - assert search_client.search.call_args.kwargs["include_total_count"] is True - - -def test_count_unique_metadata_by_filter(): - index_fields = [ - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), - SearchableField(name="content", type=SearchFieldDataType.String), - SimpleField(name="category", type=SearchFieldDataType.String, filterable=True), - SimpleField(name="status", type=SearchFieldDataType.String, filterable=True), - ] - document_store, search_client, _ = _build_mock_document_store_with_schema(index_fields) - search_client.search.return_value = [ - {"category": "news", "status": "draft"}, - {"category": "docs", "status": "draft"}, - {"category": "news", "status": "published"}, - ] - - counts = document_store.count_unique_metadata_by_filter( - filters={"field": "meta.status", "operator": "!=", "value": "archived"}, - metadata_fields=["meta.category", "status"], - ) - - assert counts == {"category": 2, "status": 2} - - -def test_get_metadata_fields_info(): - index_fields = [ - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), - SearchableField(name="content", type=SearchFieldDataType.String), - SimpleField(name="category", type=SearchFieldDataType.String, filterable=True), - SimpleField(name="status", type=SearchFieldDataType.String, filterable=True), - SimpleField(name="priority", type=SearchFieldDataType.Int32, filterable=True), - ] - document_store, _, _ = _build_mock_document_store_with_schema(index_fields) - - info = document_store.get_metadata_fields_info() - - assert info == { - "content": {"type": "text"}, - "category": {"type": "keyword"}, - "status": {"type": "keyword"}, - "priority": {"type": "long"}, - } - - -def test_get_metadata_field_min_max(): - index_fields = [ - SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), - SearchableField(name="content", type=SearchFieldDataType.String), - SimpleField(name="priority", type=SearchFieldDataType.Int32, filterable=True), - ] - document_store, search_client, _ = _build_mock_document_store_with_schema(index_fields) - search_client.search.return_value = [{"priority": 10}, {"priority": 2}, {"priority": 7}] - - result = document_store.get_metadata_field_min_max("meta.priority") - - assert result == {"min": 2, "max": 10} - - def test_get_metadata_field_unique_values(): index_fields = [ SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True), @@ -397,6 +328,11 @@ class TestDocumentStore( FilterableDocsFixtureMixin, WriteDocumentsTest, UpdateByFilterTest, + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldUniqueValuesTest, ): def assert_documents_are_equal(self, received: list[Document], expected: list[Document]): _assert_documents_are_equal(received, expected) @@ -526,6 +462,135 @@ def test_update_by_filter_invalid_field(self, document_store: AzureAISearchDocum assert "nonexistent_field" in str(exc_info.value) assert "not defined in index schema" in str(exc_info.value) + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str, "status": str}}], + indirect=True, + ) + def test_count_documents_by_filter_simple(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountDocumentsByFilterTest.test_count_documents_by_filter_simple(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str, "status": str}}], + indirect=True, + ) + def test_count_documents_by_filter_compound(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountDocumentsByFilterTest.test_count_documents_by_filter_compound(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str}}], + indirect=True, + ) + def test_count_documents_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountDocumentsByFilterTest.test_count_documents_by_filter_no_matches(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str}}], + indirect=True, + ) + def test_count_documents_by_filter_empty_collection(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountDocumentsByFilterTest.test_count_documents_by_filter_empty_collection(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str, "status": str, "priority": int}}], + indirect=True, + ) + def test_count_unique_metadata_by_filter_all_documents(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_all_documents(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str, "status": str, "priority": int}}], + indirect=True, + ) + def test_count_unique_metadata_by_filter_with_filter(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_with_filter(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str, "year": int}}], + indirect=True, + ) + def test_count_unique_metadata_by_filter_with_multiple_filters(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_with_multiple_filters(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str, "status": str, "priority": int, "rating": float}}], + indirect=True, + ) + def test_get_metadata_fields_info(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldsInfoTest.test_get_metadata_fields_info(document_store) + + @pytest.mark.skip(reason="Azure AI Search returns index schema fields even on empty collections.") + def test_get_metadata_fields_info_empty_collection(self, document_store: AzureAISearchDocumentStore): ... + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"priority": int}}], + indirect=True, + ) + def test_get_metadata_field_min_max_numeric(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_numeric(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"rating": float}}], + indirect=True, + ) + def test_get_metadata_field_min_max_float(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_float(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"priority": int}}], + indirect=True, + ) + def test_get_metadata_field_min_max_single_value(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_single_value(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"priority": int}}], + indirect=True, + ) + def test_get_metadata_field_min_max_empty_collection(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_empty_collection(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"priority": int, "age": int, "rating": float}}], + indirect=True, + ) + def test_get_metadata_field_min_max_meta_prefix(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_meta_prefix(document_store) + + @pytest.mark.parametrize( + "document_store", + [{"metadata_fields": {"category": str}}], + indirect=True, + ) + def test_get_metadata_field_unique_values_basic(self, document_store: AzureAISearchDocumentStore): + """Override to use a document_store with required metadata fields.""" + GetMetadataFieldUniqueValuesTest.test_get_metadata_field_unique_values_basic(document_store) + @pytest.mark.parametrize( "document_store", [{"metadata_fields": {"category": str, "status": str, "priority": int}}],