Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/azure_ai_search/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"haystack-ai>=2.26.0",
"haystack-ai>=2.26.1",
"azure-search-documents>=11.5",
"azure-identity"
]
Expand Down
213 changes: 139 additions & 74 deletions integrations/azure_ai_search/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@
from haystack.dataclasses.document import Document
from haystack.errors import FilterError
from haystack.testing.document_store import (
CountDocumentsByFilterTest,
CountDocumentsTest,
CountUniqueMetadataByFilterTest,
DeleteAllTest,
DeleteByFilterTest,
DeleteDocumentsTest,
FilterableDocsFixtureMixin,
FilterDocumentsTest,
GetMetadataFieldMinMaxTest,
GetMetadataFieldsInfoTest,
GetMetadataFieldUniqueValuesTest,
UpdateByFilterTest,
WriteDocumentsTest,
)
Expand Down Expand Up @@ -256,80 +261,6 @@ def _build_mock_document_store_with_schema(index_fields):
return store, search_client, index_client


def test_count_documents_by_filter():
    """Check that ``count_documents_by_filter`` returns the count reported by the search result.

    The search client is mocked so that the object returned by ``search`` answers
    ``get_count()`` with 3. The test also verifies that exactly one search was
    issued and that it requested ``include_total_count=True``.
    """
    schema = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),
    ]
    store, client, _ = _build_mock_document_store_with_schema(schema)
    # Configure the fake search result in one step: result.get_count() -> 3.
    client.search.return_value = Mock(**{"get_count.return_value": 3})

    category_filter = {"field": "meta.category", "operator": "==", "value": "news"}
    matched = store.count_documents_by_filter(category_filter)

    assert matched == 3
    client.search.assert_called_once()
    _, search_kwargs = client.search.call_args
    assert search_kwargs["include_total_count"] is True


def test_count_unique_metadata_by_filter():
    """Check that distinct values are counted per requested metadata field.

    The mocked search returns three rows holding two distinct ``category`` values
    and two distinct ``status`` values. Field names are passed both with and
    without the ``meta.`` prefix, and the expected result is keyed by the bare names.
    """
    schema = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),
        SimpleField(name="status", type=SearchFieldDataType.String, filterable=True),
    ]
    store, client, _ = _build_mock_document_store_with_schema(schema)
    rows = [
        {"category": "news", "status": "draft"},
        {"category": "docs", "status": "draft"},
        {"category": "news", "status": "published"},
    ]
    client.search.return_value = rows

    not_archived = {"field": "meta.status", "operator": "!=", "value": "archived"}
    unique_counts = store.count_unique_metadata_by_filter(
        filters=not_archived,
        metadata_fields=["meta.category", "status"],
    )

    assert unique_counts == {"category": 2, "status": 2}


def test_get_metadata_fields_info():
    """Check the field-type mapping reported by ``get_metadata_fields_info``.

    With a mocked index schema, the expected result lists every field except
    ``id`` and describes the searchable field as ``text``, string fields as
    ``keyword`` and the Int32 field as ``long``.
    """
    schema = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="category", type=SearchFieldDataType.String, filterable=True),
        SimpleField(name="status", type=SearchFieldDataType.String, filterable=True),
        SimpleField(name="priority", type=SearchFieldDataType.Int32, filterable=True),
    ]
    store, _, _ = _build_mock_document_store_with_schema(schema)

    expected_info = {
        "content": {"type": "text"},
        "category": {"type": "keyword"},
        "status": {"type": "keyword"},
        "priority": {"type": "long"},
    }
    assert store.get_metadata_fields_info() == expected_info


def test_get_metadata_field_min_max():
    """Check that min and max are derived from the values returned by the search client.

    The mocked search yields ``priority`` values 10, 2 and 7; the field is
    requested with the ``meta.`` prefix.
    """
    schema = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="priority", type=SearchFieldDataType.Int32, filterable=True),
    ]
    store, client, _ = _build_mock_document_store_with_schema(schema)
    client.search.return_value = [{"priority": value} for value in (10, 2, 7)]

    bounds = store.get_metadata_field_min_max("meta.priority")

    assert bounds == {"min": 2, "max": 10}


def test_get_metadata_field_unique_values():
index_fields = [
SimpleField(name="id", type=SearchFieldDataType.String, key=True, filterable=True),
Expand Down Expand Up @@ -397,6 +328,11 @@ class TestDocumentStore(
FilterableDocsFixtureMixin,
WriteDocumentsTest,
UpdateByFilterTest,
CountDocumentsByFilterTest,
CountUniqueMetadataByFilterTest,
GetMetadataFieldsInfoTest,
GetMetadataFieldMinMaxTest,
GetMetadataFieldUniqueValuesTest,
):
# Comparison method taking the received and expected document lists; it forwards
# both, unchanged and in the same order, to the `_assert_documents_are_equal`
# helper (defined outside this view).
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
_assert_documents_are_equal(received, expected)
Expand Down Expand Up @@ -526,6 +462,135 @@ def test_update_by_filter_invalid_field(self, document_store: AzureAISearchDocum
assert "nonexistent_field" in str(exc_info.value)
assert "not defined in index schema" in str(exc_info.value)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str, "status": str}}], indirect=True)
def test_count_documents_by_filter_simple(self, document_store: AzureAISearchDocumentStore):
    """Run the shared simple-filter count test.

    The ``document_store`` fixture is parametrized indirectly with string
    ``category`` and ``status`` metadata fields.
    """
    shared_test = CountDocumentsByFilterTest.test_count_documents_by_filter_simple
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str, "status": str}}], indirect=True)
def test_count_documents_by_filter_compound(self, document_store: AzureAISearchDocumentStore):
    """Run the shared compound-filter count test.

    The ``document_store`` fixture is parametrized indirectly with string
    ``category`` and ``status`` metadata fields.
    """
    shared_test = CountDocumentsByFilterTest.test_count_documents_by_filter_compound
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str}}], indirect=True)
def test_count_documents_by_filter_no_matches(self, document_store: AzureAISearchDocumentStore):
    """Run the shared no-matches count test.

    The ``document_store`` fixture is parametrized indirectly with a string
    ``category`` metadata field.
    """
    shared_test = CountDocumentsByFilterTest.test_count_documents_by_filter_no_matches
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str}}], indirect=True)
def test_count_documents_by_filter_empty_collection(self, document_store: AzureAISearchDocumentStore):
    """Run the shared empty-collection count test.

    The ``document_store`` fixture is parametrized indirectly with a string
    ``category`` metadata field.
    """
    shared_test = CountDocumentsByFilterTest.test_count_documents_by_filter_empty_collection
    shared_test(document_store)

@pytest.mark.parametrize(
    "document_store",
    [
        {
            "metadata_fields": {
                "category": str,
                "status": str,
                "priority": int,
            }
        }
    ],
    indirect=True,
)
def test_count_unique_metadata_by_filter_all_documents(self, document_store: AzureAISearchDocumentStore):
    """Run the shared unique-metadata count test over all documents.

    The ``document_store`` fixture is parametrized indirectly with
    ``category``/``status`` string fields and a ``priority`` int field.
    """
    shared_test = CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_all_documents
    shared_test(document_store)

@pytest.mark.parametrize(
    "document_store",
    [
        {
            "metadata_fields": {
                "category": str,
                "status": str,
                "priority": int,
            }
        }
    ],
    indirect=True,
)
def test_count_unique_metadata_by_filter_with_filter(self, document_store: AzureAISearchDocumentStore):
    """Run the shared unique-metadata count test with a filter applied.

    The ``document_store`` fixture is parametrized indirectly with
    ``category``/``status`` string fields and a ``priority`` int field.
    """
    shared_test = CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_with_filter
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str, "year": int}}], indirect=True)
def test_count_unique_metadata_by_filter_with_multiple_filters(self, document_store: AzureAISearchDocumentStore):
    """Run the shared unique-metadata count test with multiple filters.

    The ``document_store`` fixture is parametrized indirectly with a string
    ``category`` field and an int ``year`` field.
    """
    shared_test = CountUniqueMetadataByFilterTest.test_count_unique_metadata_by_filter_with_multiple_filters
    shared_test(document_store)

@pytest.mark.parametrize(
    "document_store",
    [
        {
            "metadata_fields": {
                "category": str,
                "status": str,
                "priority": int,
                "rating": float,
            }
        }
    ],
    indirect=True,
)
def test_get_metadata_fields_info(self, document_store: AzureAISearchDocumentStore):
    """Run the shared metadata-fields-info test.

    The ``document_store`` fixture is parametrized indirectly with
    ``category``/``status`` string fields, a ``priority`` int field and a
    ``rating`` float field.
    """
    shared_test = GetMetadataFieldsInfoTest.test_get_metadata_fields_info
    shared_test(document_store)

# Stub with an empty (`...`) body, marked as skipped; the skip reason states
# that Azure AI Search reports index schema fields even when the collection
# holds no documents.
@pytest.mark.skip(reason="Azure AI Search returns index schema fields even on empty collections.")
def test_get_metadata_fields_info_empty_collection(self, document_store: AzureAISearchDocumentStore): ...

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"priority": int}}], indirect=True)
def test_get_metadata_field_min_max_numeric(self, document_store: AzureAISearchDocumentStore):
    """Run the shared numeric min/max test.

    The ``document_store`` fixture is parametrized indirectly with an int
    ``priority`` metadata field.
    """
    shared_test = GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_numeric
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"rating": float}}], indirect=True)
def test_get_metadata_field_min_max_float(self, document_store: AzureAISearchDocumentStore):
    """Run the shared float min/max test.

    The ``document_store`` fixture is parametrized indirectly with a float
    ``rating`` metadata field.
    """
    shared_test = GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_float
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"priority": int}}], indirect=True)
def test_get_metadata_field_min_max_single_value(self, document_store: AzureAISearchDocumentStore):
    """Run the shared single-value min/max test.

    The ``document_store`` fixture is parametrized indirectly with an int
    ``priority`` metadata field.
    """
    shared_test = GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_single_value
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"priority": int}}], indirect=True)
def test_get_metadata_field_min_max_empty_collection(self, document_store: AzureAISearchDocumentStore):
    """Run the shared empty-collection min/max test.

    The ``document_store`` fixture is parametrized indirectly with an int
    ``priority`` metadata field.
    """
    shared_test = GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_empty_collection
    shared_test(document_store)

@pytest.mark.parametrize(
    "document_store",
    [
        {
            "metadata_fields": {
                "priority": int,
                "age": int,
                "rating": float,
            }
        }
    ],
    indirect=True,
)
def test_get_metadata_field_min_max_meta_prefix(self, document_store: AzureAISearchDocumentStore):
    """Run the shared ``meta.``-prefix min/max test.

    The ``document_store`` fixture is parametrized indirectly with int
    ``priority``/``age`` fields and a float ``rating`` field.
    """
    shared_test = GetMetadataFieldMinMaxTest.test_get_metadata_field_min_max_meta_prefix
    shared_test(document_store)

@pytest.mark.parametrize("document_store", [{"metadata_fields": {"category": str}}], indirect=True)
def test_get_metadata_field_unique_values_basic(self, document_store: AzureAISearchDocumentStore):
    """Run the shared basic unique-values test.

    The ``document_store`` fixture is parametrized indirectly with a string
    ``category`` metadata field.
    """
    shared_test = GetMetadataFieldUniqueValuesTest.test_get_metadata_field_unique_values_basic
    shared_test(document_store)

@pytest.mark.parametrize(
"document_store",
[{"metadata_fields": {"category": str, "status": str, "priority": int}}],
Expand Down
Loading