Skip to content

Commit bb28cc4

Browse files
test: AstraDocumentStore use Mixin tests (#3027)
* _treat_meta_field and comparison functions now return Composed
* using Composable in function sigs, base class of both SQL and Composed
* removing all str safeguards + adding back a comment
* get_metadata_field_min_max and get_metadata_field_unique_values now strip the 'meta.' prefix from the field name before querying to avoid double-prefixing.
* removing redundant/duplicated tests now that Mixin is being used
* chore: update haystack-ai dependency to >=2.26.1
  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* small fix
* isort
* strip the meta. prefix before building the field path

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0341bf9 commit bb28cc4

3 files changed

Lines changed: 22 additions & 79 deletions

File tree

integrations/astra/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
]
2525
dependencies = [
2626
"astrapy>=1.5.0,<2.0",
27-
"haystack-ai>=2.24.0",
27+
"haystack-ai>=2.26.1",
2828
"pydantic",
2929
"typing_extensions",
3030
]

integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,9 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
602602
:param metadata_field: The metadata field to inspect.
603603
:returns: A dictionary with `min` and `max`.
604604
"""
605-
distinct_values = self.index.distinct(f"meta.{metadata_field}")
605+
606+
field = metadata_field.removeprefix("meta.")
607+
distinct_values = self.index.distinct(f"meta.{field}")
606608
comparable_values = [value for value in distinct_values if isinstance(value, str | int | float | bool)]
607609
if not comparable_values:
608610
return {"min": None, "max": None}
@@ -621,7 +623,8 @@ def get_metadata_field_unique_values(
621623
:param size: The number of values to return.
622624
:returns: A tuple containing the paginated values and the total count.
623625
"""
624-
values = AstraDocumentStore._normalize_distinct_values(self.index.distinct(f"meta.{metadata_field}"))
626+
field = metadata_field.removeprefix("meta.")
627+
values = AstraDocumentStore._normalize_distinct_values(self.index.distinct(f"meta.{field}"))
625628
if search_term:
626629
search_term_lower = search_term.lower()
627630
values = [value for value in values if search_term_lower in value.lower()]

integrations/astra/tests/test_document_store.py

Lines changed: 16 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,14 @@
1010
from haystack import Document
1111
from haystack.document_stores.errors import MissingDocumentError
1212
from haystack.document_stores.types import DuplicatePolicy
13-
from haystack.testing.document_store import DocumentStoreBaseExtendedTests
13+
from haystack.testing.document_store import (
14+
CountDocumentsByFilterTest,
15+
CountUniqueMetadataByFilterTest,
16+
DocumentStoreBaseExtendedTests,
17+
GetMetadataFieldMinMaxTest,
18+
GetMetadataFieldsInfoTest,
19+
GetMetadataFieldUniqueValuesTest,
20+
)
1421

1522
from haystack_integrations.document_stores.astra import AstraDocumentStore
1623

@@ -135,7 +142,14 @@ def test_get_metadata_field_unique_values(mock_astra_client):
135142
os.environ.get("ASTRA_DB_APPLICATION_TOKEN", "") == "", reason="ASTRA_DB_APPLICATION_TOKEN env var not set"
136143
)
137144
@pytest.mark.skipif(os.environ.get("ASTRA_DB_API_ENDPOINT", "") == "", reason="ASTRA_DB_API_ENDPOINT env var not set")
138-
class TestDocumentStore(DocumentStoreBaseExtendedTests):
145+
class TestDocumentStore(
146+
DocumentStoreBaseExtendedTests,
147+
CountDocumentsByFilterTest,
148+
CountUniqueMetadataByFilterTest,
149+
GetMetadataFieldsInfoTest,
150+
GetMetadataFieldMinMaxTest,
151+
GetMetadataFieldUniqueValuesTest,
152+
):
139153
"""
140154
Common test cases will be provided by `DocumentStoreBaseExtendedTests` but
141155
you can add more to this class.
@@ -292,80 +306,6 @@ def test_filter_documents_by_in_operator(self, document_store):
292306
TestDocumentStore.assert_documents_are_equal([result[0]], [docs[0]])
293307
TestDocumentStore.assert_documents_are_equal([result[1]], [docs[1]])
294308

295-
def test_count_documents_by_filter(self, document_store: AstraDocumentStore):
296-
docs = [
297-
Document(id="1", content="Doc 1", meta={"category": "news", "status": "published", "priority": 3}),
298-
Document(id="2", content="Doc 2", meta={"category": "docs", "status": "draft", "priority": 1}),
299-
Document(id="3", content="Doc 3", meta={"category": "news", "status": "published", "priority": 5}),
300-
]
301-
document_store.write_documents(docs)
302-
303-
count = document_store.count_documents_by_filter(
304-
{"field": "meta.status", "operator": "==", "value": "published"}
305-
)
306-
307-
assert count == 2
308-
309-
def test_count_unique_metadata_by_filter(self, document_store: AstraDocumentStore):
310-
docs = [
311-
Document(id="1", content="Doc 1", meta={"category": "news", "status": "published", "priority": 1}),
312-
Document(id="2", content="Doc 2", meta={"category": "docs", "status": "published", "priority": 2}),
313-
Document(id="3", content="Doc 3", meta={"category": "news", "status": "published", "priority": 2}),
314-
Document(id="4", content="Doc 4", meta={"category": "faq", "status": "draft", "priority": 3}),
315-
]
316-
document_store.write_documents(docs)
317-
318-
counts = document_store.count_unique_metadata_by_filter(
319-
{"field": "meta.status", "operator": "==", "value": "published"},
320-
["category", "priority"],
321-
)
322-
323-
assert counts == {"category": 2, "priority": 2}
324-
325-
def test_get_metadata_fields_info(self, document_store: AstraDocumentStore):
326-
docs = [
327-
Document(id="1", content="Doc 1", meta={"category": "news", "status": "published", "priority": 1}),
328-
Document(id="2", content="Doc 2", meta={"category": "docs", "status": "draft", "priority": 2}),
329-
]
330-
document_store.write_documents(docs)
331-
332-
fields_info = document_store.get_metadata_fields_info()
333-
334-
assert fields_info == {
335-
"content": {"type": "text"},
336-
"category": {"type": "keyword"},
337-
"status": {"type": "keyword"},
338-
"priority": {"type": "long"},
339-
}
340-
341-
def test_get_metadata_field_min_max(self, document_store: AstraDocumentStore):
342-
docs = [
343-
Document(id="1", content="Doc 1", meta={"priority": 3}),
344-
Document(id="2", content="Doc 2", meta={"priority": 1}),
345-
Document(id="3", content="Doc 3", meta={"priority": 7}),
346-
]
347-
document_store.write_documents(docs)
348-
349-
result = document_store.get_metadata_field_min_max("priority")
350-
351-
assert result == {"min": 1, "max": 7}
352-
353-
def test_get_metadata_field_unique_values(self, document_store: AstraDocumentStore):
354-
docs = [
355-
Document(id="1", content="Doc 1", meta={"category": "alpha"}),
356-
Document(id="2", content="Doc 2", meta={"category": "beta"}),
357-
Document(id="3", content="Doc 3", meta={"category": "alphabet"}),
358-
Document(id="4", content="Doc 4", meta={"category": "gamma"}),
359-
]
360-
document_store.write_documents(docs)
361-
362-
values, total_count = document_store.get_metadata_field_unique_values(
363-
"category", search_term="alp", from_=0, size=10
364-
)
365-
366-
assert values == ["alpha", "alphabet"]
367-
assert total_count == 2
368-
369309
@pytest.mark.skip(reason="Unsupported filter operator not.")
370310
def test_not_operator(self, document_store, filterable_docs):
371311
pass

0 commit comments

Comments
 (0)