Skip to content

Commit 23c25d4

Browse files
feat: add get_metadata async mixin tests to haystack.testing + InMemoryDocumentStore async operations and tests (#10963)
* Add get_metadata async-related mixin tests * Add get_metadata async methods to the InMemoryDocumentStore * Use the async metadata mixin tests in the InMemoryDocumentStore tests * Add release notes * Use double backticks in release notes * Update haystack/testing/document_store_async.py Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com> --------- Co-authored-by: Stefano Fiorucci <stefanofiorucci@gmail.com>
1 parent 5b818ff commit 23c25d4

4 files changed

Lines changed: 222 additions & 0 deletions

File tree

haystack/document_stores/in_memory/document_store.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,44 @@ async def count_unique_metadata_by_filter_async(
908908
lambda: self.count_unique_metadata_by_filter(filters=filters, metadata_fields=metadata_fields),
909909
)
910910

911+
async def get_metadata_fields_info_async(self) -> dict[str, dict[str, str]]:
    """
    Asynchronously returns information about the metadata fields present in the stored documents.

    Delegates to the synchronous `get_metadata_fields_info`, scheduled on `self.executor`
    through the running event loop's `run_in_executor`.

    Types are inferred from the stored values (keyword, int, float, boolean).

    :returns: A dictionary mapping each metadata field name to a dict with a "type" key.
    """
    loop = asyncio.get_running_loop()
    fields_info = await loop.run_in_executor(self.executor, self.get_metadata_fields_info)
    return fields_info
920+
921+
async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
    """
    Asynchronously returns the minimum and maximum values of a metadata field across all documents.

    Delegates to the synchronous `get_metadata_field_min_max`, scheduled on `self.executor`
    through the running event loop's `run_in_executor`.

    :param metadata_field: The metadata field name. Can include or omit the "meta." prefix.
    :returns: A dictionary with "min" and "max" keys. Returns `{"min": None, "max": None}`
        if the field is missing or has no values.
    """

    def _compute_min_max() -> dict[str, Any]:
        # run_in_executor only forwards positional arguments, so the keyword call is wrapped here.
        return self.get_metadata_field_min_max(metadata_field=metadata_field)

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(self.executor, _compute_min_max)
932+
933+
async def get_metadata_field_unique_values_async(
934+
self, metadata_field: str, search_term: str | None = None
935+
) -> tuple[list[str], int]:
936+
"""
937+
Returns unique values for a metadata field, optionally filtered by a search term in content.
938+
939+
:param metadata_field: The metadata field name. Can include or omit the "meta." prefix.
940+
:param search_term: If set, only documents whose content contains this term (case-insensitive)
941+
are considered.
942+
:returns: A tuple of (list of unique values, total count of unique values).
943+
"""
944+
return await asyncio.get_running_loop().run_in_executor(
945+
self.executor,
946+
lambda: self.get_metadata_field_unique_values(metadata_field=metadata_field, search_term=search_term),
947+
)
948+
911949
async def delete_all_documents_async(self) -> None:
912950
"""
913951
Deletes all documents in the document store.

haystack/testing/document_store_async.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,174 @@ async def test_delete_documents_non_existing_document_async(document_store: Asyn
379379

380380
# No Document has been deleted
381381
assert await document_store.count_documents_async() == 1
382+
383+
384+
class GetMetadataFieldsInfoAsyncTest:
    """
    Mixin with tests for a Document Store's get_metadata_fields_info_async().

    Only mix in for stores that implement get_metadata_fields_info_async.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_fields_info_async(document_store: AsyncDocumentStore):
        """Check that every written metadata field is reported, each with a "type" entry."""
        first_doc = Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1})
        second_doc = Document(content="Doc 2", meta={"category": "B", "status": "inactive", "rating": 0.5})
        await document_store.write_documents_async([first_doc, second_doc])
        assert await document_store.count_documents_async() == 2

        fields_info = await document_store.get_metadata_fields_info_async()  # type:ignore[attr-defined]

        # Every field used by at least one document must be reported.
        for expected_field in ("category", "status", "priority", "rating"):
            assert expected_field in fields_info
        # Each entry is a dict that carries at least a "type" key.
        for field_info in fields_info.values():
            assert isinstance(field_info, dict)
            assert "type" in field_info

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_fields_info_empty_collection_async(document_store: AsyncDocumentStore):
        """Check that an empty store reports no metadata fields."""
        assert await document_store.count_documents_async() == 0

        fields_info = await document_store.get_metadata_fields_info_async()  # type:ignore[attr-defined]
        assert fields_info == {}
420+
421+
422+
class GetMetadataFieldMinMaxAsyncTest:
    """
    Mixin with tests for a Document Store's get_metadata_field_min_max_async().

    Only mix in for stores that implement get_metadata_field_min_max_async.
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_numeric_async(document_store: AsyncDocumentStore):
        """Check min/max over an integer metadata field."""
        priorities = (1, 5, 3, 10)
        docs = [
            Document(content=f"Doc {number}", meta={"priority": priority})
            for number, priority in enumerate(priorities, start=1)
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 4

        min_max = await document_store.get_metadata_field_min_max_async("priority")  # type:ignore[attr-defined]
        assert min_max["min"] == 1
        assert min_max["max"] == 10

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_float_async(document_store: AsyncDocumentStore):
        """Check min/max over a float metadata field."""
        ratings = (0.6, 0.95, 0.8)
        docs = [
            Document(content=f"Doc {number}", meta={"rating": rating})
            for number, rating in enumerate(ratings, start=1)
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 3

        min_max = await document_store.get_metadata_field_min_max_async("rating")  # type:ignore[attr-defined]

        # Floats are compared approximately to tolerate backend rounding.
        assert min_max["min"] == pytest.approx(0.6)
        assert min_max["max"] == pytest.approx(0.95)

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_single_value_async(document_store: AsyncDocumentStore):
        """Check that min and max coincide when the field has a single value."""
        only_doc = Document(content="Doc 1", meta={"priority": 42})
        await document_store.write_documents_async([only_doc])
        assert await document_store.count_documents_async() == 1

        min_max = await document_store.get_metadata_field_min_max_async("priority")  # type:ignore[attr-defined]
        assert min_max["min"] == 42
        assert min_max["max"] == 42

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_empty_collection_async(document_store: AsyncDocumentStore):
        """Check that an empty store yields None for both min and max."""
        assert await document_store.count_documents_async() == 0

        min_max = await document_store.get_metadata_field_min_max_async("priority")  # type:ignore[attr-defined]
        assert min_max["min"] is None
        assert min_max["max"] is None

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_min_max_meta_prefix_async(document_store: AsyncDocumentStore):
        """Check that field names carrying the 'meta.' prefix are accepted."""
        integer_docs = [
            Document(content=f"Doc {number}", meta={"priority": priority, "age": age})
            for number, (priority, age) in enumerate([(1, 10), (5, 20), (3, 15), (10, 5)], start=1)
        ]
        # The float documents are numbered from 6; there is no "Doc 5" in this fixture.
        float_docs = [
            Document(content=f"Doc {number}", meta={"rating": rating})
            for number, rating in enumerate([10.5, 20.3, 15.7, 5.2], start=6)
        ]
        await document_store.write_documents_async(integer_docs + float_docs)

        # Integer values with the "meta." prefix
        priority_range = await document_store.get_metadata_field_min_max_async(  # type:ignore[attr-defined]
            "meta.priority"
        )
        assert priority_range["min"] == 1
        assert priority_range["max"] == 10

        # Float values with the "meta." prefix
        rating_range = await document_store.get_metadata_field_min_max_async(  # type:ignore[attr-defined]
            "meta.rating"
        )
        assert rating_range["min"] == pytest.approx(5.2)
        assert rating_range["max"] == pytest.approx(20.3)
509+
510+
511+
class GetMetadataFieldUniqueValuesAsyncTest:
    """
    Mixin with tests for a Document Store's get_metadata_field_unique_values_async().

    Only mix in for stores that implement get_metadata_field_unique_values_async.
    Expects the method to return (values_list, total_count) or (values_list, pagination_key).
    """

    @staticmethod
    @pytest.mark.asyncio
    async def test_get_metadata_field_unique_values_basic_async(document_store: AsyncDocumentStore):
        """Check that the distinct values of a field are returned, along with their count when provided."""
        docs = [
            Document(content=f"Doc {number}", meta={"category": category})
            for number, category in enumerate("ABACB", start=1)
        ]
        await document_store.write_documents_async(docs)
        assert await document_store.count_documents_async() == 5

        # Stores differ in which optional arguments they accept, so only pass the ones
        # that actually appear in the method's signature.
        accepted = inspect.signature(
            document_store.get_metadata_field_unique_values_async  # type:ignore[attr-defined]
        ).parameters
        call_kwargs: dict = {}
        if "search_term" in accepted:
            call_kwargs["search_term"] = None
        # Pagination start: "from_" takes precedence over "offset".
        for start_name in ("from_", "offset"):
            if start_name in accepted:
                call_kwargs[start_name] = 0
                break
        # Page size: "size" takes precedence over "limit".
        for size_name in ("size", "limit"):
            if size_name in accepted:
                call_kwargs[size_name] = 10
                break

        result = await document_store.get_metadata_field_unique_values_async(  # type:ignore[attr-defined]
            "category", **call_kwargs
        )

        if isinstance(result, tuple):
            values = result[0]
        else:
            values = result
        assert isinstance(values, list)
        assert set(values) == {"A", "B", "C"}

        # The second tuple element is only checked when it is an integer total count;
        # a pagination key of another type is left unchecked.
        reports_int_total = isinstance(result, tuple) and len(result) >= 2 and isinstance(result[1], int)
        if reports_int_total:
            assert result[1] == 3
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
enhancements:
3+
- |
4+
Add async variants of metadata methods to ``InMemoryDocumentStore``:
5+
``get_metadata_fields_info_async()``, ``get_metadata_field_min_max_async()``, and
6+
``get_metadata_field_unique_values_async()``. These rely on the store's thread-pool executor,
7+
consistent with the existing async method pattern.

test/document_stores/test_in_memory.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
from haystack.testing.document_store_async import (
3030
CountDocumentsAsyncTest,
3131
DeleteDocumentsAsyncTest,
32+
GetMetadataFieldMinMaxAsyncTest,
33+
GetMetadataFieldsInfoAsyncTest,
34+
GetMetadataFieldUniqueValuesAsyncTest,
3235
WriteDocumentsAsyncTest,
3336
)
3437

@@ -47,6 +50,9 @@ class TestMemoryDocumentStore(
4750
GetMetadataFieldMinMaxTest,
4851
GetMetadataFieldUniqueValuesTest,
4952
GetMetadataFieldsInfoTest,
53+
GetMetadataFieldsInfoAsyncTest,
54+
GetMetadataFieldMinMaxAsyncTest,
55+
GetMetadataFieldUniqueValuesAsyncTest,
5056
):
5157
"""
5258
Test InMemoryDocumentStore's specific features

0 commit comments

Comments
 (0)