Skip to content

Commit d047e93

Browse files
fix: UpdateByFilterAsyncTest, CountDocumentsByFilterAsyncTest, CountUniqueMetadataByFilterAsyncTest (#10953)
* fix: address #10920
* formatting

---------

Co-authored-by: David S. Batista <dsbatista@gmail.com>
1 parent bfd35a3 commit d047e93

4 files changed

Lines changed: 244 additions & 0 deletions

File tree

haystack/document_stores/in_memory/document_store.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,51 @@ async def delete_documents_async(self, document_ids: list[str]) -> None:
863863
self.executor, lambda: self.delete_documents(document_ids=document_ids)
864864
)
865865

866+
async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str, Any]) -> int:
867+
"""
868+
Updates the metadata of all documents that match the provided filters.
869+
870+
:param filters: The filters to apply to select documents for updating.
871+
For filter syntax, see filter_documents.
872+
:param meta: The metadata fields to update. These will be merged with existing metadata.
873+
:returns: The number of documents updated.
874+
"""
875+
return await asyncio.get_running_loop().run_in_executor(
876+
self.executor, lambda: self.update_by_filter(filters=filters, meta=meta)
877+
)
878+
879+
async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
880+
"""
881+
Returns the number of documents that match the provided filters.
882+
883+
:param filters: The filters to apply.
884+
For a detailed specification of the filters, refer to the
885+
[documentation](https://docs.haystack.deepset.ai/docs/metadata-filtering).
886+
:returns: The number of documents that match the filters.
887+
"""
888+
return await asyncio.get_running_loop().run_in_executor(
889+
self.executor, lambda: self.count_documents_by_filter(filters=filters)
890+
)
891+
892+
async def count_unique_metadata_by_filter_async(
893+
self, filters: dict[str, Any], metadata_fields: list[str]
894+
) -> dict[str, int]:
895+
"""
896+
Returns the number of unique values for each specified metadata field from documents matching the filters.
897+
898+
:param filters: The filters to apply.
899+
For a detailed specification of the filters, refer to the
900+
[documentation](https://docs.haystack.deepset.ai/docs/metadata-filtering).
901+
:param metadata_fields: List of field names to count unique values for.
902+
Field names can include or omit the "meta." prefix.
903+
:returns: A dictionary mapping each metadata field name (without "meta." prefix)
904+
to the count of its unique values among the filtered documents.
905+
"""
906+
return await asyncio.get_running_loop().run_in_executor(
907+
self.executor,
908+
lambda: self.count_unique_metadata_by_filter(filters=filters, metadata_fields=metadata_fields),
909+
)
910+
866911
async def delete_all_documents_async(self) -> None:
867912
"""
868913
Deletes all documents in the document store.

haystack/testing/document_store.py

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -930,6 +930,43 @@ def test_update_by_filter_advanced_filters(document_store: DocumentStore):
930930
assert len(featured_docs) == 2
931931

932932

933+
class UpdateByFilterAsyncTest:
934+
"""
935+
Tests for Document Store update_by_filter_async().
936+
937+
Only mix in for stores that implement update_by_filter_async.
938+
"""
939+
940+
@staticmethod
941+
@pytest.mark.asyncio
942+
async def test_update_by_filter_async(document_store: DocumentStore, filterable_docs: list[Document]):
943+
"""Update documents matching a filter asynchronously and verify count and meta changes."""
944+
document_store.write_documents(filterable_docs)
945+
expected_count = len([d for d in filterable_docs if d.meta.get("chapter") == "intro"])
946+
assert document_store.count_documents() == len(filterable_docs)
947+
948+
sig = inspect.signature(document_store.update_by_filter_async) # type:ignore[attr-defined]
949+
params = {"refresh": True} if "refresh" in sig.parameters else {}
950+
updated_count = await document_store.update_by_filter_async( # type:ignore[attr-defined]
951+
filters={"field": "meta.chapter", "operator": "==", "value": "intro"}, meta={"updated": True}, **params
952+
)
953+
assert updated_count == expected_count
954+
955+
updated_docs = document_store.filter_documents(
956+
filters={"field": "meta.updated", "operator": "==", "value": True}
957+
)
958+
assert len(updated_docs) == expected_count
959+
for doc in updated_docs:
960+
assert doc.meta["chapter"] == "intro"
961+
assert doc.meta["updated"] is True
962+
963+
not_updated_docs = document_store.filter_documents(
964+
filters={"field": "meta.chapter", "operator": "==", "value": "abstract"}
965+
)
966+
for doc in not_updated_docs:
967+
assert doc.meta.get("updated") is not True
968+
969+
933970
class CountDocumentsByFilterTest:
934971
"""
935972
Tests for Document Store count_documents_by_filter().
@@ -1005,6 +1042,85 @@ def test_count_documents_by_filter_empty_collection(document_store: DocumentStor
10051042
assert count == 0
10061043

10071044

1045+
class CountDocumentsByFilterAsyncTest:
1046+
"""
1047+
Tests for Document Store count_documents_by_filter_async().
1048+
1049+
Only mix in for stores that implement count_documents_by_filter_async.
1050+
"""
1051+
1052+
@staticmethod
1053+
@pytest.mark.asyncio
1054+
async def test_count_documents_by_filter_async_simple(document_store: DocumentStore):
1055+
"""Test count_documents_by_filter_async() with a simple equality filter."""
1056+
docs = [
1057+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
1058+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
1059+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
1060+
Document(content="Doc 4", meta={"category": "A", "status": "active"}),
1061+
]
1062+
document_store.write_documents(docs)
1063+
assert document_store.count_documents() == 4
1064+
1065+
count = await document_store.count_documents_by_filter_async( # type:ignore[attr-defined]
1066+
filters={"field": "meta.category", "operator": "==", "value": "A"}
1067+
)
1068+
assert count == 3
1069+
1070+
count = await document_store.count_documents_by_filter_async( # type:ignore[attr-defined]
1071+
filters={"field": "meta.category", "operator": "==", "value": "B"}
1072+
)
1073+
assert count == 1
1074+
1075+
@staticmethod
1076+
@pytest.mark.asyncio
1077+
async def test_count_documents_by_filter_async_compound(document_store: DocumentStore):
1078+
"""Test count_documents_by_filter_async() with AND filter."""
1079+
docs = [
1080+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
1081+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
1082+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
1083+
Document(content="Doc 4", meta={"category": "A", "status": "active"}),
1084+
]
1085+
document_store.write_documents(docs)
1086+
assert document_store.count_documents() == 4
1087+
1088+
count = await document_store.count_documents_by_filter_async( # type:ignore[attr-defined]
1089+
filters={
1090+
"operator": "AND",
1091+
"conditions": [
1092+
{"field": "meta.category", "operator": "==", "value": "A"},
1093+
{"field": "meta.status", "operator": "==", "value": "active"},
1094+
],
1095+
}
1096+
)
1097+
assert count == 2
1098+
1099+
@staticmethod
1100+
@pytest.mark.asyncio
1101+
async def test_count_documents_by_filter_async_no_matches(document_store: DocumentStore):
1102+
"""Test count_documents_by_filter_async() when filter matches no documents."""
1103+
docs = [Document(content="Doc 1", meta={"category": "A"}), Document(content="Doc 2", meta={"category": "B"})]
1104+
document_store.write_documents(docs)
1105+
assert document_store.count_documents() == 2
1106+
1107+
count = await document_store.count_documents_by_filter_async( # type:ignore[attr-defined]
1108+
filters={"field": "meta.category", "operator": "==", "value": "Z"}
1109+
)
1110+
assert count == 0
1111+
1112+
@staticmethod
1113+
@pytest.mark.asyncio
1114+
async def test_count_documents_by_filter_async_empty_collection(document_store: DocumentStore):
1115+
"""Test count_documents_by_filter_async() on an empty store."""
1116+
assert document_store.count_documents() == 0
1117+
1118+
count = await document_store.count_documents_by_filter_async( # type:ignore[attr-defined]
1119+
filters={"field": "meta.category", "operator": "==", "value": "A"}
1120+
)
1121+
assert count == 0
1122+
1123+
10081124
class CountUniqueMetadataByFilterTest:
10091125
"""
10101126
Tests for Document Store count_unique_metadata_by_filter().
@@ -1072,6 +1188,78 @@ def test_count_unique_metadata_by_filter_with_multiple_filters(document_store: D
10721188
assert count == 1
10731189

10741190

1191+
class CountUniqueMetadataByFilterAsyncTest:
1192+
"""
1193+
Tests for Document Store count_unique_metadata_by_filter_async().
1194+
1195+
Only mix in for stores that implement count_unique_metadata_by_filter_async.
1196+
"""
1197+
1198+
@staticmethod
1199+
@pytest.mark.asyncio
1200+
async def test_count_unique_metadata_by_filter_async_all_documents(document_store: DocumentStore):
1201+
"""Test count_unique_metadata_by_filter_async() with no filter returns distinct counts for all docs."""
1202+
docs = [
1203+
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
1204+
Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
1205+
Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
1206+
Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
1207+
Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}),
1208+
]
1209+
document_store.write_documents(docs)
1210+
assert document_store.count_documents() == 5
1211+
1212+
counts = await document_store.count_unique_metadata_by_filter_async( # type:ignore[attr-defined]
1213+
filters={}, metadata_fields=["category", "status", "priority"]
1214+
)
1215+
assert counts["category"] == 3
1216+
assert counts["status"] == 2
1217+
assert counts["priority"] == 3
1218+
1219+
@staticmethod
1220+
@pytest.mark.asyncio
1221+
async def test_count_unique_metadata_by_filter_async_with_filter(document_store: DocumentStore):
1222+
"""Test count_unique_metadata_by_filter_async() with a filter."""
1223+
docs = [
1224+
Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
1225+
Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
1226+
Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
1227+
Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
1228+
]
1229+
document_store.write_documents(docs)
1230+
assert document_store.count_documents() == 4
1231+
1232+
counts = await document_store.count_unique_metadata_by_filter_async( # type:ignore[attr-defined]
1233+
filters={"field": "meta.category", "operator": "==", "value": "A"}, metadata_fields=["status", "priority"]
1234+
)
1235+
assert counts["status"] == 2
1236+
assert counts["priority"] == 2
1237+
1238+
@staticmethod
1239+
@pytest.mark.asyncio
1240+
async def test_count_unique_metadata_by_filter_async_with_multiple_filters(document_store: DocumentStore):
1241+
"""Test counting unique metadata asynchronously with multiple filters."""
1242+
docs = [
1243+
Document(content="Doc 1", meta={"category": "A", "year": 2023}),
1244+
Document(content="Doc 2", meta={"category": "A", "year": 2024}),
1245+
Document(content="Doc 3", meta={"category": "B", "year": 2023}),
1246+
Document(content="Doc 4", meta={"category": "B", "year": 2024}),
1247+
]
1248+
document_store.write_documents(docs)
1249+
1250+
counts = await document_store.count_unique_metadata_by_filter_async( # type:ignore[attr-defined]
1251+
filters={
1252+
"operator": "AND",
1253+
"conditions": [
1254+
{"field": "meta.category", "operator": "==", "value": "B"},
1255+
{"field": "meta.year", "operator": "==", "value": 2023},
1256+
],
1257+
},
1258+
metadata_fields=["category", "year"],
1259+
)
1260+
assert counts == {"category": 1, "year": 1}
1261+
1262+
10751263
class GetMetadataFieldsInfoTest:
10761264
"""
10771265
Tests for Document Store get_metadata_fields_info().
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
features:
3+
- |
4+
Added async filter helpers to the ``InMemoryDocumentStore``: ``update_by_filter_async()``,
5+
``count_documents_by_filter_async()``, and ``count_unique_metadata_by_filter_async()``.

test/document_stores/test_in_memory.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,17 @@
1414
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1515
from haystack.document_stores.in_memory import InMemoryDocumentStore
1616
from haystack.testing.document_store import (
17+
CountDocumentsByFilterAsyncTest,
1718
CountDocumentsByFilterTest,
19+
CountUniqueMetadataByFilterAsyncTest,
1820
CountUniqueMetadataByFilterTest,
1921
DocumentStoreBaseExtendedTests,
2022
DocumentStoreBaseTests,
2123
FilterableDocsFixtureMixin,
2224
GetMetadataFieldMinMaxTest,
2325
GetMetadataFieldsInfoTest,
2426
GetMetadataFieldUniqueValuesTest,
27+
UpdateByFilterAsyncTest,
2528
)
2629
from haystack.testing.document_store_async import (
2730
CountDocumentsAsyncTest,
@@ -32,10 +35,13 @@
3235

3336
class TestMemoryDocumentStore(
3437
DocumentStoreBaseExtendedTests,
38+
UpdateByFilterAsyncTest,
39+
CountDocumentsByFilterAsyncTest,
3540
CountDocumentsAsyncTest,
3641
WriteDocumentsAsyncTest,
3742
DeleteDocumentsAsyncTest,
3843
CountDocumentsByFilterTest,
44+
CountUniqueMetadataByFilterAsyncTest,
3945
CountUniqueMetadataByFilterTest,
4046
FilterableDocsFixtureMixin,
4147
GetMetadataFieldMinMaxTest,

0 commit comments

Comments
 (0)