Skip to content

Commit 2a7fe89

Browse files
committed
Update count_unique_metadata_by_filter to accept multiple metadata fields instead of only one
1 parent 0901305 commit 2a7fe89

3 files changed

Lines changed: 77 additions & 48 deletions

File tree

integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,21 +1047,22 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
10471047
except Exception:
10481048
return {}
10491049

1050-
def count_unique_metadata_by_filter(self, metadata_field: str, filters: dict[str, Any] | None = None) -> int:
1050+
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
10511051
"""
1052-
Returns the number of unique values for the given metadata field among documents that match the filters.
1052+
Returns the number of unique values for each specified metadata field among documents that match the filters.
10531053
1054-
:param metadata_field: The metadata field key (inside ``meta``) to count unique values for.
1055-
:param filters: Optional filters to restrict the documents considered.
1054+
:param filters: The filters to restrict the documents considered.
10561055
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
1056+
:param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.
10571057
1058-
:returns: The number of unique values for the field among the filtered documents.
1058+
:returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
1059+
documents.
10591060
"""
10601061
self._initialize_client()
10611062
assert self._client is not None
10621063

10631064
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
1064-
unique_values = set()
1065+
unique_values_by_field: dict[str, set[Any]] = {field: set() for field in metadata_fields}
10651066

10661067
try:
10671068
next_offset = None
@@ -1087,37 +1088,38 @@ def count_unique_metadata_by_filter(self, metadata_field: str, filters: dict[str
10871088
for record in records:
10881089
if record.payload and "meta" in record.payload:
10891090
meta = record.payload["meta"]
1090-
if metadata_field in meta:
1091-
value = meta[metadata_field]
1092-
if value is not None:
1093-
# Convert to hashable type if needed
1094-
if isinstance(value, (list, dict)):
1095-
unique_values.add(str(value))
1096-
else:
1097-
unique_values.add(value)
1098-
1099-
return len(unique_values)
1091+
for field in metadata_fields:
1092+
if field in meta:
1093+
value = meta[field]
1094+
if value is not None:
1095+
if isinstance(value, (list, dict)):
1096+
unique_values_by_field[field].add(str(value))
1097+
else:
1098+
unique_values_by_field[field].add(value)
1099+
1100+
return {field: len(unique_values_by_field[field]) for field in metadata_fields}
11001101
except Exception:
1101-
return 0
1102+
return dict.fromkeys(metadata_fields, 0)
11021103

11031104
async def count_unique_metadata_by_filter_async(
1104-
self, metadata_field: str, filters: dict[str, Any] | None = None
1105-
) -> int:
1105+
self, filters: dict[str, Any], metadata_fields: list[str]
1106+
) -> dict[str, int]:
11061107
"""
1107-
Asynchronously returns the number of unique values for the given metadata field among documents that match
1108-
the filters.
1108+
Asynchronously returns the number of unique values for each specified metadata field among documents that
1109+
match the filters.
11091110
1110-
:param metadata_field: The metadata field key (inside ``meta``) to count unique values for.
1111-
:param filters: Optional filters to restrict the documents considered.
1111+
:param filters: The filters to restrict the documents considered.
11121112
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
1113+
:param metadata_fields: List of metadata field keys (inside ``meta``) to count unique values for.
11131114
1114-
:returns: The number of unique values for the field among the filtered documents.
1115+
:returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
1116+
documents.
11151117
"""
11161118
await self._initialize_async_client()
11171119
assert self._async_client is not None
11181120

11191121
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
1120-
unique_values = set()
1122+
unique_values_by_field: dict[str, set[Any]] = {field: set() for field in metadata_fields}
11211123

11221124
try:
11231125
next_offset = None
@@ -1143,18 +1145,18 @@ async def count_unique_metadata_by_filter_async(
11431145
for record in records:
11441146
if record.payload and "meta" in record.payload:
11451147
meta = record.payload["meta"]
1146-
if metadata_field in meta:
1147-
value = meta[metadata_field]
1148-
if value is not None:
1149-
# Convert to hashable type if needed
1150-
if isinstance(value, (list, dict)):
1151-
unique_values.add(str(value))
1152-
else:
1153-
unique_values.add(value)
1154-
1155-
return len(unique_values)
1148+
for field in metadata_fields:
1149+
if field in meta:
1150+
value = meta[field]
1151+
if value is not None:
1152+
if isinstance(value, (list, dict)):
1153+
unique_values_by_field[field].add(str(value))
1154+
else:
1155+
unique_values_by_field[field].add(value)
1156+
1157+
return {field: len(unique_values_by_field[field]) for field in metadata_fields}
11561158
except Exception:
1157-
return 0
1159+
return dict.fromkeys(metadata_fields, 0)
11581160

11591161
def get_metadata_field_unique_values(
11601162
self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0

integrations/qdrant/tests/test_document_store.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -595,8 +595,20 @@ def test_count_unique_metadata_by_filter(self, document_store: QdrantDocumentSto
595595
]
596596
document_store.write_documents(docs)
597597

598-
count = document_store.count_unique_metadata_by_filter("category")
599-
assert count == 3
598+
result = document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["category"])
599+
assert result == {"category": 3}
600+
601+
def test_count_unique_metadata_by_filter_multiple_fields(self, document_store: QdrantDocumentStore):
602+
"""Test counting unique values for multiple metadata fields."""
603+
docs = [
604+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
605+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
606+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
607+
]
608+
document_store.write_documents(docs)
609+
610+
result = document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["category", "status"])
611+
assert result == {"category": 2, "status": 2}
600612

601613
def test_count_unique_metadata_by_filter_with_filter(self, document_store: QdrantDocumentStore):
602614
"""Test counting unique metadata field values with filtering."""
@@ -607,11 +619,11 @@ def test_count_unique_metadata_by_filter_with_filter(self, document_store: Qdran
607619
]
608620
document_store.write_documents(docs)
609621

610-
# Count unique categories where status is active
611-
count = document_store.count_unique_metadata_by_filter(
612-
"category", filters={"field": "meta.status", "operator": "==", "value": "active"}
622+
result = document_store.count_unique_metadata_by_filter(
623+
filters={"field": "meta.status", "operator": "==", "value": "active"},
624+
metadata_fields=["category"],
613625
)
614-
assert count == 2
626+
assert result == {"category": 2}
615627

616628
def test_get_metadata_field_unique_values(self, document_store: QdrantDocumentStore):
617629
"""Test getting unique metadata field values."""

integrations/qdrant/tests/test_document_store_async.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -547,8 +547,23 @@ async def test_count_unique_metadata_by_filter_async(self, document_store: Qdran
547547
]
548548
await document_store.write_documents_async(docs)
549549

550-
count = await document_store.count_unique_metadata_by_filter_async("category")
551-
assert count == 3
550+
result = await document_store.count_unique_metadata_by_filter_async(filters={}, metadata_fields=["category"])
551+
assert result == {"category": 3}
552+
553+
@pytest.mark.asyncio
554+
async def test_count_unique_metadata_by_filter_async_multiple_fields(self, document_store: QdrantDocumentStore):
555+
"""Test counting unique values for multiple metadata fields (async)."""
556+
docs = [
557+
Document(content="Doc 1", meta={"category": "A", "status": "active"}),
558+
Document(content="Doc 2", meta={"category": "B", "status": "active"}),
559+
Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
560+
]
561+
await document_store.write_documents_async(docs)
562+
563+
result = await document_store.count_unique_metadata_by_filter_async(
564+
filters={}, metadata_fields=["category", "status"]
565+
)
566+
assert result == {"category": 2, "status": 2}
552567

553568
@pytest.mark.asyncio
554569
async def test_count_unique_metadata_by_filter_async_with_filter(self, document_store: QdrantDocumentStore):
@@ -560,11 +575,11 @@ async def test_count_unique_metadata_by_filter_async_with_filter(self, document_
560575
]
561576
await document_store.write_documents_async(docs)
562577

563-
# Count unique categories where status is active
564-
count = await document_store.count_unique_metadata_by_filter_async(
565-
"category", filters={"field": "meta.status", "operator": "==", "value": "active"}
578+
result = await document_store.count_unique_metadata_by_filter_async(
579+
filters={"field": "meta.status", "operator": "==", "value": "active"},
580+
metadata_fields=["category"],
566581
)
567-
assert count == 2
582+
assert result == {"category": 2}
568583

569584
@pytest.mark.asyncio
570585
async def test_get_metadata_field_unique_values_async(self, document_store: QdrantDocumentStore):

0 commit comments

Comments
 (0)