Skip to content

Commit 0901305

Browse files
committed
updating docstrings and removing extra param filters from get_metadata_field_min_max
1 parent 13b8103 commit 0901305

3 files changed

Lines changed: 51 additions & 97 deletions

File tree

integrations/qdrant/src/haystack_integrations/document_stores/qdrant/document_store.py

Lines changed: 51 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -941,20 +941,18 @@ async def get_metadata_fields_info_async(self) -> dict[str, str]:
941941
except (UnexpectedResponse, ValueError):
942942
return {}
943943

944-
def get_metadata_field_min_max(self, field_name: str, filters: dict[str, Any] | None = None) -> dict[str, Any]:
944+
def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
945945
"""
946946
Returns the minimum and maximum values for the given metadata field.
947947
948-
:param metadata_field: The metadata field to get the minimum and maximum values for.
948+
:param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.
949949
950950
:returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
951-
metadata field across all documents.
951+
metadata field across all documents. Returns an empty dict if no documents have the field or on error.
952952
"""
953953
self._initialize_client()
954954
assert self._client is not None
955955

956-
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
957-
958956
try:
959957
min_value = None
960958
max_value = None
@@ -965,7 +963,7 @@ def get_metadata_field_min_max(self, field_name: str, filters: dict[str, Any] |
965963
while not stop_scrolling:
966964
records, next_offset = self._client.scroll(
967965
collection_name=self.index,
968-
scroll_filter=qdrant_filter,
966+
scroll_filter=None,
969967
limit=self.scroll_size,
970968
offset=next_offset,
971969
with_payload=True,
@@ -982,8 +980,8 @@ def get_metadata_field_min_max(self, field_name: str, filters: dict[str, Any] |
982980
for record in records:
983981
if record.payload and "meta" in record.payload:
984982
meta = record.payload["meta"]
985-
if field_name in meta:
986-
value = meta[field_name]
983+
if metadata_field in meta:
984+
value = meta[metadata_field]
987985
if value is not None:
988986
if min_value is None or value < min_value:
989987
min_value = value
@@ -996,22 +994,18 @@ def get_metadata_field_min_max(self, field_name: str, filters: dict[str, Any] |
996994
except Exception:
997995
return {}
998996

999-
async def get_metadata_field_min_max_async(
1000-
self, field_name: str, filters: dict[str, Any] | None = None
1001-
) -> dict[str, Any]:
997+
async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
1002998
"""
1003999
Asynchronously returns the minimum and maximum values for the given metadata field.
10041000
1005-
:param metadata_field: The metadata field to get the minimum and maximum values for.
1001+
:param metadata_field: The metadata field key (inside ``meta``) to get the minimum and maximum values for.
10061002
10071003
:returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
1008-
metadata field across all documents.
1004+
metadata field across all documents. Returns an empty dict if no documents have the field or on error.
10091005
"""
10101006
await self._initialize_async_client()
10111007
assert self._async_client is not None
10121008

1013-
qdrant_filter = convert_filters_to_qdrant(filters) if filters else None
1014-
10151009
try:
10161010
min_value = None
10171011
max_value = None
@@ -1022,7 +1016,7 @@ async def get_metadata_field_min_max_async(
10221016
while not stop_scrolling:
10231017
records, next_offset = await self._async_client.scroll(
10241018
collection_name=self.index,
1025-
scroll_filter=qdrant_filter,
1019+
scroll_filter=None,
10261020
limit=self.scroll_size,
10271021
offset=next_offset,
10281022
with_payload=True,
@@ -1039,8 +1033,8 @@ async def get_metadata_field_min_max_async(
10391033
for record in records:
10401034
if record.payload and "meta" in record.payload:
10411035
meta = record.payload["meta"]
1042-
if field_name in meta:
1043-
value = meta[field_name]
1036+
if metadata_field in meta:
1037+
value = meta[metadata_field]
10441038
if value is not None:
10451039
if min_value is None or value < min_value:
10461040
min_value = value
@@ -1053,19 +1047,15 @@ async def get_metadata_field_min_max_async(
10531047
except Exception:
10541048
return {}
10551049

1056-
def count_unique_metadata_by_filter(self, field_name: str, filters: dict[str, Any] | None = None) -> int:
1050+
def count_unique_metadata_by_filter(self, metadata_field: str, filters: dict[str, Any] | None = None) -> int:
10571051
"""
1058-
Returns the number of unique values for each specified metadata field of the documents
1059-
that match the provided filters.
1052+
Returns the number of unique values for the given metadata field among documents that match the filters.
10601053
1061-
:param filters: The filters to apply to count documents.
1054+
:param metadata_field: The metadata field key (inside ``meta``) to count unique values for.
1055+
:param filters: Optional filters to restrict the documents considered.
10621056
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
1063-
:param metadata_fields: List of field names to calculate unique values for.
1064-
Field names can include or omit the "meta." prefix.
10651057
1066-
:returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
1067-
documents.
1068-
:raises ValueError: If any of the requested fields don't exist in the index mapping.
1058+
:returns: The number of unique values for the field among the filtered documents.
10691059
"""
10701060
self._initialize_client()
10711061
assert self._client is not None
@@ -1097,8 +1087,8 @@ def count_unique_metadata_by_filter(self, field_name: str, filters: dict[str, An
10971087
for record in records:
10981088
if record.payload and "meta" in record.payload:
10991089
meta = record.payload["meta"]
1100-
if field_name in meta:
1101-
value = meta[field_name]
1090+
if metadata_field in meta:
1091+
value = meta[metadata_field]
11021092
if value is not None:
11031093
# Convert to hashable type if needed
11041094
if isinstance(value, (list, dict)):
@@ -1111,20 +1101,17 @@ def count_unique_metadata_by_filter(self, field_name: str, filters: dict[str, An
11111101
return 0
11121102

11131103
async def count_unique_metadata_by_filter_async(
1114-
self, field_name: str, filters: dict[str, Any] | None = None
1104+
self, metadata_field: str, filters: dict[str, Any] | None = None
11151105
) -> int:
11161106
"""
1117-
Asynchronously returns the number of unique values for each specified metadata field of the documents
1118-
that match the provided filters.
1107+
Asynchronously returns the number of unique values for the given metadata field among documents that match
1108+
the filters.
11191109
1120-
:param filters: The filters to apply to count documents.
1110+
:param metadata_field: The metadata field key (inside ``meta``) to count unique values for.
1111+
:param filters: Optional filters to restrict the documents considered.
11211112
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
1122-
:param metadata_fields: List of field names to calculate unique values for.
1123-
Field names can include or omit the "meta." prefix.
11241113
1125-
:returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
1126-
documents.
1127-
:raises ValueError: If any of the requested fields don't exist in the index mapping.
1114+
:returns: The number of unique values for the field among the filtered documents.
11281115
"""
11291116
await self._initialize_async_client()
11301117
assert self._async_client is not None
@@ -1156,8 +1143,8 @@ async def count_unique_metadata_by_filter_async(
11561143
for record in records:
11571144
if record.payload and "meta" in record.payload:
11581145
meta = record.payload["meta"]
1159-
if field_name in meta:
1160-
value = meta[field_name]
1146+
if metadata_field in meta:
1147+
value = meta[metadata_field]
11611148
if value is not None:
11621149
# Convert to hashable type if needed
11631150
if isinstance(value, (list, dict)):
@@ -1170,21 +1157,20 @@ async def count_unique_metadata_by_filter_async(
11701157
return 0
11711158

11721159
def get_metadata_field_unique_values(
1173-
self, field_name: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
1160+
self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
11741161
) -> list[Any]:
11751162
"""
1176-
Returns unique values for a metadata field, optionally filtered by a search term in the content.
1177-
Uses composite aggregations for proper pagination beyond 10k results.
1163+
Returns unique values for a metadata field, with optional filters and offset/limit pagination.
1164+
1165+
Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
11781166
1179-
:param metadata_field: The metadata field to get unique values for.
1180-
:param search_term: Optional search term to filter documents by matching in the content field.
1181-
:param size: The number of unique values to return per page. Defaults to 10000.
1182-
:param after: Optional pagination key from the previous response. Use None for the first page.
1183-
For subsequent pages, pass the `after_key` from the previous response.
1167+
:param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
1168+
:param filters: Optional filters to restrict the documents considered.
1169+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
1170+
:param limit: Maximum number of unique values to return per page. Defaults to 100.
1171+
:param offset: Number of unique values to skip (for pagination). Defaults to 0.
11841172
1185-
:returns: A tuple containing (list of unique values, after_key for pagination).
1186-
The after_key is None when there are no more results. Use it in the `after` parameter
1187-
for the next page.
1173+
:returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
11881174
"""
11891175
self._initialize_client()
11901176
assert self._client is not None
@@ -1217,8 +1203,8 @@ def get_metadata_field_unique_values(
12171203
for record in records:
12181204
if record.payload and "meta" in record.payload:
12191205
meta = record.payload["meta"]
1220-
if field_name in meta:
1221-
value = meta[field_name]
1206+
if metadata_field in meta:
1207+
value = meta[metadata_field]
12221208
if value is not None:
12231209
# Convert to hashable type for deduplication
12241210
hashable_value = str(value) if isinstance(value, (list, dict)) else value
@@ -1233,21 +1219,20 @@ def get_metadata_field_unique_values(
12331219
return []
12341220

12351221
async def get_metadata_field_unique_values_async(
1236-
self, field_name: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
1222+
self, metadata_field: str, filters: dict[str, Any] | None = None, limit: int = 100, offset: int = 0
12371223
) -> list[Any]:
12381224
"""
1239-
Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.
1240-
Uses composite aggregations for proper pagination beyond 10k results.
1225+
Asynchronously returns unique values for a metadata field, with optional filters and offset/limit pagination.
1226+
1227+
Unique values are ordered by first occurrence during scroll. Pagination is offset-based over that order.
12411228
1242-
:param metadata_field: The metadata field to get unique values for.
1243-
:param search_term: Optional search term to filter documents by matching in the content field.
1244-
:param size: The number of unique values to return per page. Defaults to 10000.
1245-
:param after: Optional pagination key from the previous response. Use None for the first page.
1246-
For subsequent pages, pass the `after_key` from the previous response.
1229+
:param metadata_field: The metadata field key (inside ``meta``) to get unique values for.
1230+
:param filters: Optional filters to restrict the documents considered.
1231+
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
1232+
:param limit: Maximum number of unique values to return per page. Defaults to 100.
1233+
:param offset: Number of unique values to skip (for pagination). Defaults to 0.
12471234
1248-
:returns: A tuple containing (list of unique values, after_key for pagination).
1249-
The after_key is None when there are no more results. Use it in the `after` parameter
1250-
for the next page.
1235+
:returns: A list of unique values for the field (at most ``limit`` items, starting at ``offset``).
12511236
"""
12521237
await self._initialize_async_client()
12531238
assert self._async_client is not None
@@ -1280,8 +1265,8 @@ async def get_metadata_field_unique_values_async(
12801265
for record in records:
12811266
if record.payload and "meta" in record.payload:
12821267
meta = record.payload["meta"]
1283-
if field_name in meta:
1284-
value = meta[field_name]
1268+
if metadata_field in meta:
1269+
value = meta[metadata_field]
12851270
if value is not None:
12861271
# Convert to hashable type for deduplication
12871272
hashable_value = str(value) if isinstance(value, (list, dict)) else value

integrations/qdrant/tests/test_document_store.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -585,21 +585,6 @@ def test_get_metadata_field_min_max(self, document_store: QdrantDocumentStore):
585585
assert result.get("min") == 0.3
586586
assert result.get("max") == 0.8
587587

588-
def test_get_metadata_field_min_max_with_filter(self, document_store: QdrantDocumentStore):
589-
"""Test getting min/max values with filtering."""
590-
docs = [
591-
Document(content="Doc 1", meta={"score": 0.5, "category": "A"}),
592-
Document(content="Doc 2", meta={"score": 0.8, "category": "A"}),
593-
Document(content="Doc 3", meta={"score": 0.3, "category": "B"}),
594-
]
595-
document_store.write_documents(docs)
596-
597-
result = document_store.get_metadata_field_min_max(
598-
"score", filters={"field": "meta.category", "operator": "==", "value": "A"}
599-
)
600-
assert result.get("min") == 0.5
601-
assert result.get("max") == 0.8
602-
603588
def test_count_unique_metadata_by_filter(self, document_store: QdrantDocumentStore):
604589
"""Test counting unique metadata field values."""
605590
docs = [

integrations/qdrant/tests/test_document_store_async.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -536,22 +536,6 @@ async def test_get_metadata_field_min_max_async(self, document_store: QdrantDocu
536536
assert result.get("min") == 0.3
537537
assert result.get("max") == 0.8
538538

539-
@pytest.mark.asyncio
540-
async def test_get_metadata_field_min_max_async_with_filter(self, document_store: QdrantDocumentStore):
541-
"""Test getting min/max values with filtering (async)."""
542-
docs = [
543-
Document(content="Doc 1", meta={"score": 0.5, "category": "A"}),
544-
Document(content="Doc 2", meta={"score": 0.8, "category": "A"}),
545-
Document(content="Doc 3", meta={"score": 0.3, "category": "B"}),
546-
]
547-
await document_store.write_documents_async(docs)
548-
549-
result = await document_store.get_metadata_field_min_max_async(
550-
"score", filters={"field": "meta.category", "operator": "==", "value": "A"}
551-
)
552-
assert result.get("min") == 0.5
553-
assert result.get("max") == 0.8
554-
555539
@pytest.mark.asyncio
556540
async def test_count_unique_metadata_by_filter_async(self, document_store: QdrantDocumentStore):
557541
"""Test counting unique metadata field values (async)."""

0 commit comments

Comments
 (0)