Skip to content

Commit b3f17e0

Browse files
refactor: weaviate, chroma, elasticsearch, opensearch, azure_ai_search use _normalize_metadata_field_name from haystack.utils (#2953)
* initial import
* formatting
* increasing lowest haystack dependency + formatting
* increasing lowest haystack dependency for Azure AI
1 parent 1f89211 commit b3f17e0

10 files changed

Lines changed: 37 additions & 73 deletions

File tree

integrations/azure_ai_search/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
2525
dependencies = [
26-
"haystack-ai>=2.24.0",
26+
"haystack-ai>=2.26.0",
2727
"azure-search-documents>=11.5",
2828
"azure-identity"
2929
]

integrations/azure_ai_search/src/haystack_integrations/document_stores/azure_ai_search/document_store.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from haystack.dataclasses import Document
4343
from haystack.document_stores.types import DuplicatePolicy
4444
from haystack.utils import Secret, deserialize_secrets_inplace
45+
from haystack.utils.misc import _normalize_metadata_field_name
4546

4647
from .errors import AzureAISearchDocumentStoreConfigError, AzureAISearchDocumentStoreError
4748
from .filters import _normalize_filters
@@ -364,13 +365,6 @@ def count_documents(self) -> int:
364365
"""
365366
return self.client.get_document_count()
366367

367-
@staticmethod
368-
def _normalize_metadata_field_name(metadata_field: str) -> str:
369-
"""
370-
Normalizes a metadata field name by removing the `meta.` prefix if present.
371-
"""
372-
return metadata_field[5:] if metadata_field.startswith("meta.") else metadata_field
373-
374368
def _get_index_schema_fields(self) -> dict[str, Any]:
375369
"""
376370
Returns the index schema fields keyed by field name.
@@ -486,7 +480,7 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fiel
486480
:param metadata_fields: List of field names to count unique values for.
487481
:returns: Dictionary mapping field names to counts of unique values.
488482
"""
489-
normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
483+
normalized_metadata_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
490484
self._validate_index_fields(normalized_metadata_fields)
491485

492486
documents = self._fetch_raw_documents(filters=filters, select=normalized_metadata_fields)
@@ -515,7 +509,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
515509
:param metadata_field: The metadata field to get the minimum and maximum values for.
516510
:returns: A dictionary with the keys "min" and "max".
517511
"""
518-
field_name = self._normalize_metadata_field_name(metadata_field)
512+
field_name = _normalize_metadata_field_name(metadata_field)
519513
self._validate_index_fields([field_name])
520514

521515
documents = self._fetch_raw_documents(select=[field_name])
@@ -533,7 +527,7 @@ def get_metadata_field_unique_values(
533527
:param size: Number of values to return.
534528
:returns: Tuple of (list of unique values, total count of matching values).
535529
"""
536-
field_name = self._normalize_metadata_field_name(metadata_field)
530+
field_name = _normalize_metadata_field_name(metadata_field)
537531
self._validate_index_fields([field_name])
538532

539533
documents = self._fetch_raw_documents(select=[field_name])

integrations/chroma/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
2525
dependencies = [
26-
"haystack-ai>=2.24.0",
26+
"haystack-ai>=2.26.0",
2727
"chromadb>=1.5.0"
2828
]
2929

integrations/chroma/src/haystack_integrations/document_stores/chroma/document_store.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from haystack.dataclasses import Document
1414
from haystack.document_stores.errors import DocumentStoreError
1515
from haystack.document_stores.types import DuplicatePolicy
16+
from haystack.utils.misc import _normalize_metadata_field_name
1617

1718
from .filters import _convert_filters
1819
from .utils import get_embedding_function
@@ -242,16 +243,6 @@ def _prepare_query_kwargs(filters: dict[str, Any] | None = None) -> dict[str, An
242243
"include": ["embeddings", "documents", "metadatas", "distances"],
243244
}
244245

245-
@staticmethod
246-
def _normalize_metadata_field_name(metadata_field: str) -> str:
247-
"""
248-
Normalizes a metadata field name by removing the "meta." prefix if present.
249-
250-
:param metadata_field: The metadata field name to normalize.
251-
:returns: The normalized field name without "meta." prefix.
252-
"""
253-
return metadata_field[5:] if metadata_field.startswith("meta.") else metadata_field
254-
255246
@staticmethod
256247
def _infer_type_from_value(value: Any) -> str:
257248
"""
@@ -1087,7 +1078,7 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fiel
10871078
self._ensure_initialized()
10881079
assert self._collection is not None
10891080

1090-
normalized_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
1081+
normalized_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
10911082

10921083
kwargs = ChromaDocumentStore._prepare_get_kwargs(filters)
10931084
kwargs["include"] = ["metadatas"]
@@ -1114,7 +1105,7 @@ async def count_unique_metadata_by_filter_async(
11141105
await self._ensure_initialized_async()
11151106
assert self._async_collection is not None
11161107

1117-
normalized_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
1108+
normalized_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
11181109

11191110
kwargs = ChromaDocumentStore._prepare_get_kwargs(filters)
11201111
kwargs["include"] = ["metadatas"]
@@ -1205,7 +1196,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
12051196
self._ensure_initialized()
12061197
assert self._collection is not None
12071198

1208-
field_name = self._normalize_metadata_field_name(metadata_field)
1199+
field_name = _normalize_metadata_field_name(metadata_field)
12091200

12101201
result = self._collection.get(include=["metadatas"])
12111202
return self._compute_field_min_max(result.get("metadatas", []), field_name)
@@ -1229,7 +1220,7 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
12291220
await self._ensure_initialized_async()
12301221
assert self._async_collection is not None
12311222

1232-
field_name = self._normalize_metadata_field_name(metadata_field)
1223+
field_name = _normalize_metadata_field_name(metadata_field)
12331224

12341225
result = await self._async_collection.get(include=["metadatas"])
12351226
return self._compute_field_min_max(result.get("metadatas", []), field_name)
@@ -1256,7 +1247,7 @@ def get_metadata_field_unique_values(
12561247
self._ensure_initialized()
12571248
assert self._collection is not None
12581249

1259-
field_name = self._normalize_metadata_field_name(metadata_field)
1250+
field_name = _normalize_metadata_field_name(metadata_field)
12601251

12611252
kwargs: dict[str, Any] = {"include": ["metadatas"]}
12621253
if search_term:
@@ -1289,7 +1280,7 @@ async def get_metadata_field_unique_values_async(
12891280
await self._ensure_initialized_async()
12901281
assert self._async_collection is not None
12911282

1292-
field_name = self._normalize_metadata_field_name(metadata_field)
1283+
field_name = _normalize_metadata_field_name(metadata_field)
12931284

12941285
kwargs: dict[str, Any] = {"include": ["metadatas"]}
12951286
if search_term:

integrations/elasticsearch/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
2525
dependencies = [
26-
"haystack-ai>=2.24.0",
26+
"haystack-ai>=2.26.0",
2727
"elasticsearch>=8,<9",
2828
"aiohttp>=3.9.0" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch
2929
]

integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1818
from haystack.document_stores.types import DuplicatePolicy
1919
from haystack.utils import Secret
20+
from haystack.utils.misc import _normalize_metadata_field_name
2021
from haystack.version import __version__ as haystack_version
2122

2223
from elasticsearch import AsyncElasticsearch, Elasticsearch, helpers
@@ -1077,13 +1078,6 @@ async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
10771078
result = await self.async_client.count(index=self._index, body=body)
10781079
return result["count"]
10791080

1080-
@staticmethod
1081-
def _normalize_metadata_field_name(metadata_field: str) -> str:
1082-
"""
1083-
Normalizes a metadata field name by removing the "meta." prefix if present.
1084-
"""
1085-
return metadata_field[5:] if metadata_field.startswith("meta.") else metadata_field
1086-
10871081
@staticmethod
10881082
def _build_cardinality_aggregations(index_mapping: dict[str, Any], fields: list[str]) -> dict[str, Any]:
10891083
"""
@@ -1160,7 +1154,7 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fiel
11601154
index_mapping = mapping[self._index]["mappings"]["properties"]
11611155

11621156
# normalize field names, e.g: remove "meta." prefix if present
1163-
normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
1157+
normalized_metadata_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
11641158

11651159
# validate that all requested fields exist in the index mapping
11661160
missing_fields = [f for f in normalized_metadata_fields if f not in index_mapping]
@@ -1204,7 +1198,7 @@ async def count_unique_metadata_by_filter_async(
12041198
index_mapping = mapping[self._index]["mappings"]["properties"]
12051199

12061200
# normalize field names
1207-
normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
1201+
normalized_metadata_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
12081202
# validate that all requested fields exist in the index mapping
12091203
missing_fields = [f for f in normalized_metadata_fields if f not in index_mapping]
12101204
if missing_fields:
@@ -1325,7 +1319,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, int | Non
13251319
"""
13261320
self._ensure_initialized()
13271321

1328-
field_name = self._normalize_metadata_field_name(metadata_field)
1322+
field_name = _normalize_metadata_field_name(metadata_field)
13291323
body = self._build_min_max_query_body(field_name)
13301324
result = self.client.search(index=self._index, body=body)
13311325
stats = result.get("aggregations", {}).get("field_stats", {})
@@ -1342,7 +1336,7 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
13421336
"""
13431337
self._ensure_initialized()
13441338

1345-
field_name = self._normalize_metadata_field_name(metadata_field)
1339+
field_name = _normalize_metadata_field_name(metadata_field)
13461340
body = self._build_min_max_query_body(field_name)
13471341
result = await self.async_client.search(index=self._index, body=body)
13481342
stats = result.get("aggregations", {}).get("field_stats", {})
@@ -1373,7 +1367,7 @@ def get_metadata_field_unique_values(
13731367
"""
13741368
self._ensure_initialized()
13751369

1376-
field_name = self._normalize_metadata_field_name(metadata_field)
1370+
field_name = _normalize_metadata_field_name(metadata_field)
13771371

13781372
# filter by search_term if provided
13791373
query: dict[str, Any] = {"match_all": {}}
@@ -1439,7 +1433,7 @@ async def get_metadata_field_unique_values_async(
14391433
"""
14401434
self._ensure_initialized()
14411435

1442-
field_name = self._normalize_metadata_field_name(metadata_field)
1436+
field_name = _normalize_metadata_field_name(metadata_field)
14431437

14441438
# filter by search_term if provided
14451439
query: dict[str, Any] = {"match_all": {}}

integrations/opensearch/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
]
2525

2626
dependencies = [
27-
"haystack-ai>=2.24.0",
27+
"haystack-ai>=2.26.0",
2828
"opensearch-py[async]>=3.0.0"
2929
]
3030

integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
1414
from haystack.document_stores.types import DuplicatePolicy
1515
from haystack.utils.auth import Secret
16+
from haystack.utils.misc import _normalize_metadata_field_name
1617
from opensearchpy import AsyncHttpConnection, AsyncOpenSearch, OpenSearch
1718
from opensearchpy.helpers import async_bulk, bulk
1819

@@ -1659,7 +1660,7 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fiel
16591660
index_mapping = mapping[self._index]["mappings"]["properties"]
16601661

16611662
# normalize field names
1662-
normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
1663+
normalized_metadata_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
16631664
# validate that all requested fields exist in the index mapping
16641665
missing_fields = [f for f in normalized_metadata_fields if f not in index_mapping]
16651666
if missing_fields:
@@ -1703,7 +1704,7 @@ async def count_unique_metadata_by_filter_async(
17031704
index_mapping = mapping[self._index]["mappings"]["properties"]
17041705

17051706
# normalize field names
1706-
normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
1707+
normalized_metadata_fields = [_normalize_metadata_field_name(field) for field in metadata_fields]
17071708
# validate that all requested fields exist in the index mapping
17081709
missing_fields = [f for f in normalized_metadata_fields if f not in index_mapping]
17091710
if missing_fields:
@@ -1790,13 +1791,6 @@ async def get_metadata_fields_info_async(self) -> dict[str, dict[str, str]]:
17901791
index_mapping = {k: v for k, v in index_mapping.items() if k not in SPECIAL_FIELDS}
17911792
return index_mapping
17921793

1793-
@staticmethod
1794-
def _normalize_metadata_field_name(metadata_field: str) -> str:
1795-
"""
1796-
Normalizes a metadata field name by removing the "meta." prefix if present.
1797-
"""
1798-
return metadata_field[5:] if metadata_field.startswith("meta.") else metadata_field
1799-
18001794
@staticmethod
18011795
def _build_min_max_query_body(field_name: str) -> dict[str, Any]:
18021796
"""
@@ -1834,7 +1828,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, int | Non
18341828
self._ensure_initialized()
18351829
assert self._client is not None
18361830

1837-
field_name = self._normalize_metadata_field_name(metadata_field)
1831+
field_name = _normalize_metadata_field_name(metadata_field)
18381832
body = self._build_min_max_query_body(field_name)
18391833
result = self._client.search(index=self._index, body=body)
18401834
stats = result.get("aggregations", {}).get("field_stats", {})
@@ -1852,7 +1846,7 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
18521846
await self._ensure_initialized_async()
18531847
assert self._async_client is not None
18541848

1855-
field_name = self._normalize_metadata_field_name(metadata_field)
1849+
field_name = _normalize_metadata_field_name(metadata_field)
18561850
body = self._build_min_max_query_body(field_name)
18571851
result = await self._async_client.search(index=self._index, body=body)
18581852
stats = result.get("aggregations", {}).get("field_stats", {})
@@ -1882,7 +1876,7 @@ def get_metadata_field_unique_values(
18821876
self._ensure_initialized()
18831877
assert self._client is not None
18841878

1885-
field_name = self._normalize_metadata_field_name(metadata_field)
1879+
field_name = _normalize_metadata_field_name(metadata_field)
18861880

18871881
# filter by search_term if provided
18881882
query: dict[str, Any] = {"match_all": {}}
@@ -1947,7 +1941,7 @@ async def get_metadata_field_unique_values_async(
19471941
await self._ensure_initialized_async()
19481942
assert self._async_client is not None
19491943

1950-
field_name = self._normalize_metadata_field_name(metadata_field)
1944+
field_name = _normalize_metadata_field_name(metadata_field)
19511945

19521946
# filter by search_term if provided
19531947
query: dict[str, Any] = {"match_all": {}}

integrations/weaviate/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: PyPy",
2424
]
2525
dependencies = [
26-
"haystack-ai>=2.24.0",
26+
"haystack-ai>=2.26.0",
2727
"weaviate-client>=4.20",
2828
"python-dateutil",
2929
]

0 commit comments

Comments (0)