From 172f8978697f344194bbc218263cb81e30e1a2e2 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 15:17:22 +0100
Subject: [PATCH 01/58] add count_documents_by_filter (sync and async) with tests and stub metadata helper methods

---
 .../opensearch/document_store.py              | 190 +++++++++++-------
 .../opensearch/tests/test_document_store.py   |  26 +++
 .../tests/test_document_store_async.py        |  32 +++
 3 files changed, 176 insertions(+), 72 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 541466326d..b9c45e690e 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -64,25 +64,25 @@ class OpenSearchDocumentStore:
     """
 
     def __init__(
-        self,
-        *,
-        hosts: Optional[Hosts] = None,
-        index: str = "default",
-        max_chunk_bytes: int = DEFAULT_MAX_CHUNK_BYTES,
-        embedding_dim: int = 768,
-        return_embedding: bool = False,
-        method: Optional[dict[str, Any]] = None,
-        mappings: Optional[dict[str, Any]] = None,
-        settings: Optional[dict[str, Any]] = DEFAULT_SETTINGS,
-        create_index: bool = True,
-        http_auth: Any = (
-            Secret.from_env_var("OPENSEARCH_USERNAME", strict=False),  # noqa: B008
-            Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False),  # noqa: B008
-        ),
-        use_ssl: Optional[bool] = None,
-        verify_certs: Optional[bool] = None,
-        timeout: Optional[int] = None,
-        **kwargs: Any,
+            self,
+            *,
+            hosts: Optional[Hosts] = None,
+            index: str = "default",
+            max_chunk_bytes: int = DEFAULT_MAX_CHUNK_BYTES,
+            embedding_dim: int = 768,
+            return_embedding: bool = False,
+            method: Optional[dict[str, Any]] = None,
+            mappings: Optional[dict[str, Any]] = None,
+            settings: Optional[dict[str, Any]] = DEFAULT_SETTINGS,
+            create_index: bool = True,
+            http_auth: Any = (
+                    Secret.from_env_var("OPENSEARCH_USERNAME", strict=False),  # noqa: B008
+                    Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False),  # noqa: B008
+            ),
+            use_ssl: Optional[bool] = None,
+            verify_certs: Optional[bool] = None,
+            timeout: Optional[int] = None,
+            **kwargs: Any,
     ) -> None:
         """
         Creates a new OpenSearchDocumentStore instance.
@@ -174,10 +174,10 @@ def _get_default_mappings(self) -> dict[str, Any]:
         return default_mappings
 
     def create_index(
-        self,
-        index: Optional[str] = None,
-        mappings: Optional[dict[str, Any]] = None,
-        settings: Optional[dict[str, Any]] = None,
+            self,
+            index: Optional[str] = None,
+            mappings: Optional[dict[str, Any]] = None,
+            settings: Optional[dict[str, Any]] = None,
     ) -> None:
         """
         Creates an index in OpenSearch.
@@ -399,7 +399,7 @@ async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None)
         return await self._search_documents_async(self._prepare_filter_search_request(filters))
 
     def _prepare_bulk_write_request(
-        self, *, documents: list[Document], policy: DuplicatePolicy, is_async: bool
+            self, *, documents: list[Document], policy: DuplicatePolicy, is_async: bool
     ) -> dict[str, Any]:
         if len(documents) > 0 and not isinstance(documents[0], Document):
             msg = "param 'documents' must contain a list of objects of type Document"
@@ -487,7 +487,7 @@ def write_documents(self, documents: list[Document], policy: DuplicatePolicy = D
         return documents_written
 
     async def write_documents_async(
-        self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
+            self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
     ) -> int:
         """
         Asynchronously writes documents to the document store.
@@ -757,14 +757,14 @@ async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str,
             raise DocumentStoreError(msg) from e
 
     def _prepare_bm25_search_request(
-        self,
-        *,
-        query: str,
-        filters: Optional[dict[str, Any]],
-        fuzziness: Union[int, str],
-        top_k: int,
-        all_terms_must_match: bool,
-        custom_query: Optional[dict[str, Any]],
+            self,
+            *,
+            query: str,
+            filters: Optional[dict[str, Any]],
+            fuzziness: Union[int, str],
+            top_k: int,
+            all_terms_must_match: bool,
+            custom_query: Optional[dict[str, Any]],
     ) -> dict[str, Any]:
         if not query:
             body: dict[str, Any] = {"query": {"bool": {"must": {"match_all": {}}}}}
@@ -822,15 +822,15 @@ def _postprocess_bm25_search_results(*, results: list[Document], scale_score: bo
             doc.score = float(1 / (1 + exp(-(doc.score / float(BM25_SCALING_FACTOR)))))
 
     def _bm25_retrieval(
-        self,
-        query: str,
-        *,
-        filters: Optional[dict[str, Any]] = None,
-        fuzziness: Union[int, str] = "AUTO",
-        top_k: int = 10,
-        scale_score: bool = False,
-        all_terms_must_match: bool = False,
-        custom_query: Optional[dict[str, Any]] = None,
+            self,
+            query: str,
+            *,
+            filters: Optional[dict[str, Any]] = None,
+            fuzziness: Union[int, str] = "AUTO",
+            top_k: int = 10,
+            scale_score: bool = False,
+            all_terms_must_match: bool = False,
+            custom_query: Optional[dict[str, Any]] = None,
     ) -> list[Document]:
         """
         Retrieves documents that match the provided `query` using the BM25 search algorithm.
@@ -860,15 +860,15 @@ def _bm25_retrieval(
         return documents
 
     async def _bm25_retrieval_async(
-        self,
-        query: str,
-        *,
-        filters: Optional[dict[str, Any]] = None,
-        fuzziness: str = "AUTO",
-        top_k: int = 10,
-        scale_score: bool = False,
-        all_terms_must_match: bool = False,
-        custom_query: Optional[dict[str, Any]] = None,
+            self,
+            query: str,
+            *,
+            filters: Optional[dict[str, Any]] = None,
+            fuzziness: str = "AUTO",
+            top_k: int = 10,
+            scale_score: bool = False,
+            all_terms_must_match: bool = False,
+            custom_query: Optional[dict[str, Any]] = None,
     ) -> list[Document]:
         """
         Asynchronously retrieves documents that match the provided `query` using the BM25 search algorithm.
@@ -900,13 +900,13 @@ async def _bm25_retrieval_async(
         return documents
 
     def _prepare_embedding_search_request(
-        self,
-        *,
-        query_embedding: list[float],
-        filters: Optional[dict[str, Any]],
-        top_k: int,
-        custom_query: Optional[dict[str, Any]],
-        efficient_filtering: bool = False,
+            self,
+            *,
+            query_embedding: list[float],
+            filters: Optional[dict[str, Any]],
+            top_k: int,
+            custom_query: Optional[dict[str, Any]],
+            efficient_filtering: bool = False,
     ) -> dict[str, Any]:
         if not query_embedding:
             msg = "query_embedding must be a non-empty list of floats"
@@ -956,13 +956,13 @@ def _prepare_embedding_search_request(
         return body
 
     def _embedding_retrieval(
-        self,
-        query_embedding: list[float],
-        *,
-        filters: Optional[dict[str, Any]] = None,
-        top_k: int = 10,
-        custom_query: Optional[dict[str, Any]] = None,
-        efficient_filtering: bool = False,
+            self,
+            query_embedding: list[float],
+            *,
+            filters: Optional[dict[str, Any]] = None,
+            top_k: int = 10,
+            custom_query: Optional[dict[str, Any]] = None,
+            efficient_filtering: bool = False,
     ) -> list[Document]:
         """
         Retrieves documents that are most similar to the query embedding using a vector similarity metric.
@@ -986,13 +986,13 @@ def _embedding_retrieval(
         return self._search_documents(search_params)
 
     async def _embedding_retrieval_async(
-        self,
-        query_embedding: list[float],
-        *,
-        filters: Optional[dict[str, Any]] = None,
-        top_k: int = 10,
-        custom_query: Optional[dict[str, Any]] = None,
-        efficient_filtering: bool = False,
+            self,
+            query_embedding: list[float],
+            *,
+            filters: Optional[dict[str, Any]] = None,
+            top_k: int = 10,
+            custom_query: Optional[dict[str, Any]] = None,
+            efficient_filtering: bool = False,
     ) -> list[Document]:
         """
         Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity
@@ -1032,3 +1032,49 @@ def _render_custom_query(self, custom_query: Any, substitutions: dict[str, Any])
             return substitutions.get(custom_query, custom_query)
 
         return custom_query
+
+    def count_documents_by_filter(self, filters: dict) -> int:
+        """
+        Returns the number of documents that match the provided filters.
+
+        :param filters: The filters to apply to count documents.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :returns: The number of documents that match the filters.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        normalized_filters = normalize_filters(filters)
+        body = {"query": {"bool": {"filter": normalized_filters}}}
+        return self._client.count(index=self._index, body=body)["count"]
+
+    async def count_documents_by_filter_async(self, filters: dict) -> int:
+        """
+        Asynchronously returns the number of documents that match the provided filters.
+
+        :param filters: The filters to apply to count documents.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :returns: The number of documents that match the filters.
+        """
+        await self._ensure_initialized_async()
+        assert self._async_client is not None
+
+        normalized_filters = normalize_filters(filters)
+        body = {"query": {"bool": {"filter": normalized_filters}}}
+        return (await self._async_client.count(index=self._index, body=body))["count"]
+
+    def count_distinct_values_by_filter(self, filters: dict) -> dict[str, int]:
+        pass
+
+    def get_fields_info(self) -> dict[str, dict]:
+        pass
+
+    def get_field_min_max(self, metadata_field: str) -> dict[str, Any]:
+        pass
+
+    def get_field_unique_values(
+            self, metadata_field: str, search_term: str | None, from_: int, size: int)-> tuple[list[str], int]:
+        pass
+
+    def query_sql(self, query: str):
+        pass
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index d74cdbaa80..c35a9449b2 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -576,3 +576,29 @@ def test_update_by_filter(self, document_store: OpenSearchDocumentStore):
         )
         assert len(draft_docs) == 1
         assert draft_docs[0].meta["category"] == "B"
+
+    def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore):
+        docs = [
+            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
+            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
+            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
+            Document(content="Doc 4", meta={"category": "A", "status": "active"}),
+        ]
+        document_store.write_documents(docs)
+        assert document_store.count_documents() == 4
+
+        count_a = document_store.count_documents_by_filter(
+            filters={"field": "meta.category", "operator": "==", "value": "A"}
+        )
+        assert count_a == 3
+
+        count_a_active = document_store.count_documents_by_filter(
+            filters={
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.category", "operator": "==", "value": "A"},
+                    {"field": "meta.status", "operator": "==", "value": "active"},
+                ],
+            }
+        )
+        assert count_a_active == 2
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 95444dae4d..783bfa6d11 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -238,6 +238,38 @@ async def test_filter_documents(self, document_store: OpenSearchDocumentStore):
         assert result[0].content == "2"
         assert result[0].meta["number"] == 100
 
+    @pytest.mark.asyncio
+    async def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore):
+        filterable_docs = [
+            Document(content="Doc 1", meta={"category": "A", "status": "active"}),
+            Document(content="Doc 2", meta={"category": "B", "status": "active"}),
+            Document(content="Doc 3", meta={"category": "A", "status": "inactive"}),
+            Document(content="Doc 4", meta={"category": "A", "status": "active"}),
+        ]
+        await document_store.write_documents_async(filterable_docs)
+        assert await document_store.count_documents_async() == 4
+
+        count_a = await document_store.count_documents_by_filter_async(
+            filters={"field": "meta.category", "operator": "==", "value": "A"}
+        )
+        assert count_a == 3
+
+        count_active = await document_store.count_documents_by_filter_async(
+            filters={"field": "meta.status", "operator": "==", "value": "active"}
+        )
+        assert count_active == 3
+
+        count_a_active = await document_store.count_documents_by_filter_async(
+            filters={
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.category", "operator": "==", "value": "A"},
+                    {"field": "meta.status", "operator": "==", "value": "active"},
+                ],
+            }
+        )
+        assert count_a_active == 2
+
     @pytest.mark.asyncio
     async def test_delete_documents(self, document_store: OpenSearchDocumentStore):
         doc = Document(content="test doc")

From 842da6a86acab107a1518118c67391e586b45532 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 15:18:13 +0100
Subject: [PATCH 02/58] formatting

---
 .../opensearch/document_store.py              | 147 +++++++++---------
 1 file changed, 74 insertions(+), 73 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index b9c45e690e..53a81b182d 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -64,25 +64,25 @@ class OpenSearchDocumentStore:
     """
 
     def __init__(
-            self,
-            *,
-            hosts: Optional[Hosts] = None,
-            index: str = "default",
-            max_chunk_bytes: int = DEFAULT_MAX_CHUNK_BYTES,
-            embedding_dim: int = 768,
-            return_embedding: bool = False,
-            method: Optional[dict[str, Any]] = None,
-            mappings: Optional[dict[str, Any]] = None,
-            settings: Optional[dict[str, Any]] = DEFAULT_SETTINGS,
-            create_index: bool = True,
-            http_auth: Any = (
-                    Secret.from_env_var("OPENSEARCH_USERNAME", strict=False),  # noqa: B008
-                    Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False),  # noqa: B008
-            ),
-            use_ssl: Optional[bool] = None,
-            verify_certs: Optional[bool] = None,
-            timeout: Optional[int] = None,
-            **kwargs: Any,
+        self,
+        *,
+        hosts: Optional[Hosts] = None,
+        index: str = "default",
+        max_chunk_bytes: int = DEFAULT_MAX_CHUNK_BYTES,
+        embedding_dim: int = 768,
+        return_embedding: bool = False,
+        method: Optional[dict[str, Any]] = None,
+        mappings: Optional[dict[str, Any]] = None,
+        settings: Optional[dict[str, Any]] = DEFAULT_SETTINGS,
+        create_index: bool = True,
+        http_auth: Any = (
+            Secret.from_env_var("OPENSEARCH_USERNAME", strict=False),  # noqa: B008
+            Secret.from_env_var("OPENSEARCH_PASSWORD", strict=False),  # noqa: B008
+        ),
+        use_ssl: Optional[bool] = None,
+        verify_certs: Optional[bool] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
     ) -> None:
         """
         Creates a new OpenSearchDocumentStore instance.
@@ -174,10 +174,10 @@ def _get_default_mappings(self) -> dict[str, Any]:
         return default_mappings
 
     def create_index(
-            self,
-            index: Optional[str] = None,
-            mappings: Optional[dict[str, Any]] = None,
-            settings: Optional[dict[str, Any]] = None,
+        self,
+        index: Optional[str] = None,
+        mappings: Optional[dict[str, Any]] = None,
+        settings: Optional[dict[str, Any]] = None,
     ) -> None:
         """
         Creates an index in OpenSearch.
@@ -399,7 +399,7 @@ async def filter_documents_async(self, filters: Optional[dict[str, Any]] = None)
         return await self._search_documents_async(self._prepare_filter_search_request(filters))
 
     def _prepare_bulk_write_request(
-            self, *, documents: list[Document], policy: DuplicatePolicy, is_async: bool
+        self, *, documents: list[Document], policy: DuplicatePolicy, is_async: bool
     ) -> dict[str, Any]:
         if len(documents) > 0 and not isinstance(documents[0], Document):
             msg = "param 'documents' must contain a list of objects of type Document"
@@ -487,7 +487,7 @@ def write_documents(self, documents: list[Document], policy: DuplicatePolicy = D
         return documents_written
 
     async def write_documents_async(
-            self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
+        self, documents: list[Document], policy: DuplicatePolicy = DuplicatePolicy.NONE
     ) -> int:
         """
         Asynchronously writes documents to the document store.
@@ -757,14 +757,14 @@ async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str,
             raise DocumentStoreError(msg) from e
 
     def _prepare_bm25_search_request(
-            self,
-            *,
-            query: str,
-            filters: Optional[dict[str, Any]],
-            fuzziness: Union[int, str],
-            top_k: int,
-            all_terms_must_match: bool,
-            custom_query: Optional[dict[str, Any]],
+        self,
+        *,
+        query: str,
+        filters: Optional[dict[str, Any]],
+        fuzziness: Union[int, str],
+        top_k: int,
+        all_terms_must_match: bool,
+        custom_query: Optional[dict[str, Any]],
     ) -> dict[str, Any]:
         if not query:
             body: dict[str, Any] = {"query": {"bool": {"must": {"match_all": {}}}}}
@@ -822,15 +822,15 @@ def _postprocess_bm25_search_results(*, results: list[Document], scale_score: bo
             doc.score = float(1 / (1 + exp(-(doc.score / float(BM25_SCALING_FACTOR)))))
 
     def _bm25_retrieval(
-            self,
-            query: str,
-            *,
-            filters: Optional[dict[str, Any]] = None,
-            fuzziness: Union[int, str] = "AUTO",
-            top_k: int = 10,
-            scale_score: bool = False,
-            all_terms_must_match: bool = False,
-            custom_query: Optional[dict[str, Any]] = None,
+        self,
+        query: str,
+        *,
+        filters: Optional[dict[str, Any]] = None,
+        fuzziness: Union[int, str] = "AUTO",
+        top_k: int = 10,
+        scale_score: bool = False,
+        all_terms_must_match: bool = False,
+        custom_query: Optional[dict[str, Any]] = None,
     ) -> list[Document]:
         """
         Retrieves documents that match the provided `query` using the BM25 search algorithm.
@@ -860,15 +860,15 @@ def _bm25_retrieval(
         return documents
 
     async def _bm25_retrieval_async(
-            self,
-            query: str,
-            *,
-            filters: Optional[dict[str, Any]] = None,
-            fuzziness: str = "AUTO",
-            top_k: int = 10,
-            scale_score: bool = False,
-            all_terms_must_match: bool = False,
-            custom_query: Optional[dict[str, Any]] = None,
+        self,
+        query: str,
+        *,
+        filters: Optional[dict[str, Any]] = None,
+        fuzziness: str = "AUTO",
+        top_k: int = 10,
+        scale_score: bool = False,
+        all_terms_must_match: bool = False,
+        custom_query: Optional[dict[str, Any]] = None,
     ) -> list[Document]:
         """
         Asynchronously retrieves documents that match the provided `query` using the BM25 search algorithm.
@@ -900,13 +900,13 @@ async def _bm25_retrieval_async(
         return documents
 
     def _prepare_embedding_search_request(
-            self,
-            *,
-            query_embedding: list[float],
-            filters: Optional[dict[str, Any]],
-            top_k: int,
-            custom_query: Optional[dict[str, Any]],
-            efficient_filtering: bool = False,
+        self,
+        *,
+        query_embedding: list[float],
+        filters: Optional[dict[str, Any]],
+        top_k: int,
+        custom_query: Optional[dict[str, Any]],
+        efficient_filtering: bool = False,
     ) -> dict[str, Any]:
         if not query_embedding:
             msg = "query_embedding must be a non-empty list of floats"
@@ -956,13 +956,13 @@ def _prepare_embedding_search_request(
         return body
 
     def _embedding_retrieval(
-            self,
-            query_embedding: list[float],
-            *,
-            filters: Optional[dict[str, Any]] = None,
-            top_k: int = 10,
-            custom_query: Optional[dict[str, Any]] = None,
-            efficient_filtering: bool = False,
+        self,
+        query_embedding: list[float],
+        *,
+        filters: Optional[dict[str, Any]] = None,
+        top_k: int = 10,
+        custom_query: Optional[dict[str, Any]] = None,
+        efficient_filtering: bool = False,
     ) -> list[Document]:
         """
         Retrieves documents that are most similar to the query embedding using a vector similarity metric.
@@ -986,13 +986,13 @@ def _embedding_retrieval(
         return self._search_documents(search_params)
 
     async def _embedding_retrieval_async(
-            self,
-            query_embedding: list[float],
-            *,
-            filters: Optional[dict[str, Any]] = None,
-            top_k: int = 10,
-            custom_query: Optional[dict[str, Any]] = None,
-            efficient_filtering: bool = False,
+        self,
+        query_embedding: list[float],
+        *,
+        filters: Optional[dict[str, Any]] = None,
+        top_k: int = 10,
+        custom_query: Optional[dict[str, Any]] = None,
+        efficient_filtering: bool = False,
     ) -> list[Document]:
         """
         Asynchronously retrieves documents that are most similar to the query embedding using a vector similarity
@@ -1073,7 +1073,8 @@ def get_field_min_max(self, metadata_field: str) -> dict[str, Any]:
         pass
 
     def get_field_unique_values(
-            self, metadata_field: str, search_term: str | None, from_: int, size: int)-> tuple[list[str], int]:
+        self, metadata_field: str, search_term: str | None, from_: int, size: int
+    ) -> tuple[list[str], int]:
         pass
 
     def query_sql(self, query: str):

From a28bb2aecfeb2699c5ba6310b748c6347abbf388 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 16:19:32 +0100
Subject: [PATCH 03/58] adding count distinct metadata values

---
 .../opensearch/document_store.py              | 106 +++++++++++++++++-
 .../opensearch/tests/test_document_store.py   |  47 ++++++++
 .../tests/test_document_store_async.py        |  48 ++++++++
 3 files changed, 200 insertions(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 53a81b182d..09a1ab9502 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1064,7 +1064,111 @@ async def count_documents_by_filter_async(self, filters: dict) -> int:
         return (await self._async_client.count(index=self._index, body=body))["count"]
 
     def count_distinct_values_by_filter(self, filters: dict) -> dict[str, int]:
-        pass
+        """
+        Returns the number of unique values for each meta field of the documents that match the provided filters.
+
+        :param filters: The filters to apply to count documents.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :returns: The number of unique values for each meta field of the documents that match the filters.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        # use index mapping to get all fields
+        mapping = self._client.indices.get_mapping(index=self._index)
+        index_mapping = mapping[self._index]["mappings"]["properties"]
+
+        # aggregations for each metadata field (exclude special fields)
+        special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
+        aggs = {}
+        for field_name in index_mapping.keys():
+            if field_name not in special_fields:
+                aggs[f"{field_name}_cardinality"] = {"cardinality": {"field": field_name}}
+
+        if not aggs:
+            return {}
+
+        # search query with filters and aggregations
+        if filters:
+            normalized_filters = normalize_filters(filters)
+            body = {
+                "query": {"bool": {"filter": normalized_filters}},
+                "aggs": aggs,
+                "size": 0,  # We only need aggregations, not documents
+            }
+        else:
+            # No filters - match all documents
+            body = {
+                "query": {"match_all": {}},
+                "aggs": aggs,
+                "size": 0,  # We only need aggregations, not documents
+            }
+        result = self._client.search(index=self._index, body=body)
+
+        # extract cardinality values for each field
+        distinct_counts = {}
+        aggregations = result.get("aggregations", {})
+        for field_name in index_mapping.keys():
+            if field_name not in special_fields:
+                agg_key = f"{field_name}_cardinality"
+                if agg_key in aggregations:
+                    distinct_counts[field_name] = aggregations[agg_key]["value"]
+
+        return distinct_counts
+
+    async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str, int]:
+        """
+        Asynchronously returns the number of unique values for each meta field of the documents that match the
+        provided filters.
+
+        :param filters: The filters to apply to count documents.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :returns: The number of unique values for each meta field of the documents that match the filters.
+        """
+        await self._ensure_initialized_async()
+        assert self._async_client is not None
+
+        # use index mapping to get all fields
+        mapping = await self._async_client.indices.get_mapping(index=self._index)
+        index_mapping = mapping[self._index]["mappings"]["properties"]
+
+        # aggregations for each metadata field (exclude special fields)
+        special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
+        aggs = {}
+        for field_name in index_mapping.keys():
+            if field_name not in special_fields:
+                aggs[f"{field_name}_cardinality"] = {"cardinality": {"field": field_name}}
+
+        if not aggs:
+            return {}
+
+        # search query with filters and aggregations
+        if filters:
+            normalized_filters = normalize_filters(filters)
+            body = {
+                "query": {"bool": {"filter": normalized_filters}},
+                "aggs": aggs,
+                "size": 0,  # We only need aggregations, not documents
+            }
+        else:
+            # No filters - match all documents
+            body = {
+                "query": {"match_all": {}},
+                "aggs": aggs,
+                "size": 0,  # We only need aggregations, not documents
+            }
+        result = await self._async_client.search(index=self._index, body=body)
+
+        # extract cardinality values for each field
+        distinct_counts = {}
+        aggregations = result.get("aggregations", {})
+        for field_name in index_mapping.keys():
+            if field_name not in special_fields:
+                agg_key = f"{field_name}_cardinality"
+                if agg_key in aggregations:
+                    distinct_counts[field_name] = aggregations[agg_key]["value"]
+
+        return distinct_counts
 
     def get_fields_info(self) -> dict[str, dict]:
         pass
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index c35a9449b2..393d1cc74d 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -602,3 +602,50 @@ def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore
             }
         )
         assert count_a_active == 2
+
+    def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumentStore):
+        docs = [
+            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
+            Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
+            Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
+            Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
+            Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}),
+        ]
+        document_store.write_documents(docs)
+        assert document_store.count_documents() == 5
+
+        # Count distinct values for all documents
+        distinct_counts = document_store.count_distinct_values_by_filter(filters={})
+        assert distinct_counts["category"] == 3  # A, B, C
+        assert distinct_counts["status"] == 2  # active, inactive
+        assert distinct_counts["priority"] == 3  # 1, 2, 3
+
+        # Count distinct values for documents with category="A"
+        distinct_counts_a = document_store.count_distinct_values_by_filter(
+            filters={"field": "meta.category", "operator": "==", "value": "A"}
+        )
+        assert distinct_counts_a["category"] == 1  # Only A
+        assert distinct_counts_a["status"] == 2  # active, inactive
+        assert distinct_counts_a["priority"] == 2  # 1, 3
+
+        # Count distinct values for documents with status="active"
+        distinct_counts_active = document_store.count_distinct_values_by_filter(
+            filters={"field": "meta.status", "operator": "==", "value": "active"}
+        )
+        assert distinct_counts_active["category"] == 3  # A, B, C
+        assert distinct_counts_active["status"] == 1  # Only active
+        assert distinct_counts_active["priority"] == 3  # 1, 2, 3
+
+        # Count distinct values with complex filter (category="A" AND status="active")
+        distinct_counts_a_active = document_store.count_distinct_values_by_filter(
+            filters={
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.category", "operator": "==", "value": "A"},
+                    {"field": "meta.status", "operator": "==", "value": "active"},
+                ],
+            }
+        )
+        assert distinct_counts_a_active["category"] == 1  # Only A
+        assert distinct_counts_a_active["status"] == 1  # Only active
+        assert distinct_counts_a_active["priority"] == 2  # 1, 3
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 783bfa6d11..f3cd7922ff 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -270,6 +270,54 @@ async def test_count_documents_by_filter(self, document_store: OpenSearchDocumen
         )
         assert count_a_active == 2
 
+    @pytest.mark.asyncio
+    async def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumentStore):
+        filterable_docs = [
+            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
+            Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
+            Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}),
+            Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}),
+            Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}),
+        ]
+        await document_store.write_documents_async(filterable_docs)
+        assert await document_store.count_documents_async() == 5
+
+        # count distinct values for all documents
+        distinct_counts = await document_store.count_distinct_values_by_filter_async(filters={})
+        assert distinct_counts["category"] == 3  # A, B, C
+        assert distinct_counts["status"] == 2  # active, inactive
+        assert distinct_counts["priority"] == 3  # 1, 2, 3
+
+        # count distinct values for documents with category="A"
+        distinct_counts_a = await document_store.count_distinct_values_by_filter_async(
+            filters={"field": "meta.category", "operator": "==", "value": "A"}
+        )
+        assert distinct_counts_a["category"] == 1  # Only A
+        assert distinct_counts_a["status"] == 2  # active, inactive
+        assert distinct_counts_a["priority"] == 2  # 1, 3
+
+        # count distinct values for documents with status="active"
+        distinct_counts_active = await document_store.count_distinct_values_by_filter_async(
+            filters={"field": "meta.status", "operator": "==", "value": "active"}
+        )
+        assert distinct_counts_active["category"] == 3  # A, B, C
+        assert distinct_counts_active["status"] == 1  # Only active
+        assert distinct_counts_active["priority"] == 3  # 1, 2, 3
+
+        # count distinct values with complex filter (category="A" AND status="active")
+        distinct_counts_a_active = await document_store.count_distinct_values_by_filter_async(
+            filters={
+                "operator": "AND",
+                "conditions": [
+                    {"field": "meta.category", "operator": "==", "value": "A"},
+                    {"field": "meta.status", "operator": "==", "value": "active"},
+                ],
+            }
+        )
+        assert distinct_counts_a_active["category"] == 1  # Only A
+        assert distinct_counts_a_active["status"] == 1  # Only active
+        assert distinct_counts_a_active["priority"] == 2  # 1, 3
+
     @pytest.mark.asyncio
     async def test_delete_documents(self, document_store: OpenSearchDocumentStore):
         doc = Document(content="test doc")

From b0b594caf5d3ee8e2c95d18fb83991c5c35e9535 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 16:26:20 +0100
Subject: [PATCH 04/58] refactoring to reduce duplicated code

---
 .../opensearch/document_store.py              | 115 ++++++++++--------
 1 file changed, 61 insertions(+), 54 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 09a1ab9502..a3533b241e 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1063,59 +1063,92 @@ async def count_documents_by_filter_async(self, filters: dict) -> int:
         body = {"query": {"bool": {"filter": normalized_filters}}}
         return (await self._async_client.count(index=self._index, body=body))["count"]
 
-    def count_distinct_values_by_filter(self, filters: dict) -> dict[str, int]:
+    @staticmethod
+    def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str, Any]:
         """
-        Returns the number of unique values for each meta field of the documents that match the provided filters.
+        Builds cardinality aggregations for all metadata fields in the index mapping.
 
-        :param filters: The filters to apply to count documents.
-            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :returns: The number of unique values for each meta field of the documents that match the filters.
+        :param index_mapping: The properties mapping from the index.
+        :returns: Dictionary of aggregations keyed by field name.
         """
-        self._ensure_initialized()
-        assert self._client is not None
-
-        # use index mapping to get all fields
-        mapping = self._client.indices.get_mapping(index=self._index)
-        index_mapping = mapping[self._index]["mappings"]["properties"]
-
-        # aggregations for each metadata field (exclude special fields)
         special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
         aggs = {}
         for field_name in index_mapping.keys():
             if field_name not in special_fields:
                 aggs[f"{field_name}_cardinality"] = {"cardinality": {"field": field_name}}
+        return aggs
 
-        if not aggs:
-            return {}
+    @staticmethod
+    def _build_distinct_values_query_body(filters: dict, aggs: dict[str, Any]) -> dict[str, Any]:
+        """
+        Builds the query body for distinct values counting with filters and aggregations.
 
-        # search query with filters and aggregations
+        :param filters: The filters to apply, or empty dict for no filters.
+        :param aggs: The aggregations to include in the query.
+        :returns: The query body dictionary.
+        """
         if filters:
             normalized_filters = normalize_filters(filters)
-            body = {
+            return {
                 "query": {"bool": {"filter": normalized_filters}},
                 "aggs": aggs,
                 "size": 0,  # We only need aggregations, not documents
             }
         else:
             # No filters - match all documents
-            body = {
+            return {
                 "query": {"match_all": {}},
                 "aggs": aggs,
                 "size": 0,  # We only need aggregations, not documents
             }
-        result = self._client.search(index=self._index, body=body)
 
-        # extract cardinality values for each field
+    @staticmethod
+    def _extract_distinct_counts_from_aggregations(
+        aggregations: dict[str, Any], index_mapping: dict[str, Any]
+    ) -> dict[str, int]:
+        """
+        Extracts distinct value counts from search result aggregations.
+
+        :param aggregations: The aggregations from the search result.
+        :param index_mapping: The properties mapping from the index.
+        :returns: Dictionary mapping field names to their distinct value counts.
+        """
+        special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
         distinct_counts = {}
-        aggregations = result.get("aggregations", {})
         for field_name in index_mapping.keys():
             if field_name not in special_fields:
                 agg_key = f"{field_name}_cardinality"
                 if agg_key in aggregations:
                     distinct_counts[field_name] = aggregations[agg_key]["value"]
-
         return distinct_counts
 
+    def count_distinct_values_by_filter(self, filters: dict) -> dict[str, int]:
+        """
+        Returns the number of unique values for each meta field of the documents that match the provided filters.
+
+        :param filters: The filters to apply to count documents.
+            For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :returns: The number of unique values for each meta field of the documents that match the filters.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        # use index mapping to get all fields
+        mapping = self._client.indices.get_mapping(index=self._index)
+        index_mapping = mapping[self._index]["mappings"]["properties"]
+
+        # build aggregations for each metadata field
+        aggs = self._build_cardinality_aggregations(index_mapping)
+        if not aggs:
+            return {}
+
+        # build and execute search query
+        body = self._build_distinct_values_query_body(filters, aggs)
+        result = self._client.search(index=self._index, body=body)
+
+        # extract cardinality values from aggregations
+        return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
+
     async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str, int]:
         """
         Asynchronously returns the number of unique values for each meta field of the documents that match the
@@ -1132,43 +1165,17 @@ async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str
         mapping = await self._async_client.indices.get_mapping(index=self._index)
         index_mapping = mapping[self._index]["mappings"]["properties"]
 
-        # aggregations for each metadata field (exclude special fields)
-        special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
-        aggs = {}
-        for field_name in index_mapping.keys():
-            if field_name not in special_fields:
-                aggs[f"{field_name}_cardinality"] = {"cardinality": {"field": field_name}}
-
+        # build aggregations for each metadata field
+        aggs = self._build_cardinality_aggregations(index_mapping)
         if not aggs:
             return {}
 
-        # search query with filters and aggregations
-        if filters:
-            normalized_filters = normalize_filters(filters)
-            body = {
-                "query": {"bool": {"filter": normalized_filters}},
-                "aggs": aggs,
-                "size": 0,  # We only need aggregations, not documents
-            }
-        else:
-            # No filters - match all documents
-            body = {
-                "query": {"match_all": {}},
-                "aggs": aggs,
-                "size": 0,  # We only need aggregations, not documents
-            }
+        # build and execute search query
+        body = self._build_distinct_values_query_body(filters, aggs)
         result = await self._async_client.search(index=self._index, body=body)
 
-        # extract cardinality values for each field
-        distinct_counts = {}
-        aggregations = result.get("aggregations", {})
-        for field_name in index_mapping.keys():
-            if field_name not in special_fields:
-                agg_key = f"{field_name}_cardinality"
-                if agg_key in aggregations:
-                    distinct_counts[field_name] = aggregations[agg_key]["value"]
-
-        return distinct_counts
+        # extract cardinality values from aggregations
+        return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
     def get_fields_info(self) -> dict[str, dict]:
         pass

From b23274fd3331c439f7eab7f7bbbd1865ba5e131f Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 16:59:04 +0100
Subject: [PATCH 05/58] adding get metadata info

---
 .../opensearch/document_store.py              | 25 ++++++++++++++++++-
 .../opensearch/tests/test_document_store.py   | 24 ++++++++++++++++++
 .../tests/test_document_store_async.py        | 25 +++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index a3533b241e..88a7150e81 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1178,7 +1178,30 @@ async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
     def get_fields_info(self) -> dict[str, dict]:
-        pass
+        """
+        Returns the information about the fields in the index.
+
+        :returns: The information about the fields in the index.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        mapping = self._client.indices.get_mapping(index=self._index)
+        index_mapping = mapping[self._index]["mappings"]["properties"]
+        return index_mapping
+
+    async def get_fields_info_async(self) -> dict[str, dict]:
+        """
+        Asynchronously returns the information about the fields in the index.
+
+        :returns: The information about the fields in the index.
+        """
+        await self._ensure_initialized_async()
+        assert self._async_client is not None
+
+        mapping = await self._async_client.indices.get_mapping(index=self._index)
+        index_mapping = mapping[self._index]["mappings"]["properties"]
+        return index_mapping
 
     def get_field_min_max(self, metadata_field: str) -> dict[str, Any]:
         pass
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 393d1cc74d..a8d033dcbd 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -649,3 +649,27 @@ def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumen
         assert distinct_counts_a_active["category"] == 1  # Only A
         assert distinct_counts_a_active["status"] == 1  # Only active
         assert distinct_counts_a_active["priority"] == 2  # 1, 3
+
+    def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
+        docs = [
+            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
+            Document(content="Doc 2", meta={"category": "B", "status": "inactive"}),
+        ]
+        document_store.write_documents(docs)
+
+        fields_info = document_store.get_fields_info()
+
+        # Verify that fields_info contains expected fields
+        assert "content" in fields_info
+        assert "embedding" in fields_info
+        assert "category" in fields_info
+        assert "status" in fields_info
+        assert "priority" in fields_info
+
+        # Verify field types
+        assert fields_info["content"]["type"] == "text"
+        assert fields_info["embedding"]["type"] == "knn_vector"
+        # Metadata fields should be keyword type (from dynamic templates)
+        assert fields_info["category"]["type"] == "keyword"
+        assert fields_info["status"]["type"] == "keyword"
+        assert fields_info["priority"]["type"] == "long"
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index f3cd7922ff..58844ea1b5 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -436,3 +436,28 @@ async def test_update_by_filter_async(self, document_store: OpenSearchDocumentSt
         )
         assert len(draft_docs) == 1
         assert draft_docs[0].meta["category"] == "B"
+
+    @pytest.mark.asyncio
+    async def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
+        filterable_docs = [
+            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
+            Document(content="Doc 2", meta={"category": "B", "status": "inactive"}),
+        ]
+        await document_store.write_documents_async(filterable_docs)
+
+        fields_info = await document_store.get_fields_info_async()
+
+        # Verify that fields_info contains expected fields
+        assert "content" in fields_info
+        assert "embedding" in fields_info
+        assert "category" in fields_info
+        assert "status" in fields_info
+        assert "priority" in fields_info
+
+        # Verify field types
+        assert fields_info["content"]["type"] == "text"
+        assert fields_info["embedding"]["type"] == "knn_vector"
+        # Metadata fields should be keyword type (from dynamic templates)
+        assert fields_info["category"]["type"] == "keyword"
+        assert fields_info["status"]["type"] == "keyword"
+        assert fields_info["priority"]["type"] == "long"

From 22e160d6c800fab3f43d2ae36e717148d5724f45 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 17:17:45 +0100
Subject: [PATCH 06/58] adding get_field_min_max

---
 .../opensearch/document_store.py              | 66 ++++++++++++++++++-
 .../opensearch/tests/test_document_store.py   | 26 ++++++++
 .../tests/test_document_store_async.py        | 27 ++++++++
 3 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 88a7150e81..4b508a1700 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1203,8 +1203,72 @@ async def get_fields_info_async(self) -> dict[str, dict]:
         index_mapping = mapping[self._index]["mappings"]["properties"]
         return index_mapping
 
+    @staticmethod
+    def _normalize_metadata_field_name(metadata_field: str) -> str:
+        """
+        Normalizes a metadata field name by removing the "meta." prefix if present.
+        """
+        return metadata_field[5:] if metadata_field.startswith("meta.") else metadata_field
+
+    @staticmethod
+    def _build_min_max_query_body(field_name: str) -> dict[str, Any]:
+        """
+        Builds the query body for getting min and max values using stats aggregation.
+        """
+        return {
+            "query": {"match_all": {}},
+            "aggs": {
+                "field_stats": {
+                    "stats": {
+                        "field": field_name,
+                    }
+                }
+            },
+            "size": 0,  # We only need aggregations, not documents
+        }
+
+    @staticmethod
+    def _extract_min_max_from_stats(stats: dict[str, Any]) -> dict[str, Any]:
+        """
+        Extracts min and max values from stats aggregation results.
+        """
+        min_value = stats.get("min")
+        max_value = stats.get("max")
+        return {"min": min_value, "max": max_value}
+
     def get_field_min_max(self, metadata_field: str) -> dict[str, Any]:
-        pass
+        """
+        Returns the minimum and maximum values for the given metadata field.
+
+        :param metadata_field: The metadata field to get the minimum and maximum values for.
+        :returns: The minimum and maximum values for the given metadata field.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        field_name = self._normalize_metadata_field_name(metadata_field)
+        body = self._build_min_max_query_body(field_name)
+        result = self._client.search(index=self._index, body=body)
+        stats = result.get("aggregations", {}).get("field_stats", {})
+
+        return self._extract_min_max_from_stats(stats)
+
+    async def get_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
+        """
+        Asynchronously returns the minimum and maximum values for the given metadata field.
+
+        :param metadata_field: The metadata field to get the minimum and maximum values for.
+        :returns: The minimum and maximum values for the given metadata field.
+        """
+        await self._ensure_initialized_async()
+        assert self._async_client is not None
+
+        field_name = self._normalize_metadata_field_name(metadata_field)
+        body = self._build_min_max_query_body(field_name)
+        result = await self._async_client.search(index=self._index, body=body)
+        stats = result.get("aggregations", {}).get("field_stats", {})
+
+        return self._extract_min_max_from_stats(stats)
 
     def get_field_unique_values(
         self, metadata_field: str, search_term: str | None, from_: int, size: int
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index a8d033dcbd..da34210be5 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -673,3 +673,29 @@ def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
         assert fields_info["category"]["type"] == "keyword"
         assert fields_info["status"]["type"] == "keyword"
         assert fields_info["priority"]["type"] == "long"
+
+    def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
+        docs = [
+            Document(content="Doc 1", meta={"priority": 1, "rating": 10}),
+            Document(content="Doc 2", meta={"priority": 5, "rating": 20}),
+            Document(content="Doc 3", meta={"priority": 3, "rating": 15}),
+            Document(content="Doc 4", meta={"priority": 10, "rating": 5}),
+        ]
+        document_store.write_documents(docs)
+
+        # Test with "meta." prefix for integer field
+        min_max_priority = document_store.get_field_min_max("meta.priority")
+        assert min_max_priority["min"] == 1
+        assert min_max_priority["max"] == 10
+
+        # Test with "meta." prefix for another integer field
+        min_max_rating = document_store.get_field_min_max("meta.rating")
+        assert min_max_rating["min"] == 5
+        assert min_max_rating["max"] == 20
+
+        # Test with single value
+        single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
+        document_store.write_documents(single_doc)
+        min_max_single = document_store.get_field_min_max("meta.single_value")
+        assert min_max_single["min"] == 42
+        assert min_max_single["max"] == 42
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 58844ea1b5..578924536f 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -461,3 +461,30 @@ async def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
         assert fields_info["category"]["type"] == "keyword"
         assert fields_info["status"]["type"] == "keyword"
         assert fields_info["priority"]["type"] == "long"
+
+    @pytest.mark.asyncio
+    async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
+        filterable_docs = [
+            Document(content="Doc 1", meta={"priority": 1, "rating": 10}),
+            Document(content="Doc 2", meta={"priority": 5, "rating": 20}),
+            Document(content="Doc 3", meta={"priority": 3, "rating": 15}),
+            Document(content="Doc 4", meta={"priority": 10, "rating": 5}),
+        ]
+        await document_store.write_documents_async(filterable_docs)
+
+        # Test with "meta." prefix for integer field
+        min_max_priority = await document_store.get_field_min_max_async("meta.priority")
+        assert min_max_priority["min"] == 1
+        assert min_max_priority["max"] == 10
+
+        # Test with "meta." prefix for another integer field
+        min_max_rating = await document_store.get_field_min_max_async("meta.rating")
+        assert min_max_rating["min"] == 5
+        assert min_max_rating["max"] == 20
+
+        # Test with single value
+        single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
+        await document_store.write_documents_async(single_doc)
+        min_max_single = await document_store.get_field_min_max_async("meta.single_value")
+        assert min_max_single["min"] == 42
+        assert min_max_single["max"] == 42

From 310846d44b82ef86db1162115043eafa0d581575 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 18:09:23 +0100
Subject: [PATCH 07/58] fixing get_field_min_max

---
 .../opensearch/document_store.py              | 25 ++++++++++++++++--
 .../opensearch/tests/test_document_store.py   | 20 ++++++++++----
 .../tests/test_document_store_async.py        | 26 ++++++++++++++-----
 3 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 4b508a1700..4dbfc7a710 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -341,10 +341,31 @@ async def count_documents_async(self) -> int:
     @staticmethod
     def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]:
         out = []
+        # Fields that are not metadata (should stay at top level)
+        non_meta_fields = {"id", "content", "embedding", "blob", "sparse_embedding", "score"}
+        
         for hit in hits:
-            data = hit["_source"]
+            data = hit["_source"].copy()
+            
+            # Reconstruct metadata dict from flattened fields
+            meta = {}
+            fields_to_remove = []
+            for key, value in data.items():
+                if key not in non_meta_fields:
+                    meta[key] = value
+                    fields_to_remove.append(key)
+            
+            # Remove metadata fields from top level and add them to meta
+            for key in fields_to_remove:
+                data.pop(key, None)
+            
+            if meta:
+                data["meta"] = meta
+            
             if "highlight" in hit:
-                data["metadata"]["highlighted"] = hit["highlight"]
+                if "meta" not in data:
+                    data["meta"] = {}
+                data["meta"]["highlighted"] = hit["highlight"]
             data["score"] = hit["_score"]
             out.append(Document.from_dict(data))
 
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index da34210be5..fc8226fbb6 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -675,11 +675,16 @@ def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
         assert fields_info["priority"]["type"] == "long"
 
     def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
+        # Test with integer values
         docs = [
-            Document(content="Doc 1", meta={"priority": 1, "rating": 10}),
-            Document(content="Doc 2", meta={"priority": 5, "rating": 20}),
-            Document(content="Doc 3", meta={"priority": 3, "rating": 15}),
-            Document(content="Doc 4", meta={"priority": 10, "rating": 5}),
+            Document(content="Doc 1", meta={"priority": 1, "age": 10}),
+            Document(content="Doc 2", meta={"priority": 5, "age": 20}),
+            Document(content="Doc 3", meta={"priority": 3, "age": 15}),
+            Document(content="Doc 4", meta={"priority": 10, "age": 5}),
+            Document(content="Doc 6", meta={"rating": 10.5}),
+            Document(content="Doc 7", meta={"rating": 20.3}),
+            Document(content="Doc 8", meta={"rating": 15.7}),
+            Document(content="Doc 9", meta={"rating": 5.2}),
         ]
         document_store.write_documents(docs)
 
@@ -689,7 +694,7 @@ def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         assert min_max_priority["max"] == 10
 
         # Test with "meta." prefix for another integer field
-        min_max_rating = document_store.get_field_min_max("meta.rating")
+        min_max_rating = document_store.get_field_min_max("meta.age")
         assert min_max_rating["min"] == 5
         assert min_max_rating["max"] == 20
 
@@ -699,3 +704,8 @@ def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         min_max_single = document_store.get_field_min_max("meta.single_value")
         assert min_max_single["min"] == 42
         assert min_max_single["max"] == 42
+                
+        # Test with float values
+        min_max_score = document_store.get_field_min_max("meta.rating")
+        assert min_max_score["min"] == pytest.approx(5.2)
+        assert min_max_score["max"] == pytest.approx(20.3)
\ No newline at end of file
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 578924536f..fe4f8ec726 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -464,13 +464,18 @@ async def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
 
     @pytest.mark.asyncio
     async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
-        filterable_docs = [
-            Document(content="Doc 1", meta={"priority": 1, "rating": 10}),
-            Document(content="Doc 2", meta={"priority": 5, "rating": 20}),
-            Document(content="Doc 3", meta={"priority": 3, "rating": 15}),
-            Document(content="Doc 4", meta={"priority": 10, "rating": 5}),
+        # Test with integer values
+        docs = [
+            Document(content="Doc 1", meta={"priority": 1, "age": 10}),
+            Document(content="Doc 2", meta={"priority": 5, "age": 20}),
+            Document(content="Doc 3", meta={"priority": 3, "age": 15}),
+            Document(content="Doc 4", meta={"priority": 10, "age": 5}),
+            Document(content="Doc 6", meta={"rating": 10.5}),
+            Document(content="Doc 7", meta={"rating": 20.3}),
+            Document(content="Doc 8", meta={"rating": 15.7}),
+            Document(content="Doc 9", meta={"rating": 5.2}),
         ]
-        await document_store.write_documents_async(filterable_docs)
+        await document_store.write_documents_async(docs)
 
         # Test with "meta." prefix for integer field
         min_max_priority = await document_store.get_field_min_max_async("meta.priority")
@@ -478,7 +483,7 @@ async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         assert min_max_priority["max"] == 10
 
         # Test with "meta." prefix for another integer field
-        min_max_rating = await document_store.get_field_min_max_async("meta.rating")
+        min_max_rating = await document_store.get_field_min_max_async("meta.age")
         assert min_max_rating["min"] == 5
         assert min_max_rating["max"] == 20
 
@@ -488,3 +493,10 @@ async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         min_max_single = await document_store.get_field_min_max_async("meta.single_value")
         assert min_max_single["min"] == 42
         assert min_max_single["max"] == 42
+
+        # Test with float values
+        min_max_score = await document_store.get_field_min_max_async("meta.rating")
+        assert min_max_score["min"] == pytest.approx(5.2)
+        assert min_max_score["max"] == pytest.approx(20.3)
+
+    
\ No newline at end of file

From e0be21f351fd7028a048f10eaec932d621ad3f40 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 18:50:45 +0100
Subject: [PATCH 08/58] adding get_field_unique_values

---
 .../opensearch/document_store.py              | 58 ++++++++++++++++-
 .../opensearch/tests/test_document_store.py   | 63 ++++++++++++++++++-
 2 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 4dbfc7a710..fbf6419d82 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1294,7 +1294,63 @@ async def get_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
     def get_field_unique_values(
         self, metadata_field: str, search_term: str | None, from_: int, size: int
     ) -> tuple[list[str], int]:
-        pass
+        """
+        Returns unique values for a metadata field, optionally filtered by a search term in the content.
+
+        :param metadata_field: The metadata field to get unique values for.
+        :param search_term: Optional search term to filter documents by matching in the content field.
+        :param from_: The starting index for pagination.
+        :param size: The number of unique values to return.
+        :returns: A tuple containing (list of unique values, total count of unique values).
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        field_name = self._normalize_metadata_field_name(metadata_field)
+
+        # filter by search_term if provided
+        query = {"match_all": {}}
+        if search_term:
+            # Use match_phrase for exact phrase matching to avoid tokenization issues
+            query = {"match_phrase": {"content": search_term}}
+
+        # Build aggregations
+        # Terms aggregation for paginated unique values
+        # Note: Terms aggregation doesn't support 'from' parameter directly,
+        # so we fetch from_ + size results and slice them
+        # Cardinality aggregation for total count
+        terms_size = from_ + size if from_ > 0 else size
+        body = {
+            "query": query,
+            "aggs": {
+                "unique_values": {
+                    "terms": {
+                        "field": field_name,
+                        "size": terms_size,
+                    }
+                },
+                "total_count": {
+                    "cardinality": {
+                        "field": field_name,
+                    }
+                },
+            },
+            "size": 0,  # we only need aggregations, not documents
+        }
+
+        result = self._client.search(index=self._index, body=body)
+        aggregations = result.get("aggregations", {})
+
+        # Extract unique values from terms aggregation buckets
+        unique_values_buckets = aggregations.get("unique_values", {}).get("buckets", [])
+        # Apply pagination by slicing the results
+        paginated_buckets = unique_values_buckets[from_ : from_ + size]
+        unique_values = [str(bucket["key"]) for bucket in paginated_buckets]
+
+        # Extract total count from cardinality aggregation
+        total_count = int(aggregations.get("total_count", {}).get("value", 0))
+
+        return unique_values, total_count
 
     def query_sql(self, query: str):
         pass
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index fc8226fbb6..90d318dc71 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -708,4 +708,65 @@ def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         # Test with float values
         min_max_score = document_store.get_field_min_max("meta.rating")
         assert min_max_score["min"] == pytest.approx(5.2)
-        assert min_max_score["max"] == pytest.approx(20.3)
\ No newline at end of file
+        assert min_max_score["max"] == pytest.approx(20.3)
+
+    def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
+        # Test with string values
+        docs = [
+            Document(content="Python programming", meta={"category": "A", "language": "Python"}),
+            Document(content="Java programming", meta={"category": "B", "language": "Java"}),
+            Document(content="Python scripting", meta={"category": "A", "language": "Python"}),
+            Document(content="JavaScript development", meta={"category": "C", "language": "JavaScript"}),
+            Document(content="Python data science", meta={"category": "A", "language": "Python"}),
+            Document(content="Java backend", meta={"category": "B", "language": "Java"}),
+        ]
+        document_store.write_documents(docs)
+
+        # Test getting all unique values without search term
+        unique_values, total_count = document_store.get_field_unique_values("meta.category", None, 0, 10)
+        assert set(unique_values) == {"A", "B", "C"}
+        assert total_count == 3
+
+        # Test with "meta." prefix
+        unique_languages, lang_count = document_store.get_field_unique_values("meta.language", None, 0, 10)
+        assert set(unique_languages) == {"Python", "Java", "JavaScript"}
+        assert lang_count == 3
+
+        # Test pagination - first page
+        unique_values_page1, total_count = document_store.get_field_unique_values("meta.category", None, 0, 2)
+        assert len(unique_values_page1) == 2
+        assert total_count == 3
+        assert all(val in ["A", "B", "C"] for val in unique_values_page1)
+
+        # Test pagination - second page
+        unique_values_page2, total_count = document_store.get_field_unique_values("meta.category", None, 2, 2)
+        assert len(unique_values_page2) == 1
+        assert total_count == 3
+        assert unique_values_page2[0] in ["A", "B", "C"]
+
+        # Test with search term - filter by content matching "Python"
+        unique_values_filtered, total_count = document_store.get_field_unique_values("meta.category", "Python", 0, 10)
+        assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
+        assert total_count == 1
+
+        # Test with search term - filter by content matching "Java"
+        unique_values_java, total_count = document_store.get_field_unique_values("meta.category", "Java", 0, 10)
+        assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
+        assert total_count == 1
+
+        # Test with integer values
+        int_docs = [
+            Document(content="Doc 1", meta={"priority": 1}),
+            Document(content="Doc 2", meta={"priority": 2}),
+            Document(content="Doc 3", meta={"priority": 1}),
+            Document(content="Doc 4", meta={"priority": 3}),
+        ]
+        document_store.write_documents(int_docs)
+        unique_priorities, priority_count = document_store.get_field_unique_values("meta.priority", None, 0, 10)
+        assert set(unique_priorities) == {"1", "2", "3"}
+        assert priority_count == 3
+
+        # Test with search term on integer field
+        unique_priorities_filtered, priority_count = document_store.get_field_unique_values("meta.priority", "Doc 1", 0, 10)
+        assert set(unique_priorities_filtered) == {"1"}
+        assert priority_count == 1
\ No newline at end of file

From e6932b0d73445d350f134ef8988e35c458a8fcbd Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 18:51:59 +0100
Subject: [PATCH 09/58] adding get_field_unique_values async

---
 .../opensearch/document_store.py              | 61 ++++++++++++++++++
 .../tests/test_document_store_async.py        | 62 +++++++++++++++++++
 2 files changed, 123 insertions(+)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index fbf6419d82..cd902f3ac4 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1352,5 +1352,66 @@ def get_field_unique_values(
 
         return unique_values, total_count
 
+    async def get_field_unique_values_async(
+        self, metadata_field: str, search_term: str | None, from_: int, size: int
+    ) -> tuple[list[str], int]:
+        """
+        Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.
+
+        :param metadata_field: The metadata field to get unique values for.
+        :param search_term: Optional search term to filter documents by matching in the content field.
+        :param from_: The starting index for pagination.
+        :param size: The number of unique values to return.
+        :returns: A tuple containing (list of unique values, total count of unique values).
+        """
+        await self._ensure_initialized_async()
+        assert self._async_client is not None
+
+        field_name = self._normalize_metadata_field_name(metadata_field)
+
+        # filter by search_term if provided
+        query = {"match_all": {}}
+        if search_term:
+            # Use match_phrase for exact phrase matching to avoid tokenization issues
+            query = {"match_phrase": {"content": search_term}}
+
+        # Build aggregations
+        # Terms aggregation for paginated unique values
+        # Note: Terms aggregation doesn't support 'from' parameter directly,
+        # so we fetch from_ + size results and slice them
+        # Cardinality aggregation for total count
+        terms_size = from_ + size if from_ > 0 else size
+        body = {
+            "query": query,
+            "aggs": {
+                "unique_values": {
+                    "terms": {
+                        "field": field_name,
+                        "size": terms_size,
+                    }
+                },
+                "total_count": {
+                    "cardinality": {
+                        "field": field_name,
+                    }
+                },
+            },
+            "size": 0,  # we only need aggregations, not documents
+        }
+
+        result = await self._async_client.search(index=self._index, body=body)
+        aggregations = result.get("aggregations", {})
+
+        # Extract unique values from terms aggregation buckets
+        unique_values_buckets = aggregations.get("unique_values", {}).get("buckets", [])
+        # Apply pagination by slicing the results
+        paginated_buckets = unique_values_buckets[from_ : from_ + size]
+        unique_values = [str(bucket["key"]) for bucket in paginated_buckets]
+
+        # Extract total count from cardinality aggregation
+        total_count = int(aggregations.get("total_count", {}).get("value", 0))
+
+        return unique_values, total_count
+
     def query_sql(self, query: str):
         pass
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index fe4f8ec726..28bdebe64a 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -499,4 +499,66 @@ async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         assert min_max_score["min"] == pytest.approx(5.2)
         assert min_max_score["max"] == pytest.approx(20.3)
 
+    @pytest.mark.asyncio
+    async def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
+        # Test with string values
+        docs = [
+            Document(content="Python programming", meta={"category": "A", "language": "Python"}),
+            Document(content="Java programming", meta={"category": "B", "language": "Java"}),
+            Document(content="Python scripting", meta={"category": "A", "language": "Python"}),
+            Document(content="JavaScript development", meta={"category": "C", "language": "JavaScript"}),
+            Document(content="Python data science", meta={"category": "A", "language": "Python"}),
+            Document(content="Java backend", meta={"category": "B", "language": "Java"}),
+        ]
+        await document_store.write_documents_async(docs)
+
+        # Test getting all unique values without search term
+        unique_values, total_count = await document_store.get_field_unique_values_async("meta.category", None, 0, 10)
+        assert set(unique_values) == {"A", "B", "C"}
+        assert total_count == 3
+
+        # Test with "meta." prefix
+        unique_languages, lang_count = await document_store.get_field_unique_values_async("meta.language", None, 0, 10)
+        assert set(unique_languages) == {"Python", "Java", "JavaScript"}
+        assert lang_count == 3
+
+        # Test pagination - first page
+        unique_values_page1, total_count = await document_store.get_field_unique_values_async("meta.category", None, 0, 2)
+        assert len(unique_values_page1) == 2
+        assert total_count == 3
+        assert all(val in ["A", "B", "C"] for val in unique_values_page1)
+
+        # Test pagination - second page
+        unique_values_page2, total_count = await document_store.get_field_unique_values_async("meta.category", None, 2, 2)
+        assert len(unique_values_page2) == 1
+        assert total_count == 3
+        assert unique_values_page2[0] in ["A", "B", "C"]
+
+        # Test with search term - filter by content matching "Python"
+        unique_values_filtered, total_count = await document_store.get_field_unique_values_async("meta.category", "Python", 0, 10)
+        assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
+        assert total_count == 1
+
+        # Test with search term - filter by content matching "Java"
+        unique_values_java, total_count = await document_store.get_field_unique_values_async("meta.category", "Java", 0, 10)
+        assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
+        assert total_count == 1
+
+        # Test with integer values
+        int_docs = [
+            Document(content="Doc 1", meta={"priority": 1}),
+            Document(content="Doc 2", meta={"priority": 2}),
+            Document(content="Doc 3", meta={"priority": 1}),
+            Document(content="Doc 4", meta={"priority": 3}),
+        ]
+        await document_store.write_documents_async(int_docs)
+        unique_priorities, priority_count = await document_store.get_field_unique_values_async("meta.priority", None, 0, 10)
+        assert set(unique_priorities) == {"1", "2", "3"}
+        assert priority_count == 3
+
+        # Test with search term on integer field
+        unique_priorities_filtered, priority_count = await document_store.get_field_unique_values_async("meta.priority", "Doc 1", 0, 10)
+        assert set(unique_priorities_filtered) == {"1"}
+        assert priority_count == 1
+
     
\ No newline at end of file

From 5e7cd906bfc6666c4c167b194c6f97b75ba355fd Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 18:58:35 +0100
Subject: [PATCH 10/58] formatting

---
 .../opensearch/document_store.py              | 10 +++----
 .../opensearch/tests/test_document_store.py   |  8 +++---
 .../tests/test_document_store_async.py        | 26 +++++++++++++------
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index cd902f3ac4..26eba98540 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -343,10 +343,10 @@ def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]:
         out = []
         # Fields that are not metadata (should stay at top level)
         non_meta_fields = {"id", "content", "embedding", "blob", "sparse_embedding", "score"}
-        
+
         for hit in hits:
             data = hit["_source"].copy()
-            
+
             # Reconstruct metadata dict from flattened fields
             meta = {}
             fields_to_remove = []
@@ -354,14 +354,14 @@ def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]:
                 if key not in non_meta_fields:
                     meta[key] = value
                     fields_to_remove.append(key)
-            
+
             # Remove metadata fields from top level and add them to meta
             for key in fields_to_remove:
                 data.pop(key, None)
-            
+
             if meta:
                 data["meta"] = meta
-            
+
             if "highlight" in hit:
                 if "meta" not in data:
                     data["meta"] = {}
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 90d318dc71..517f6e8435 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -704,7 +704,7 @@ def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         min_max_single = document_store.get_field_min_max("meta.single_value")
         assert min_max_single["min"] == 42
         assert min_max_single["max"] == 42
-                
+
         # Test with float values
         min_max_score = document_store.get_field_min_max("meta.rating")
         assert min_max_score["min"] == pytest.approx(5.2)
@@ -767,6 +767,8 @@ def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
         assert priority_count == 3
 
         # Test with search term on integer field
-        unique_priorities_filtered, priority_count = document_store.get_field_unique_values("meta.priority", "Doc 1", 0, 10)
+        unique_priorities_filtered, priority_count = document_store.get_field_unique_values(
+            "meta.priority", "Doc 1", 0, 10
+        )
         assert set(unique_priorities_filtered) == {"1"}
-        assert priority_count == 1
\ No newline at end of file
+        assert priority_count == 1
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 28bdebe64a..b8d9242dec 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -523,24 +523,32 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
         assert lang_count == 3
 
         # Test pagination - first page
-        unique_values_page1, total_count = await document_store.get_field_unique_values_async("meta.category", None, 0, 2)
+        unique_values_page1, total_count = await document_store.get_field_unique_values_async(
+            "meta.category", None, 0, 2
+        )
         assert len(unique_values_page1) == 2
         assert total_count == 3
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
 
         # Test pagination - second page
-        unique_values_page2, total_count = await document_store.get_field_unique_values_async("meta.category", None, 2, 2)
+        unique_values_page2, total_count = await document_store.get_field_unique_values_async(
+            "meta.category", None, 2, 2
+        )
         assert len(unique_values_page2) == 1
         assert total_count == 3
         assert unique_values_page2[0] in ["A", "B", "C"]
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered, total_count = await document_store.get_field_unique_values_async("meta.category", "Python", 0, 10)
+        unique_values_filtered, total_count = await document_store.get_field_unique_values_async(
+            "meta.category", "Python", 0, 10
+        )
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
         assert total_count == 1
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java, total_count = await document_store.get_field_unique_values_async("meta.category", "Java", 0, 10)
+        unique_values_java, total_count = await document_store.get_field_unique_values_async(
+            "meta.category", "Java", 0, 10
+        )
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
         assert total_count == 1
 
@@ -552,13 +560,15 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         await document_store.write_documents_async(int_docs)
-        unique_priorities, priority_count = await document_store.get_field_unique_values_async("meta.priority", None, 0, 10)
+        unique_priorities, priority_count = await document_store.get_field_unique_values_async(
+            "meta.priority", None, 0, 10
+        )
         assert set(unique_priorities) == {"1", "2", "3"}
         assert priority_count == 3
 
         # Test with search term on integer field
-        unique_priorities_filtered, priority_count = await document_store.get_field_unique_values_async("meta.priority", "Doc 1", 0, 10)
+        unique_priorities_filtered, priority_count = await document_store.get_field_unique_values_async(
+            "meta.priority", "Doc 1", 0, 10
+        )
         assert set(unique_priorities_filtered) == {"1"}
         assert priority_count == 1
-
-    
\ No newline at end of file

From 0c0f31cb47b659bfbda58d651038599a790e77ff Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Mon, 5 Jan 2026 23:48:22 +0100
Subject: [PATCH 11/58] updating tests

---
 .../opensearch/document_store.py              | 171 +++++++++++++++++-
 .../opensearch/tests/test_document_store.py   |  62 +++++++
 .../tests/test_document_store_async.py        |  64 +++++++
 3 files changed, 292 insertions(+), 5 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 26eba98540..57a9085125 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -2,16 +2,19 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import json
 from collections.abc import Mapping
 from math import exp
-from typing import Any, Optional, Union
+from typing import Any, Literal, Optional, Union
 
+import requests
 from haystack import default_from_dict, default_to_dict, logging
 from haystack.dataclasses import Document
 from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
 from haystack.utils.auth import Secret
 from opensearchpy import AsyncHttpConnection, AsyncOpenSearch, OpenSearch
+from opensearchpy.exceptions import SerializationError
 from opensearchpy.helpers import async_bulk, bulk
 
 from haystack_integrations.document_stores.opensearch.auth import AsyncAWSAuth, AWSAuth
@@ -21,6 +24,8 @@
 
 Hosts = Union[str, list[Union[str, Mapping[str, Union[str, int]]]]]
 
+ResponseFormat = Literal["json", "jdbc", "csv", "raw"]
+
 # document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to
 # True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor
 # (e.g., BM25_SCALING_FACTOR for the bm25_retrieval method).
@@ -1309,7 +1314,7 @@ def get_field_unique_values(
         field_name = self._normalize_metadata_field_name(metadata_field)
 
         # filter by search_term if provided
-        query = {"match_all": {}}
+        query: dict[str, Any] = {"match_all": {}}
         if search_term:
             # Use match_phrase for exact phrase matching to avoid tokenization issues
             query = {"match_phrase": {"content": search_term}}
@@ -1370,7 +1375,7 @@ async def get_field_unique_values_async(
         field_name = self._normalize_metadata_field_name(metadata_field)
 
         # filter by search_term if provided
-        query = {"match_all": {}}
+        query: dict[str, Any] = {"match_all": {}}
         if search_term:
             # Use match_phrase for exact phrase matching to avoid tokenization issues
             query = {"match_phrase": {"content": search_term}}
@@ -1413,5 +1418,161 @@ async def get_field_unique_values_async(
 
         return unique_values, total_count
 
-    def query_sql(self, query: str):
-        pass
+    def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any:
+        """
+        Execute a raw OpenSearch SQL query against the index.
+
+        :param query: The OpenSearch SQL query to execute
+        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
+        :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
+            (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        # For non-JSON formats, use requests directly to avoid deserialization issues
+        if response_format != "json":
+            try:
+                # Get connection info from the transport
+                connection = self._client.transport.get_connection()
+                base_url = connection.host
+                url = f"{base_url}/_plugins/_sql?format={response_format}"
+                
+                headers = {"Content-Type": "application/json"}
+                auth = None
+                if self._http_auth:
+                    if isinstance(self._http_auth, tuple):
+                        auth = self._http_auth
+                    elif isinstance(self._http_auth, AWSAuth):
+                        # For AWS auth, we need to use the opensearchpy client
+                        # Fall through to the try/except below
+                        pass
+                
+                verify = self._verify_certs if self._verify_certs is not None else True
+                timeout = self._timeout if self._timeout is not None else 30.0
+                response = requests.post(
+                    url,
+                    json={"query": query},
+                    headers=headers,
+                    auth=auth,
+                    verify=verify,
+                    timeout=timeout,
+                )
+                response.raise_for_status()
+                return response.text
+            except Exception as e:
+                # If requests fails (e.g., AWS auth), fall back to opensearchpy
+                # which will raise SerializationError that we can handle
+                pass
+        
+        try:
+            body = {"query": query}
+            params = {"format": response_format}
+            
+            response_data = self._client.transport.perform_request(
+                method="POST",
+                url="/_plugins/_sql",
+                params=params,
+                body=body,
+            )
+
+            if response_format == "json":
+                # extract only the query results
+                if isinstance(response_data, dict) and "hits" in response_data:
+                    hits = response_data.get("hits", {}).get("hits", [])
+                    # extract _source from each hit, which contains the actual document data
+                    return [hit.get("_source", {}) for hit in hits]
+                return response_data
+            else:
+                return response_data if isinstance(response_data, str) else str(response_data)
+        except SerializationError:
+            # If we get here, it means requests failed above (likely AWS auth)
+            # and opensearchpy can't deserialize the response
+            # Re-raise as DocumentStoreError with a helpful message
+            msg = f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. This format may not be supported with the current authentication method."
+            raise DocumentStoreError(msg) from None
+        except Exception as e:
+            msg = f"Failed to execute SQL query in OpenSearch: {e!s}"
+            raise DocumentStoreError(msg) from e
+
+    async def query_sql_async(self, query: str, response_format: ResponseFormat = "json") -> Any:
+        """
+        Asynchronously execute a raw OpenSearch SQL query against the index.
+
+        :param query: The OpenSearch SQL query to execute
+        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
+        :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
+            (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
+        """
+        await self._ensure_initialized_async()
+        assert self._async_client is not None
+
+        # For non-JSON formats, use httpx directly to avoid deserialization issues
+        if response_format != "json":
+            try:
+                import httpx
+                
+                # Get connection info from the transport
+                connection = self._async_client.transport.get_connection()
+                base_url = connection.host
+                url = f"{base_url}/_plugins/_sql?format={response_format}"
+                
+                headers = {"Content-Type": "application/json"}
+                auth = None
+                if self._http_auth:
+                    if isinstance(self._http_auth, tuple):
+                        auth = self._http_auth
+                    elif isinstance(self._http_auth, AWSAuth):
+                        # For AWS auth, we need to use the opensearchpy client
+                        # Fall through to the try/except below
+                        pass
+                
+                verify = self._verify_certs if self._verify_certs is not None else True
+                timeout = httpx.Timeout(self._timeout if self._timeout else 30.0)
+                
+                async with httpx.AsyncClient(verify=verify, timeout=timeout) as client:
+                    response = await client.post(
+                        url,
+                        json={"query": query},
+                        headers=headers,
+                        auth=auth,
+                    )
+                    response.raise_for_status()
+                    return response.text
+            except ImportError:
+                # httpx not available, fall through to opensearchpy
+                pass
+            except Exception as e:
+                # If httpx fails (e.g., AWS auth), fall back to opensearchpy
+                # which will raise SerializationError that we can handle
+                pass
+
+        try:
+            body = {"query": query}
+            params = {"format": response_format}
+            
+            response_data = await self._async_client.transport.perform_request(
+                method="POST",
+                url="/_plugins/_sql",
+                params=params,
+                body=body,
+            )
+
+            if response_format == "json":
+                # extract only the query results
+                if isinstance(response_data, dict) and "hits" in response_data:
+                    hits = response_data.get("hits", {}).get("hits", [])
+                    # extract _source from each hit, which contains the actual document data
+                    return [hit.get("_source", {}) for hit in hits]
+                return response_data
+            else:
+                return response_data if isinstance(response_data, str) else str(response_data)
+        except SerializationError:
+            # If we get here, it means httpx failed above (likely AWS auth or not installed)
+            # and opensearchpy can't deserialize the response
+            # Re-raise as DocumentStoreError with a helpful message
+            msg = f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. This format may not be supported with the current authentication method. Consider installing httpx for better support."
+            raise DocumentStoreError(msg) from None
+        except Exception as e:
+            msg = f"Failed to execute SQL query in OpenSearch: {e!s}"
+            raise DocumentStoreError(msg) from e
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 517f6e8435..e6cd334290 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -772,3 +772,65 @@ def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
         )
         assert set(unique_priorities_filtered) == {"1"}
         assert priority_count == 1
+
+    def test_query_sql(self, document_store: OpenSearchDocumentStore):
+        """
+        Test executing SQL queries against the OpenSearch index.
+        """
+        docs = [
+            Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
+            Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
+            Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
+            Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
+        ]
+        document_store.write_documents(docs)
+        time.sleep(1)  # Wait for documents to be indexed
+
+        # Test SQL query with JSON format (default)
+        sql_query = (
+            f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
+            f"WHERE category = 'A' ORDER BY priority"
+        )
+        result = document_store.query_sql(sql_query, response_format="json")
+
+        # New format returns a list of dictionaries (the _source from each hit)
+        assert len(result) == 2  # Two documents with category A
+        assert isinstance(result, list)
+        assert all(isinstance(row, dict) for row in result)
+
+        # Verify data contains expected values
+        categories = [row.get("category") for row in result]
+        assert all(cat == "A" for cat in categories)
+
+        # Verify all expected fields are present
+        for row in result:
+            assert "content" in row
+            assert "category" in row
+            assert "status" in row
+            assert "priority" in row
+
+        # Test SQL query with CSV format
+        result_csv = document_store.query_sql(sql_query, response_format="csv")
+        assert isinstance(result_csv, str)
+        assert "content" in result_csv
+        assert "category" in result_csv
+
+        # Test SQL query with JDBC format
+        result_jdbc = document_store.query_sql(sql_query, response_format="jdbc")
+        # JDBC format can be dict or str depending on OpenSearch version
+        assert result_jdbc is not None
+
+        # Test SQL query with RAW format
+        result_raw = document_store.query_sql(sql_query, response_format="raw")
+        assert isinstance(result_raw, str)
+
+        # Test COUNT query
+        count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
+        count_result = document_store.query_sql(count_query, response_format="json")
+        # COUNT query may return different format, check it's a valid response
+        assert count_result is not None
+
+        # Test error handling for invalid SQL query
+        invalid_query = "SELECT * FROM non_existent_index"
+        with pytest.raises(DocumentStoreError, match="Failed to execute SQL query"):
+            document_store.query_sql(invalid_query)
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index b8d9242dec..df01566f54 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -6,6 +6,7 @@
 
 import pytest
 from haystack.dataclasses import Document
+from haystack.document_stores.errors import DocumentStoreError
 from haystack.document_stores.types import DuplicatePolicy
 
 from haystack_integrations.document_stores.opensearch.document_store import OpenSearchDocumentStore
@@ -572,3 +573,66 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
         )
         assert set(unique_priorities_filtered) == {"1"}
         assert priority_count == 1
+
+    @pytest.mark.asyncio
+    async def test_query_sql(self, document_store: OpenSearchDocumentStore):
+        """
+        Test executing SQL queries against the OpenSearch index.
+        """
+        docs = [
+            Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
+            Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
+            Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
+            Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
+        ]
+        await document_store.write_documents_async(docs)
+        time.sleep(1)  # Wait for documents to be indexed
+
+        # Test SQL query with JSON format (default)
+        sql_query = (
+            f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
+            f"WHERE category = 'A' ORDER BY priority"
+        )
+        result = await document_store.query_sql_async(sql_query, response_format="json")
+
+        # New format returns a list of dictionaries (the _source from each hit)
+        assert len(result) == 2  # Two documents with category A
+        assert isinstance(result, list)
+        assert all(isinstance(row, dict) for row in result)
+
+        # Verify data contains expected values
+        categories = [row.get("category") for row in result]
+        assert all(cat == "A" for cat in categories)
+
+        # Verify all expected fields are present
+        for row in result:
+            assert "content" in row
+            assert "category" in row
+            assert "status" in row
+            assert "priority" in row
+
+        # Test SQL query with CSV format
+        result_csv = await document_store.query_sql_async(sql_query, response_format="csv")
+        assert isinstance(result_csv, str)
+        assert "content" in result_csv
+        assert "category" in result_csv
+
+        # Test SQL query with JDBC format
+        result_jdbc = await document_store.query_sql_async(sql_query, response_format="jdbc")
+        # JDBC format can be dict or str depending on OpenSearch version
+        assert result_jdbc is not None
+
+        # Test SQL query with RAW format
+        result_raw = await document_store.query_sql_async(sql_query, response_format="raw")
+        assert isinstance(result_raw, str)
+
+        # Test COUNT query
+        count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
+        count_result = await document_store.query_sql_async(count_query, response_format="json")
+        # COUNT query may return different format, check it's a valid response
+        assert count_result is not None
+
+        # Test error handling for invalid SQL query
+        invalid_query = "SELECT * FROM non_existent_index"
+        with pytest.raises(DocumentStoreError, match="Failed to execute SQL query"):
+            await document_store.query_sql_async(invalid_query)

From 2010261eb38e4baa75af9e94d966d51e45254654 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 6 Jan 2026 00:03:24 +0100
Subject: [PATCH 12/58] formatting

---
 .../opensearch/document_store.py              | 108 +++++++++---------
 1 file changed, 55 insertions(+), 53 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 57a9085125..c172047bbf 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -2,11 +2,11 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import json
 from collections.abc import Mapping
 from math import exp
 from typing import Any, Literal, Optional, Union
 
+import httpx
 import requests
 from haystack import default_from_dict, default_to_dict, logging
 from haystack.dataclasses import Document
@@ -1418,6 +1418,39 @@ async def get_field_unique_values_async(
 
         return unique_values, total_count
 
+    def _prepare_sql_http_request_params(
+        self, base_url: str, response_format: ResponseFormat
+    ) -> tuple[str, dict[str, str], Any]:
+        """
+        Prepares HTTP request parameters for SQL query execution.
+        """
+        url = f"{base_url}/_plugins/_sql?format={response_format}"
+        headers = {"Content-Type": "application/json"}
+        auth = None
+        if self._http_auth:
+            if isinstance(self._http_auth, tuple):
+                auth = self._http_auth
+            elif isinstance(self._http_auth, AWSAuth):
+                # For AWS auth, we need to use the opensearchpy client
+                # Fall through to the try/except below
+                pass
+        return url, headers, auth
+
+    @staticmethod
+    def _process_sql_response(response_data: Any, response_format: ResponseFormat) -> Any:
+        """
+        Processes the SQL query response data.
+        """
+        if response_format == "json":
+            # extract only the query results
+            if isinstance(response_data, dict) and "hits" in response_data:
+                hits = response_data.get("hits", {}).get("hits", [])
+                # extract _source from each hit, which contains the actual document data
+                return [hit.get("_source", {}) for hit in hits]
+            return response_data
+        else:
+            return response_data if isinstance(response_data, str) else str(response_data)
+
     def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any:
         """
         Execute a raw OpenSearch SQL query against the index.
@@ -1436,18 +1469,8 @@ def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any
                 # Get connection info from the transport
                 connection = self._client.transport.get_connection()
                 base_url = connection.host
-                url = f"{base_url}/_plugins/_sql?format={response_format}"
-                
-                headers = {"Content-Type": "application/json"}
-                auth = None
-                if self._http_auth:
-                    if isinstance(self._http_auth, tuple):
-                        auth = self._http_auth
-                    elif isinstance(self._http_auth, AWSAuth):
-                        # For AWS auth, we need to use the opensearchpy client
-                        # Fall through to the try/except below
-                        pass
-                
+                url, headers, auth = self._prepare_sql_http_request_params(base_url, response_format)
+
                 verify = self._verify_certs if self._verify_certs is not None else True
                 timeout = self._timeout if self._timeout is not None else 30.0
                 response = requests.post(
@@ -1463,12 +1486,12 @@ def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any
             except Exception as e:
                 # If requests fails (e.g., AWS auth), fall back to opensearchpy
                 # which will raise SerializationError that we can handle
-                pass
-        
+                logger.error(f"Failed to execute SQL query in OpenSearch: {e!s}")
+
         try:
             body = {"query": query}
             params = {"format": response_format}
-            
+
             response_data = self._client.transport.perform_request(
                 method="POST",
                 url="/_plugins/_sql",
@@ -1476,20 +1499,15 @@ def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any
                 body=body,
             )
 
-            if response_format == "json":
-                # extract only the query results
-                if isinstance(response_data, dict) and "hits" in response_data:
-                    hits = response_data.get("hits", {}).get("hits", [])
-                    # extract _source from each hit, which contains the actual document data
-                    return [hit.get("_source", {}) for hit in hits]
-                return response_data
-            else:
-                return response_data if isinstance(response_data, str) else str(response_data)
+            return self._process_sql_response(response_data, response_format)
         except SerializationError:
             # If we get here, it means requests failed above (likely AWS auth)
             # and opensearchpy can't deserialize the response
             # Re-raise as DocumentStoreError with a helpful message
-            msg = f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. This format may not be supported with the current authentication method."
+            msg = (
+                f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. "
+                f"This format may not be supported with the current authentication method."
+            )
             raise DocumentStoreError(msg) from None
         except Exception as e:
             msg = f"Failed to execute SQL query in OpenSearch: {e!s}"
@@ -1510,26 +1528,14 @@ async def query_sql_async(self, query: str, response_format: ResponseFormat = "j
         # For non-JSON formats, use httpx directly to avoid deserialization issues
         if response_format != "json":
             try:
-                import httpx
-                
                 # Get connection info from the transport
                 connection = self._async_client.transport.get_connection()
                 base_url = connection.host
-                url = f"{base_url}/_plugins/_sql?format={response_format}"
-                
-                headers = {"Content-Type": "application/json"}
-                auth = None
-                if self._http_auth:
-                    if isinstance(self._http_auth, tuple):
-                        auth = self._http_auth
-                    elif isinstance(self._http_auth, AWSAuth):
-                        # For AWS auth, we need to use the opensearchpy client
-                        # Fall through to the try/except below
-                        pass
-                
+                url, headers, auth = self._prepare_sql_http_request_params(base_url, response_format)
+
                 verify = self._verify_certs if self._verify_certs is not None else True
                 timeout = httpx.Timeout(self._timeout if self._timeout else 30.0)
-                
+
                 async with httpx.AsyncClient(verify=verify, timeout=timeout) as client:
                     response = await client.post(
                         url,
@@ -1545,12 +1551,12 @@ async def query_sql_async(self, query: str, response_format: ResponseFormat = "j
             except Exception as e:
                 # If httpx fails (e.g., AWS auth), fall back to opensearchpy
                 # which will raise SerializationError that we can handle
-                pass
+                logger.error(f"Failed to execute SQL query in OpenSearch: {e!s}")
 
         try:
             body = {"query": query}
             params = {"format": response_format}
-            
+
             response_data = await self._async_client.transport.perform_request(
                 method="POST",
                 url="/_plugins/_sql",
@@ -1558,20 +1564,16 @@ async def query_sql_async(self, query: str, response_format: ResponseFormat = "j
                 body=body,
             )
 
-            if response_format == "json":
-                # extract only the query results
-                if isinstance(response_data, dict) and "hits" in response_data:
-                    hits = response_data.get("hits", {}).get("hits", [])
-                    # extract _source from each hit, which contains the actual document data
-                    return [hit.get("_source", {}) for hit in hits]
-                return response_data
-            else:
-                return response_data if isinstance(response_data, str) else str(response_data)
+            return self._process_sql_response(response_data, response_format)
         except SerializationError:
             # If we get here, it means httpx failed above (likely AWS auth or not installed)
             # and opensearchpy can't deserialize the response
             # Re-raise as DocumentStoreError with a helpful message
-            msg = f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. This format may not be supported with the current authentication method. Consider installing httpx for better support."
+            msg = (
+                f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. "
+                f"This format may not be supported with the current authentication method. "
+                f"Consider installing httpx for better support."
+            )
             raise DocumentStoreError(msg) from None
         except Exception as e:
             msg = f"Failed to execute SQL query in OpenSearch: {e!s}"

From 873a4dc50061c9cd48e0612920ef7aa978e9a30a Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 6 Jan 2026 10:55:34 +0100
Subject: [PATCH 13/58] cleaning up

---
 .../opensearch/document_store.py              | 32 ++++++-------------
 1 file changed, 10 insertions(+), 22 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index c172047bbf..e80193f2fc 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1093,9 +1093,6 @@ async def count_documents_by_filter_async(self, filters: dict) -> int:
     def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str, Any]:
         """
         Builds cardinality aggregations for all metadata fields in the index mapping.
-
-        :param index_mapping: The properties mapping from the index.
-        :returns: Dictionary of aggregations keyed by field name.
         """
         special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
         aggs = {}
@@ -1108,10 +1105,6 @@ def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str,
     def _build_distinct_values_query_body(filters: dict, aggs: dict[str, Any]) -> dict[str, Any]:
         """
         Builds the query body for distinct values counting with filters and aggregations.
-
-        :param filters: The filters to apply, or empty dict for no filters.
-        :param aggs: The aggregations to include in the query.
-        :returns: The query body dictionary.
         """
         if filters:
             normalized_filters = normalize_filters(filters)
@@ -1134,10 +1127,6 @@ def _extract_distinct_counts_from_aggregations(
     ) -> dict[str, int]:
         """
         Extracts distinct value counts from search result aggregations.
-
-        :param aggregations: The aggregations from the search result.
-        :param index_mapping: The properties mapping from the index.
-        :returns: Dictionary mapping field names to their distinct value counts.
         """
         special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
         distinct_counts = {}
@@ -1459,6 +1448,9 @@ def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any
         :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
         :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
             (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
+
+        NOTE: For non-JSON formats (csv, jdbc, raw), use requests to make a raw HTTP request and get the text response
+              This avoids deserialization issues with the opensearchpy client.
         """
         self._ensure_initialized()
         assert self._client is not None
@@ -1501,9 +1493,8 @@ def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any
 
             return self._process_sql_response(response_data, response_format)
         except SerializationError:
-            # If we get here, it means requests failed above (likely AWS auth)
-            # and opensearchpy can't deserialize the response
-            # Re-raise as DocumentStoreError with a helpful message
+            # If we get here, it means requests failed above (likely AWS auth) and opensearchpy can't deserialize the
+            # response. Re-raise as DocumentStoreError with a helpful message
             msg = (
                 f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. "
                 f"This format may not be supported with the current authentication method."
@@ -1521,6 +1512,9 @@ async def query_sql_async(self, query: str, response_format: ResponseFormat = "j
         :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
         :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
             (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
+
+        NOTE: For non-JSON formats (csv, jdbc, raw), use httpx AsyncClient to make a raw HTTP request and get the text
+              response. This avoids deserialization issues with the opensearchpy client.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None
@@ -1545,12 +1539,7 @@ async def query_sql_async(self, query: str, response_format: ResponseFormat = "j
                     )
                     response.raise_for_status()
                     return response.text
-            except ImportError:
-                # httpx not available, fall through to opensearchpy
-                pass
             except Exception as e:
-                # If httpx fails (e.g., AWS auth), fall back to opensearchpy
-                # which will raise SerializationError that we can handle
                 logger.error(f"Failed to execute SQL query in OpenSearch: {e!s}")
 
         try:
@@ -1566,9 +1555,8 @@ async def query_sql_async(self, query: str, response_format: ResponseFormat = "j
 
             return self._process_sql_response(response_data, response_format)
         except SerializationError:
-            # If we get here, it means httpx failed above (likely AWS auth or not installed)
-            # and opensearchpy can't deserialize the response
-            # Re-raise as DocumentStoreError with a helpful message
+            # If we get here, it means httpx failed above (likely AWS auth or not installed) and opensearchpy can't
+            # deserialize the response. Re-raise as DocumentStoreError with a helpful message
             msg = (
                 f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. "
                 f"This format may not be supported with the current authentication method. "

From 1f3347bcb0a489a33aaa45b43dabb1e9017786b0 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 6 Jan 2026 10:56:47 +0100
Subject: [PATCH 14/58] adding httpx as a dependency

---
 integrations/opensearch/pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml
index 466863c445..e3f93af0b7 100644
--- a/integrations/opensearch/pyproject.toml
+++ b/integrations/opensearch/pyproject.toml
@@ -25,7 +25,8 @@ classifiers = [
 ]
 dependencies = [
   "haystack-ai>=2.14.0",
-  "opensearch-py[async]>=2.4.0,<3"]
+  "opensearch-py[async]>=2.4.0,<3"],
+  "httpx>=0.28.1"
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/opensearch#readme"

From 3622168b28ba40dbbe526b35b8491b8eb5e34f57 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 6 Jan 2026 11:03:45 +0100
Subject: [PATCH 15/58] fixing pyproject.toml

---
 integrations/opensearch/pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/integrations/opensearch/pyproject.toml b/integrations/opensearch/pyproject.toml
index e3f93af0b7..db8bcfd19e 100644
--- a/integrations/opensearch/pyproject.toml
+++ b/integrations/opensearch/pyproject.toml
@@ -25,8 +25,9 @@ classifiers = [
 ]
 dependencies = [
   "haystack-ai>=2.14.0",
-  "opensearch-py[async]>=2.4.0,<3"],
+  "opensearch-py[async]>=2.4.0,<3",
   "httpx>=0.28.1"
+]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/opensearch#readme"

From d96cc4cf85c57b02e8b7535c62c528258a63e683 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Wed, 7 Jan 2026 16:32:39 +0100
Subject: [PATCH 16/58] updating tests: making use of the new refresh feature

---
 integrations/opensearch/tests/test_document_store.py       | 3 +--
 integrations/opensearch/tests/test_document_store_async.py | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 3c25636351..4fa6bca5cb 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -781,8 +781,7 @@ def test_query_sql(self, document_store: OpenSearchDocumentStore):
             Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
             Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
         ]
-        document_store.write_documents(docs)
-        time.sleep(1)  # Wait for documents to be indexed
+        document_store.write_documents(docs, refresh=True)
 
         # Test SQL query with JSON format (default)
         sql_query = (
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index be17577d34..fd37a6dcd2 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -581,8 +581,7 @@ async def test_query_sql(self, document_store: OpenSearchDocumentStore):
             Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
             Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
         ]
-        await document_store.write_documents_async(docs)
-        time.sleep(1)  # Wait for documents to be indexed
+        await document_store.write_documents_async(docs, refresh=True)
 
         # Test SQL query with JSON format (default)
         sql_query = (

From 69863d0a539aba8f3d8a634b3e58c187d554fc23 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Thu, 8 Jan 2026 10:55:08 +0100
Subject: [PATCH 17/58] dealing with special fields

---
 .../document_stores/opensearch/document_store.py      | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 3ca8e9d813..82a4c6331a 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -24,6 +24,7 @@
 
 logger = logging.getLogger(__name__)
 
+SPECIAL_FIELDS = {"content", "embedding", "id", "score", "sparse_embedding", "blob"}
 
 Hosts = Union[str, list[Union[str, Mapping[str, Union[str, int]]]]]
 
@@ -351,8 +352,6 @@ async def count_documents_async(self) -> int:
     @staticmethod
     def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]:
         out = []
-        # Fields that are not metadata (should stay at top level)
-        non_meta_fields = {"id", "content", "embedding", "blob", "sparse_embedding", "score"}
 
         for hit in hits:
             data = hit["_source"].copy()
@@ -361,7 +360,7 @@ def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]:
             meta = {}
             fields_to_remove = []
             for key, value in data.items():
-                if key not in non_meta_fields:
+                if key not in SPECIAL_FIELDS:
                     meta[key] = value
                     fields_to_remove.append(key)
 
@@ -1203,10 +1202,9 @@ def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str,
         """
         Builds cardinality aggregations for all metadata fields in the index mapping.
         """
-        special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
         aggs = {}
         for field_name in index_mapping.keys():
-            if field_name not in special_fields:
+            if field_name not in SPECIAL_FIELDS:
                 aggs[f"{field_name}_cardinality"] = {"cardinality": {"field": field_name}}
         return aggs
 
@@ -1237,10 +1235,9 @@ def _extract_distinct_counts_from_aggregations(
         """
         Extracts distinct value counts from search result aggregations.
         """
-        special_fields = {"content", "embedding", "id", "score", "blob", "sparse_embedding"}
         distinct_counts = {}
         for field_name in index_mapping.keys():
-            if field_name not in special_fields:
+            if field_name not in SPECIAL_FIELDS:
                 agg_key = f"{field_name}_cardinality"
                 if agg_key in aggregations:
                     distinct_counts[field_name] = aggregations[agg_key]["value"]

From 3a3df4c025ac9e4d7a5057857eff5cf4d7417124 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Thu, 8 Jan 2026 10:56:04 +0100
Subject: [PATCH 18/58] docstring update

---
 .../document_stores/opensearch/document_store.py                | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 82a4c6331a..876c6cfa95 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1201,6 +1201,8 @@ async def count_documents_by_filter_async(self, filters: dict) -> int:
     def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str, Any]:
         """
         Builds cardinality aggregations for all metadata fields in the index mapping.
+
+        See: https://docs.opensearch.org/latest/aggregations/metric/cardinality/
         """
         aggs = {}
         for field_name in index_mapping.keys():

From 6b2081b9ec27387e49ebf36ce08994e338bb2073 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Thu, 8 Jan 2026 15:00:25 +0100
Subject: [PATCH 19/58] adding roundtrip tests to assert documents metadata is
 correctly written and retrieved

---
 .../opensearch/document_store.py              | 23 +-------
 .../opensearch/tests/test_bm25_retriever.py   | 54 +++++++++++++++++++
 .../opensearch/tests/test_document_store.py   |  2 -
 .../tests/test_embedding_retriever.py         | 48 +++++++++++++++++
 4 files changed, 104 insertions(+), 23 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 876c6cfa95..45cf600b18 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -352,29 +352,10 @@ async def count_documents_async(self) -> int:
     @staticmethod
     def _deserialize_search_hits(hits: list[dict[str, Any]]) -> list[Document]:
         out = []
-
         for hit in hits:
-            data = hit["_source"].copy()
-
-            # Reconstruct metadata dict from flattened fields
-            meta = {}
-            fields_to_remove = []
-            for key, value in data.items():
-                if key not in SPECIAL_FIELDS:
-                    meta[key] = value
-                    fields_to_remove.append(key)
-
-            # Remove metadata fields from top level and add them to meta
-            for key in fields_to_remove:
-                data.pop(key, None)
-
-            if meta:
-                data["meta"] = meta
-
+            data = hit["_source"]
             if "highlight" in hit:
-                if "meta" not in data:
-                    data["meta"] = {}
-                data["meta"]["highlighted"] = hit["highlight"]
+                data["metadata"]["highlighted"] = hit["highlight"]
             data["score"] = hit["_score"]
             out.append(Document.from_dict(data))
 
diff --git a/integrations/opensearch/tests/test_bm25_retriever.py b/integrations/opensearch/tests/test_bm25_retriever.py
index 03235c36b3..d6bb6350e4 100644
--- a/integrations/opensearch/tests/test_bm25_retriever.py
+++ b/integrations/opensearch/tests/test_bm25_retriever.py
@@ -424,6 +424,60 @@ def test_bm25_retriever_runtime_document_store_switching(
     assert len(results_1_again["documents"]) == 1
 
 
+@pytest.mark.integration
+def test_bm25_retriever_document_structure_with_metadata(document_store):
+    """
+    Test document structure with complex metadata (nested values, lists, etc.)
+    """
+    docs = [
+        Document(
+            content="Python is versatile",
+            meta={
+                "category": "programming",
+                "tags": ["python", "general-purpose"],
+                "rating": 4.5,
+                "active": True,
+                "author": {"name": "John", "role": "developer"},
+            },
+            id="python_doc",
+        ),
+        Document(
+            content="JavaScript is dynamic",
+            meta={
+                "category": "programming",
+                "tags": ["javascript", "web"],
+                "rating": 4.8,
+                "active": True,
+            },
+            id="js_doc",
+        ),
+    ]
+    document_store.write_documents(docs, refresh=True)
+    retriever = OpenSearchBM25Retriever(document_store=document_store)
+
+    results = retriever.run(query="programming", top_k=2)
+    assert len(results["documents"]) == 2
+
+    for doc in results["documents"]:
+        # Verify structure
+        assert hasattr(doc, "content")
+        assert hasattr(doc, "meta")
+        assert isinstance(doc.meta, dict)
+
+        # Verify complex metadata is preserved
+        assert "category" in doc.meta
+        assert "tags" in doc.meta
+        assert isinstance(doc.meta["tags"], list)
+        assert "rating" in doc.meta
+
+        # Verify document can be serialized/deserialized
+        doc_dict = doc.to_dict()
+        doc_from_dict = Document.from_dict(doc_dict)
+        assert doc_from_dict.content == doc.content
+        assert doc_from_dict.meta == doc.meta
+        assert doc_from_dict.id == doc.id
+
+
 @pytest.mark.asyncio
 @pytest.mark.integration
 async def test_bm25_retriever_async_runtime_document_store_switching(
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 91500edff9..6fb562608c 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -615,7 +615,6 @@ def test_update_by_filter(self, document_store: OpenSearchDocumentStore):
         assert len(draft_docs) == 1
         assert draft_docs[0].meta["category"] == "B"
 
-
     def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore):
         docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active"}),
@@ -914,4 +913,3 @@ def test_delete_with_routing(self, document_store: OpenSearchDocumentStore):
         document_store.delete_documents(["1", "2"], routing=routing_map)
 
         assert document_store.count_documents() == 1
-
diff --git a/integrations/opensearch/tests/test_embedding_retriever.py b/integrations/opensearch/tests/test_embedding_retriever.py
index a01b7dc008..fe2f5865c6 100644
--- a/integrations/opensearch/tests/test_embedding_retriever.py
+++ b/integrations/opensearch/tests/test_embedding_retriever.py
@@ -404,3 +404,51 @@ async def test_embedding_retriever_runtime_document_store_switching_async(
     python_query_embedding = [0.4, 0.4, 0.4] + [0.0] * 765
     results_1_again = await retriever.run_async(query_embedding=python_query_embedding)
     assert "Python" in results_1_again["documents"][0].content
+
+
+@pytest.mark.integration
+def test_embedding_retriever_document_structure_with_metadata(document_store, test_documents_with_embeddings_1):
+    """
+    Test that documents returned by embedding retriever have correct structure:
+    - Metadata fields are in doc.meta (not at top level)
+    - Special fields (content, embedding, id, score) are at top level
+    - All original metadata is preserved
+    """
+    document_store.write_documents(test_documents_with_embeddings_1, refresh=True)
+    retriever = OpenSearchEmbeddingRetriever(document_store=document_store)
+
+    # Query embedding to match functional programming languages
+    query_embedding = [0.2, 0.3, 0.4] + [0.0] * 765
+    results = retriever.run(query_embedding=query_embedding, top_k=5)
+
+    assert len(results["documents"]) > 0
+
+    for doc in results["documents"]:
+        # Verify special fields are at top level
+        assert hasattr(doc, "content")
+        assert isinstance(doc.content, str)
+        assert hasattr(doc, "id")
+        assert isinstance(doc.id, str)
+        assert hasattr(doc, "score")
+        assert doc.score is not None
+        assert hasattr(doc, "embedding")
+        assert isinstance(doc.embedding, list)
+        assert len(doc.embedding) == 768
+
+        # Verify metadata fields are in meta dict (not at top level)
+        assert hasattr(doc, "meta")
+        assert isinstance(doc.meta, dict)
+
+        # Verify original metadata is preserved
+        assert "likes" in doc.meta
+        assert "language_type" in doc.meta
+        assert isinstance(doc.meta["likes"], int)
+        assert isinstance(doc.meta["language_type"], str)
+
+        # Verify document can be serialized/deserialized
+        doc_dict = doc.to_dict()
+        doc_from_dict = Document.from_dict(doc_dict)
+        assert doc_from_dict.content == doc.content
+        assert doc_from_dict.meta == doc.meta
+        assert doc_from_dict.id == doc.id
+        assert doc_from_dict.embedding == doc.embedding

From 923081e86677f40a6552f17636a18d7b41f3fed1 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 10:20:45 +0100
Subject: [PATCH 20/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 45cf600b18..ff1e2118c0 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1148,7 +1148,7 @@ def _render_custom_query(self, custom_query: Any, substitutions: dict[str, Any])
 
         return custom_query
 
-    def count_documents_by_filter(self, filters: dict) -> int:
+    def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
         """
         Returns the number of documents that match the provided filters.
 

From abd4b7f3c5b1a5d0a3e24e9bece44e64c4bfa505 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 10:20:55 +0100
Subject: [PATCH 21/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index ff1e2118c0..1ab9b42e83 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1163,7 +1163,7 @@ def count_documents_by_filter(self, filters: dict[str, Any]) -> int:
         body = {"query": {"bool": {"filter": normalized_filters}}}
         return self._client.count(index=self._index, body=body)["count"]
 
-    async def count_documents_by_filter_async(self, filters: dict) -> int:
+    async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
         """
         Asynchronously returns the number of documents that match the provided filters.
 

From dfcb8ec87fa296620075f963f575339e89efab21 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 10:31:43 +0100
Subject: [PATCH 22/58] updating function names

---
 .../document_stores/opensearch/document_store.py            | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 1ab9b42e83..e1532b10be 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1281,7 +1281,7 @@ async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
-    def get_fields_info(self) -> dict[str, dict]:
+    def get_meta_fields_info(self) -> dict[str, dict]:
         """
         Returns the information about the fields in the index.
 
@@ -1340,7 +1340,7 @@ def _extract_min_max_from_stats(stats: dict[str, Any]) -> dict[str, Any]:
         max_value = stats.get("max")
         return {"min": min_value, "max": max_value}
 
-    def get_field_min_max(self, metadata_field: str) -> dict[str, Any]:
+    def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
         """
         Returns the minimum and maximum values for the given metadata field.
 
@@ -1374,7 +1374,7 @@ async def get_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
 
         return self._extract_min_max_from_stats(stats)
 
-    def get_field_unique_values(
+    def get_metadata_field_unique_values(
         self, metadata_field: str, search_term: str | None, from_: int, size: int
     ) -> tuple[list[str], int]:
         """

From a926d03f2c99c006b657a32841a57bca9fa25af6 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 11:36:58 +0100
Subject: [PATCH 23/58] updating function names + tests

---
 .../opensearch/document_store.py              | 12 ++---
 .../opensearch/tests/test_document_store.py   | 48 +++++++++++--------
 .../tests/test_document_store_async.py        | 45 +++++++++--------
 3 files changed, 58 insertions(+), 47 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index e1532b10be..872953c1b5 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1226,7 +1226,7 @@ def _extract_distinct_counts_from_aggregations(
                     distinct_counts[field_name] = aggregations[agg_key]["value"]
         return distinct_counts
 
-    def count_distinct_values_by_filter(self, filters: dict) -> dict[str, int]:
+    def count_distinct_metadata_values_by_filter(self, filters: dict) -> dict[str, int]:
         """
         Returns the number of unique values for each meta field of the documents that match the provided filters.
 
@@ -1253,7 +1253,7 @@ def count_distinct_values_by_filter(self, filters: dict) -> dict[str, int]:
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
-    async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str, int]:
+    async def count_distinct_metadata_values_by_filter_async(self, filters: dict) -> dict[str, int]:
         """
         Asynchronously returns the number of unique values for each meta field of the documents that match the
         provided filters.
@@ -1281,7 +1281,7 @@ async def count_distinct_values_by_filter_async(self, filters: dict) -> dict[str
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
-    def get_meta_fields_info(self) -> dict[str, dict]:
+    def get_metadata_fields_info(self) -> dict[str, dict]:
         """
         Returns the information about the fields in the index.
 
@@ -1294,7 +1294,7 @@ def get_meta_fields_info(self) -> dict[str, dict]:
         index_mapping = mapping[self._index]["mappings"]["properties"]
         return index_mapping
 
-    async def get_fields_info_async(self) -> dict[str, dict]:
+    async def get_metadata_fields_info_async(self) -> dict[str, dict]:
         """
         Asynchronously returns the information about the fields in the index.
 
@@ -1357,7 +1357,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
 
         return self._extract_min_max_from_stats(stats)
 
-    async def get_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
+    async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
         """
         Asynchronously returns the minimum and maximum values for the given metadata field.
 
@@ -1435,7 +1435,7 @@ def get_metadata_field_unique_values(
 
         return unique_values, total_count
 
-    async def get_field_unique_values_async(
+    async def get_metadata_field_unique_values_async(
         self, metadata_field: str, search_term: str | None, from_: int, size: int
     ) -> tuple[list[str], int]:
         """
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 3e224c4982..70a9696318 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -619,7 +619,7 @@ def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore
         )
         assert count_a_active == 2
 
-    def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumentStore):
+    def test_count_distinct_metadata_values_by_filter(self, document_store: OpenSearchDocumentStore):
         docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
@@ -631,13 +631,13 @@ def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumen
         assert document_store.count_documents() == 5
 
         # Count distinct values for all documents
-        distinct_counts = document_store.count_distinct_values_by_filter(filters={})
+        distinct_counts = document_store.count_distinct_metadata_values_by_filter(filters={})
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
         assert distinct_counts["priority"] == 3  # 1, 2, 3
 
         # Count distinct values for documents with category="A"
-        distinct_counts_a = document_store.count_distinct_values_by_filter(
+        distinct_counts_a = document_store.count_distinct_metadata_values_by_filter(
             filters={"field": "meta.category", "operator": "==", "value": "A"}
         )
         assert distinct_counts_a["category"] == 1  # Only A
@@ -645,7 +645,7 @@ def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumen
         assert distinct_counts_a["priority"] == 2  # 1, 3
 
         # Count distinct values for documents with status="active"
-        distinct_counts_active = document_store.count_distinct_values_by_filter(
+        distinct_counts_active = document_store.count_distinct_metadata_values_by_filter(
             filters={"field": "meta.status", "operator": "==", "value": "active"}
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
@@ -653,7 +653,7 @@ def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumen
         assert distinct_counts_active["priority"] == 3  # 1, 2, 3
 
         # Count distinct values with complex filter (category="A" AND status="active")
-        distinct_counts_a_active = document_store.count_distinct_values_by_filter(
+        distinct_counts_a_active = document_store.count_distinct_metadata_values_by_filter(
             filters={
                 "operator": "AND",
                 "conditions": [
@@ -666,14 +666,14 @@ def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumen
         assert distinct_counts_a_active["status"] == 1  # Only active
         assert distinct_counts_a_active["priority"] == 2  # 1, 3
 
-    def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
+    def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore):
         docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Doc 2", meta={"category": "B", "status": "inactive"}),
         ]
         document_store.write_documents(docs)
 
-        fields_info = document_store.get_fields_info()
+        fields_info = document_store.get_metadata_fields_info()
 
         # Verify that fields_info contains expected fields
         assert "content" in fields_info
@@ -690,7 +690,7 @@ def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
         assert fields_info["status"]["type"] == "keyword"
         assert fields_info["priority"]["type"] == "long"
 
-    def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
+    def test_get_metadata_field_min_max(self, document_store: OpenSearchDocumentStore):
         # Test with integer values
         docs = [
             Document(content="Doc 1", meta={"priority": 1, "age": 10}),
@@ -705,28 +705,28 @@ def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         document_store.write_documents(docs)
 
         # Test with "meta." prefix for integer field
-        min_max_priority = document_store.get_field_min_max("meta.priority")
+        min_max_priority = document_store.get_metadata_field_min_max("meta.priority")
         assert min_max_priority["min"] == 1
         assert min_max_priority["max"] == 10
 
         # Test with "meta." prefix for another integer field
-        min_max_rating = document_store.get_field_min_max("meta.age")
+        min_max_rating = document_store.get_metadata_field_min_max("meta.age")
         assert min_max_rating["min"] == 5
         assert min_max_rating["max"] == 20
 
         # Test with single value
         single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
         document_store.write_documents(single_doc)
-        min_max_single = document_store.get_field_min_max("meta.single_value")
+        min_max_single = document_store.get_metadata_field_min_max("meta.single_value")
         assert min_max_single["min"] == 42
         assert min_max_single["max"] == 42
 
         # Test with float values
-        min_max_score = document_store.get_field_min_max("meta.rating")
+        min_max_score = document_store.get_metadata_field_min_max("meta.rating")
         assert min_max_score["min"] == pytest.approx(5.2)
         assert min_max_score["max"] == pytest.approx(20.3)
 
-    def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
+    def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocumentStore):
         # Test with string values
         docs = [
             Document(content="Python programming", meta={"category": "A", "language": "Python"}),
@@ -739,34 +739,38 @@ def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
         document_store.write_documents(docs)
 
         # Test getting all unique values without search term
-        unique_values, total_count = document_store.get_field_unique_values("meta.category", None, 0, 10)
+        unique_values, total_count = document_store.get_metadata_field_unique_values("meta.category", None, 0, 10)
         assert set(unique_values) == {"A", "B", "C"}
         assert total_count == 3
 
         # Test with "meta." prefix
-        unique_languages, lang_count = document_store.get_field_unique_values("meta.language", None, 0, 10)
+        unique_languages, lang_count = document_store.get_metadata_field_unique_values("meta.language", None, 0, 10)
         assert set(unique_languages) == {"Python", "Java", "JavaScript"}
         assert lang_count == 3
 
         # Test pagination - first page
-        unique_values_page1, total_count = document_store.get_field_unique_values("meta.category", None, 0, 2)
+        unique_values_page1, total_count = document_store.get_metadata_field_unique_values("meta.category", None, 0, 2)
         assert len(unique_values_page1) == 2
         assert total_count == 3
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
 
         # Test pagination - second page
-        unique_values_page2, total_count = document_store.get_field_unique_values("meta.category", None, 2, 2)
+        unique_values_page2, total_count = document_store.get_metadata_field_unique_values("meta.category", None, 2, 2)
         assert len(unique_values_page2) == 1
         assert total_count == 3
         assert unique_values_page2[0] in ["A", "B", "C"]
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered, total_count = document_store.get_field_unique_values("meta.category", "Python", 0, 10)
+        unique_values_filtered, total_count = document_store.get_metadata_field_unique_values(
+            "meta.category", "Python", 0, 10
+        )
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
         assert total_count == 1
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java, total_count = document_store.get_field_unique_values("meta.category", "Java", 0, 10)
+        unique_values_java, total_count = document_store.get_metadata_field_unique_values(
+            "meta.category", "Java", 0, 10
+        )
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
         assert total_count == 1
 
@@ -778,12 +782,14 @@ def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         document_store.write_documents(int_docs)
-        unique_priorities, priority_count = document_store.get_field_unique_values("meta.priority", None, 0, 10)
+        unique_priorities, priority_count = document_store.get_metadata_field_unique_values(
+            "meta.priority", None, 0, 10
+        )
         assert set(unique_priorities) == {"1", "2", "3"}
         assert priority_count == 3
 
         # Test with search term on integer field
-        unique_priorities_filtered, priority_count = document_store.get_field_unique_values(
+        unique_priorities_filtered, priority_count = document_store.get_metadata_field_unique_values(
             "meta.priority", "Doc 1", 0, 10
         )
         assert set(unique_priorities_filtered) == {"1"}
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 9c0fbe5b54..05df97fe75 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -282,13 +282,13 @@ async def test_count_distinct_values_by_filter(self, document_store: OpenSearchD
         assert await document_store.count_documents_async() == 5
 
         # count distinct values for all documents
-        distinct_counts = await document_store.count_distinct_values_by_filter_async(filters={})
+        distinct_counts = await document_store.count_distinct_metadata_values_by_filter_async(filters={})
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
         assert distinct_counts["priority"] == 3  # 1, 2, 3
 
         # count distinct values for documents with category="A"
-        distinct_counts_a = await document_store.count_distinct_values_by_filter_async(
+        distinct_counts_a = await document_store.count_distinct_metadata_values_by_filter_async(
             filters={"field": "meta.category", "operator": "==", "value": "A"}
         )
         assert distinct_counts_a["category"] == 1  # Only A
@@ -296,7 +296,7 @@ async def test_count_distinct_values_by_filter(self, document_store: OpenSearchD
         assert distinct_counts_a["priority"] == 2  # 1, 3
 
         # count distinct values for documents with status="active"
-        distinct_counts_active = await document_store.count_distinct_values_by_filter_async(
+        distinct_counts_active = await document_store.count_distinct_metadata_values_by_filter_async(
             filters={"field": "meta.status", "operator": "==", "value": "active"}
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
@@ -304,7 +304,7 @@ async def test_count_distinct_values_by_filter(self, document_store: OpenSearchD
         assert distinct_counts_active["priority"] == 3  # 1, 2, 3
 
         # count distinct values with complex filter (category="A" AND status="active")
-        distinct_counts_a_active = await document_store.count_distinct_values_by_filter_async(
+        distinct_counts_a_active = await document_store.count_distinct_metadata_values_by_filter_async(
             filters={
                 "operator": "AND",
                 "conditions": [
@@ -474,14 +474,14 @@ async def test_update_by_filter_async(self, document_store: OpenSearchDocumentSt
         assert draft_docs[0].meta["category"] == "B"
 
     @pytest.mark.asyncio
-    async def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
+    async def test_get_metadata_fields_info_async(self, document_store: OpenSearchDocumentStore):
         filterable_docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Doc 2", meta={"category": "B", "status": "inactive"}),
         ]
         await document_store.write_documents_async(filterable_docs)
 
-        fields_info = await document_store.get_fields_info_async()
+        fields_info = await document_store.get_metadata_fields_info_async()
 
         # Verify that fields_info contains expected fields
         assert "content" in fields_info
@@ -493,13 +493,14 @@ async def test_get_fields_info(self, document_store: OpenSearchDocumentStore):
         # Verify field types
         assert fields_info["content"]["type"] == "text"
         assert fields_info["embedding"]["type"] == "knn_vector"
+
         # Metadata fields should be keyword type (from dynamic templates)
         assert fields_info["category"]["type"] == "keyword"
         assert fields_info["status"]["type"] == "keyword"
         assert fields_info["priority"]["type"] == "long"
 
     @pytest.mark.asyncio
-    async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
+    async def test_get_metadata_field_min_max_async(self, document_store: OpenSearchDocumentStore):
         # Test with integer values
         docs = [
             Document(content="Doc 1", meta={"priority": 1, "age": 10}),
@@ -514,29 +515,29 @@ async def test_get_field_min_max(self, document_store: OpenSearchDocumentStore):
         await document_store.write_documents_async(docs)
 
         # Test with "meta." prefix for integer field
-        min_max_priority = await document_store.get_field_min_max_async("meta.priority")
+        min_max_priority = await document_store.get_metadata_field_min_max_async("meta.priority")
         assert min_max_priority["min"] == 1
         assert min_max_priority["max"] == 10
 
         # Test with "meta." prefix for another integer field
-        min_max_rating = await document_store.get_field_min_max_async("meta.age")
+        min_max_rating = await document_store.get_metadata_field_min_max_async("meta.age")
         assert min_max_rating["min"] == 5
         assert min_max_rating["max"] == 20
 
         # Test with single value
         single_doc = [Document(content="Doc 5", meta={"single_value": 42})]
         await document_store.write_documents_async(single_doc)
-        min_max_single = await document_store.get_field_min_max_async("meta.single_value")
+        min_max_single = await document_store.get_metadata_field_min_max_async("meta.single_value")
         assert min_max_single["min"] == 42
         assert min_max_single["max"] == 42
 
         # Test with float values
-        min_max_score = await document_store.get_field_min_max_async("meta.rating")
+        min_max_score = await document_store.get_metadata_field_min_max_async("meta.rating")
         assert min_max_score["min"] == pytest.approx(5.2)
         assert min_max_score["max"] == pytest.approx(20.3)
 
     @pytest.mark.asyncio
-    async def test_get_field_unique_values(self, document_store: OpenSearchDocumentStore):
+    async def test_get_metadata_field_unique_values_async(self, document_store: OpenSearchDocumentStore):
         # Test with string values
         docs = [
             Document(content="Python programming", meta={"category": "A", "language": "Python"}),
@@ -549,17 +550,21 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
         await document_store.write_documents_async(docs)
 
         # Test getting all unique values without search term
-        unique_values, total_count = await document_store.get_field_unique_values_async("meta.category", None, 0, 10)
+        unique_values, total_count = await document_store.get_metadata_field_unique_values_async(
+            "meta.category", None, 0, 10
+        )
         assert set(unique_values) == {"A", "B", "C"}
         assert total_count == 3
 
         # Test with "meta." prefix
-        unique_languages, lang_count = await document_store.get_field_unique_values_async("meta.language", None, 0, 10)
+        unique_languages, lang_count = await document_store.get_metadata_field_unique_values_async(
+            "meta.language", None, 0, 10
+        )
         assert set(unique_languages) == {"Python", "Java", "JavaScript"}
         assert lang_count == 3
 
         # Test pagination - first page
-        unique_values_page1, total_count = await document_store.get_field_unique_values_async(
+        unique_values_page1, total_count = await document_store.get_metadata_field_unique_values_async(
             "meta.category", None, 0, 2
         )
         assert len(unique_values_page1) == 2
@@ -567,7 +572,7 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
 
         # Test pagination - second page
-        unique_values_page2, total_count = await document_store.get_field_unique_values_async(
+        unique_values_page2, total_count = await document_store.get_metadata_field_unique_values_async(
             "meta.category", None, 2, 2
         )
         assert len(unique_values_page2) == 1
@@ -575,14 +580,14 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
         assert unique_values_page2[0] in ["A", "B", "C"]
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered, total_count = await document_store.get_field_unique_values_async(
+        unique_values_filtered, total_count = await document_store.get_metadata_field_unique_values_async(
             "meta.category", "Python", 0, 10
         )
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
         assert total_count == 1
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java, total_count = await document_store.get_field_unique_values_async(
+        unique_values_java, total_count = await document_store.get_metadata_field_unique_values_async(
             "meta.category", "Java", 0, 10
         )
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
@@ -596,14 +601,14 @@ async def test_get_field_unique_values(self, document_store: OpenSearchDocumentS
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         await document_store.write_documents_async(int_docs)
-        unique_priorities, priority_count = await document_store.get_field_unique_values_async(
+        unique_priorities, priority_count = await document_store.get_metadata_field_unique_values_async(
             "meta.priority", None, 0, 10
         )
         assert set(unique_priorities) == {"1", "2", "3"}
         assert priority_count == 3
 
         # Test with search term on integer field
-        unique_priorities_filtered, priority_count = await document_store.get_field_unique_values_async(
+        unique_priorities_filtered, priority_count = await document_store.get_metadata_field_unique_values_async(
             "meta.priority", "Doc 1", 0, 10
         )
         assert set(unique_priorities_filtered) == {"1"}

From 2a65a49a26560bdd4ec9cb43ce89d1bd7b13cb7d Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 11:42:12 +0100
Subject: [PATCH 24/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 872953c1b5..b45850f9ae 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1192,7 +1192,7 @@ def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str,
         return aggs
 
     @staticmethod
-    def _build_distinct_values_query_body(filters: dict, aggs: dict[str, Any]) -> dict[str, Any]:
+    def _build_distinct_values_query_body(filters: Optional[dict[str, Any]], aggs: dict[str, Any]) -> dict[str, Any]:
         """
         Builds the query body for distinct values counting with filters and aggregations.
         """

From 60406ac54664726bc8cc45d1578eb5031d4c6694 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 11:53:22 +0100
Subject: [PATCH 25/58] updating function names + tests

---
 .../document_stores/opensearch/document_store.py       |  4 ++--
 integrations/opensearch/tests/test_document_store.py   | 10 +++++-----
 .../opensearch/tests/test_document_store_async.py      | 10 +++++-----
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index b45850f9ae..4c3b344076 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1226,7 +1226,7 @@ def _extract_distinct_counts_from_aggregations(
                     distinct_counts[field_name] = aggregations[agg_key]["value"]
         return distinct_counts
 
-    def count_distinct_metadata_values_by_filter(self, filters: dict) -> dict[str, int]:
+    def count_unique_metadata_by_filter(self, filters: dict) -> dict[str, int]:
         """
         Returns the number of unique values for each meta field of the documents that match the provided filters.
 
@@ -1253,7 +1253,7 @@ def count_distinct_metadata_values_by_filter(self, filters: dict) -> dict[str, i
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
-    async def count_distinct_metadata_values_by_filter_async(self, filters: dict) -> dict[str, int]:
+    async def count_unique_metadata_by_filter_async(self, filters: dict) -> dict[str, int]:
         """
         Asynchronously returns the number of unique values for each meta field of the documents that match the
         provided filters.
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 70a9696318..5a930de613 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -619,7 +619,7 @@ def test_count_documents_by_filter(self, document_store: OpenSearchDocumentStore
         )
         assert count_a_active == 2
 
-    def test_count_distinct_metadata_values_by_filter(self, document_store: OpenSearchDocumentStore):
+    def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumentStore):
         docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
@@ -631,13 +631,13 @@ def test_count_distinct_metadata_values_by_filter(self, document_store: OpenSear
         assert document_store.count_documents() == 5
 
         # Count distinct values for all documents
-        distinct_counts = document_store.count_distinct_metadata_values_by_filter(filters={})
+        distinct_counts = document_store.count_unique_metadata_by_filter(filters={})
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
         assert distinct_counts["priority"] == 3  # 1, 2, 3
 
         # Count distinct values for documents with category="A"
-        distinct_counts_a = document_store.count_distinct_metadata_values_by_filter(
+        distinct_counts_a = document_store.count_unique_metadata_by_filter(
             filters={"field": "meta.category", "operator": "==", "value": "A"}
         )
         assert distinct_counts_a["category"] == 1  # Only A
@@ -645,7 +645,7 @@ def test_count_distinct_metadata_values_by_filter(self, document_store: OpenSear
         assert distinct_counts_a["priority"] == 2  # 1, 3
 
         # Count distinct values for documents with status="active"
-        distinct_counts_active = document_store.count_distinct_metadata_values_by_filter(
+        distinct_counts_active = document_store.count_unique_metadata_by_filter(
             filters={"field": "meta.status", "operator": "==", "value": "active"}
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
@@ -653,7 +653,7 @@ def test_count_distinct_metadata_values_by_filter(self, document_store: OpenSear
         assert distinct_counts_active["priority"] == 3  # 1, 2, 3
 
         # Count distinct values with complex filter (category="A" AND status="active")
-        distinct_counts_a_active = document_store.count_distinct_metadata_values_by_filter(
+        distinct_counts_a_active = document_store.count_unique_metadata_by_filter(
             filters={
                 "operator": "AND",
                 "conditions": [
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 05df97fe75..6b7f562068 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -270,7 +270,7 @@ async def test_count_documents_by_filter(self, document_store: OpenSearchDocumen
         assert count_a_active == 2
 
     @pytest.mark.asyncio
-    async def test_count_distinct_values_by_filter(self, document_store: OpenSearchDocumentStore):
+    async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumentStore):
         filterable_docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}),
@@ -282,13 +282,13 @@ async def test_count_distinct_values_by_filter(self, document_store: OpenSearchD
         assert await document_store.count_documents_async() == 5
 
         # count distinct values for all documents
-        distinct_counts = await document_store.count_distinct_metadata_values_by_filter_async(filters={})
+        distinct_counts = await document_store.count_unique_metadata_by_filter_async(filters={})
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
         assert distinct_counts["priority"] == 3  # 1, 2, 3
 
         # count distinct values for documents with category="A"
-        distinct_counts_a = await document_store.count_distinct_metadata_values_by_filter_async(
+        distinct_counts_a = await document_store.count_unique_metadata_by_filter_async(
             filters={"field": "meta.category", "operator": "==", "value": "A"}
         )
         assert distinct_counts_a["category"] == 1  # Only A
@@ -296,7 +296,7 @@ async def test_count_distinct_values_by_filter(self, document_store: OpenSearchD
         assert distinct_counts_a["priority"] == 2  # 1, 3
 
         # count distinct values for documents with status="active"
-        distinct_counts_active = await document_store.count_distinct_metadata_values_by_filter_async(
+        distinct_counts_active = await document_store.count_unique_metadata_by_filter_async(
             filters={"field": "meta.status", "operator": "==", "value": "active"}
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
@@ -304,7 +304,7 @@ async def test_count_distinct_values_by_filter(self, document_store: OpenSearchD
         assert distinct_counts_active["priority"] == 3  # 1, 2, 3
 
         # count distinct values with complex filter (category="A" AND status="active")
-        distinct_counts_a_active = await document_store.count_distinct_metadata_values_by_filter_async(
+        distinct_counts_a_active = await document_store.count_unique_metadata_by_filter_async(
             filters={
                 "operator": "AND",
                 "conditions": [

From 7780e762fe2b4bd470171a64d2f922605c8770a1 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 13:30:24 +0100
Subject: [PATCH 26/58] adding SQLRetriever + tests

---
 .../retrievers/opensearch/__init__.py         |  8 ++++-
 .../opensearch/document_store.py              | 16 ++++++++--
 .../opensearch/tests/test_document_store.py   | 32 ++++++++-----------
 .../tests/test_document_store_async.py        | 32 ++++++++-----------
 4 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py
index 7641b6a421..5f80dbd69f 100644
--- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py
+++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py
@@ -5,5 +5,11 @@
 from .bm25_retriever import OpenSearchBM25Retriever
 from .embedding_retriever import OpenSearchEmbeddingRetriever
 from .open_search_hybrid_retriever import OpenSearchHybridRetriever
+from .sql_retriever import OpenSearchSQLRetriever
 
-__all__ = ["OpenSearchBM25Retriever", "OpenSearchEmbeddingRetriever", "OpenSearchHybridRetriever"]
+__all__ = [
+    "OpenSearchBM25Retriever",
+    "OpenSearchEmbeddingRetriever",
+    "OpenSearchHybridRetriever",
+    "OpenSearchSQLRetriever",
+]
diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 4c3b344076..82018225c1 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1529,10 +1529,16 @@ def _process_sql_response(response_data: Any, response_format: ResponseFormat) -
         else:
             return response_data if isinstance(response_data, str) else str(response_data)
 
-    def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any:
+    def _query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any:
         """
         Execute a raw OpenSearch SQL query against the index.
 
+        This method is not meant to be part of the public interface of
+        `OpenSearchDocumentStore` nor called directly.
+        `OpenSearchSQLRetriever` uses this method directly and is the public interface for it.
+
+        See `OpenSearchSQLRetriever` for more information.
+
         :param query: The OpenSearch SQL query to execute
         :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
         :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
@@ -1593,10 +1599,16 @@ def query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any
             msg = f"Failed to execute SQL query in OpenSearch: {e!s}"
             raise DocumentStoreError(msg) from e
 
-    async def query_sql_async(self, query: str, response_format: ResponseFormat = "json") -> Any:
+    async def _query_sql_async(self, query: str, response_format: ResponseFormat = "json") -> Any:
         """
         Asynchronously execute a raw OpenSearch SQL query against the index.
 
+        This method is not meant to be part of the public interface of
+        `OpenSearchDocumentStore` nor called directly.
+        `OpenSearchSQLRetriever` uses this method directly and is the public interface for it.
+
+        See `OpenSearchSQLRetriever` for more information.
+
         :param query: The OpenSearch SQL query to execute
         :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
         :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 5a930de613..665e7e4d38 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -796,9 +796,6 @@ def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocume
         assert priority_count == 1
 
     def test_query_sql(self, document_store: OpenSearchDocumentStore):
-        """
-        Test executing SQL queries against the OpenSearch index.
-        """
         docs = [
             Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
@@ -807,54 +804,53 @@ def test_query_sql(self, document_store: OpenSearchDocumentStore):
         ]
         document_store.write_documents(docs, refresh=True)
 
-        # Test SQL query with JSON format (default)
+        # SQL query with JSON format (default)
         sql_query = (
             f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
             f"WHERE category = 'A' ORDER BY priority"
         )
-        result = document_store.query_sql(sql_query, response_format="json")
+        result = document_store._query_sql(sql_query, response_format="json")
 
-        # New format returns a list of dictionaries (the _source from each hit)
+        # format returns a list of dictionaries (the _source from each hit)
         assert len(result) == 2  # Two documents with category A
         assert isinstance(result, list)
         assert all(isinstance(row, dict) for row in result)
 
-        # Verify data contains expected values
         categories = [row.get("category") for row in result]
         assert all(cat == "A" for cat in categories)
 
-        # Verify all expected fields are present
+        # verify all expected fields are present
         for row in result:
             assert "content" in row
             assert "category" in row
             assert "status" in row
             assert "priority" in row
 
-        # Test SQL query with CSV format
-        result_csv = document_store.query_sql(sql_query, response_format="csv")
+        # SQL query with CSV format
+        result_csv = document_store._query_sql(sql_query, response_format="csv")
         assert isinstance(result_csv, str)
         assert "content" in result_csv
         assert "category" in result_csv
 
-        # Test SQL query with JDBC format
-        result_jdbc = document_store.query_sql(sql_query, response_format="jdbc")
+        # SQL query with JDBC format
+        result_jdbc = document_store._query_sql(sql_query, response_format="jdbc")
         # JDBC format can be dict or str depending on OpenSearch version
         assert result_jdbc is not None
 
-        # Test SQL query with RAW format
-        result_raw = document_store.query_sql(sql_query, response_format="raw")
+        # SQL query with RAW format
+        result_raw = document_store._query_sql(sql_query, response_format="raw")
         assert isinstance(result_raw, str)
 
-        # Test COUNT query
+        # COUNT query
         count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
-        count_result = document_store.query_sql(count_query, response_format="json")
+        count_result = document_store._query_sql(count_query, response_format="json")
         # COUNT query may return different format, check it's a valid response
         assert count_result is not None
 
-        # Test error handling for invalid SQL query
+        # error handling for invalid SQL query
         invalid_query = "SELECT * FROM non_existent_index"
         with pytest.raises(DocumentStoreError, match="Failed to execute SQL query"):
-            document_store.query_sql(invalid_query)
+            document_store._query_sql(invalid_query)
 
     @pytest.mark.integration
     def test_write_with_routing(self, document_store: OpenSearchDocumentStore):
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 6b7f562068..05ae4d94e7 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -616,9 +616,6 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Open
 
     @pytest.mark.asyncio
     async def test_query_sql(self, document_store: OpenSearchDocumentStore):
-        """
-        Test executing SQL queries against the OpenSearch index.
-        """
         docs = [
             Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
             Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
@@ -627,51 +624,50 @@ async def test_query_sql(self, document_store: OpenSearchDocumentStore):
         ]
         await document_store.write_documents_async(docs, refresh=True)
 
-        # Test SQL query with JSON format (default)
+        # SQL query with JSON format (default)
         sql_query = (
             f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
             f"WHERE category = 'A' ORDER BY priority"
         )
-        result = await document_store.query_sql_async(sql_query, response_format="json")
+        result = await document_store._query_sql_async(sql_query, response_format="json")
 
-        # New format returns a list of dictionaries (the _source from each hit)
+        # returns a list of dictionaries (the _source from each hit)
         assert len(result) == 2  # Two documents with category A
         assert isinstance(result, list)
         assert all(isinstance(row, dict) for row in result)
 
-        # Verify data contains expected values
         categories = [row.get("category") for row in result]
         assert all(cat == "A" for cat in categories)
 
-        # Verify all expected fields are present
+        # all expected fields are present
         for row in result:
             assert "content" in row
             assert "category" in row
             assert "status" in row
             assert "priority" in row
 
-        # Test SQL query with CSV format
-        result_csv = await document_store.query_sql_async(sql_query, response_format="csv")
+        # SQL query with CSV format
+        result_csv = await document_store._query_sql_async(sql_query, response_format="csv")
         assert isinstance(result_csv, str)
         assert "content" in result_csv
         assert "category" in result_csv
 
-        # Test SQL query with JDBC format
-        result_jdbc = await document_store.query_sql_async(sql_query, response_format="jdbc")
+        # SQL query with JDBC format
+        result_jdbc = await document_store._query_sql_async(sql_query, response_format="jdbc")
         # JDBC format can be dict or str depending on OpenSearch version
         assert result_jdbc is not None
 
-        # Test SQL query with RAW format
-        result_raw = await document_store.query_sql_async(sql_query, response_format="raw")
+        # SQL query with RAW format
+        result_raw = await document_store._query_sql_async(sql_query, response_format="raw")
         assert isinstance(result_raw, str)
 
-        # Test COUNT query
+        # COUNT query
         count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
-        count_result = await document_store.query_sql_async(count_query, response_format="json")
+        count_result = await document_store._query_sql_async(count_query, response_format="json")
         # COUNT query may return different format, check it's a valid response
         assert count_result is not None
 
-        # Test error handling for invalid SQL query
+        # error handling for invalid SQL query
         invalid_query = "SELECT * FROM non_existent_index"
         with pytest.raises(DocumentStoreError, match="Failed to execute SQL query"):
-            await document_store.query_sql_async(invalid_query)
+            await document_store._query_sql_async(invalid_query)

From 67852d50c735e50d2723f9980c3847be5b14690d Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 13:33:09 +0100
Subject: [PATCH 27/58] adding missing files

---
 .../opensearch/tests/test_sql_retriever.py    | 409 ++++++++++++++++++
 1 file changed, 409 insertions(+)
 create mode 100644 integrations/opensearch/tests/test_sql_retriever.py

diff --git a/integrations/opensearch/tests/test_sql_retriever.py b/integrations/opensearch/tests/test_sql_retriever.py
new file mode 100644
index 0000000000..dba0b57e01
--- /dev/null
+++ b/integrations/opensearch/tests/test_sql_retriever.py
@@ -0,0 +1,409 @@
+# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from unittest.mock import Mock, patch
+
+import pytest
+from haystack.dataclasses import Document
+
+from haystack_integrations.components.retrievers.opensearch import OpenSearchSQLRetriever
+from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
+
+
+def test_init_default():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    retriever = OpenSearchSQLRetriever(document_store=mock_store)
+    assert retriever._document_store == mock_store
+    assert retriever._response_format == "json"
+    assert retriever._raise_on_failure is True
+
+
+def test_init_custom():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, response_format="csv", raise_on_failure=False)
+    assert retriever._response_format == "csv"
+    assert retriever._raise_on_failure is False
+
+
+def test_init_invalid_document_store():
+    with pytest.raises(ValueError, match="document_store must be an instance of OpenSearchDocumentStore"):
+        OpenSearchSQLRetriever(document_store="not a document store")
+
+
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+def test_to_dict(_mock_opensearch_client):
+    document_store = OpenSearchDocumentStore(hosts="some fake host")
+    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
+    res = retriever.to_dict()
+    assert res["type"] == "haystack_integrations.components.retrievers.opensearch.sql_retriever.OpenSearchSQLRetriever"
+    assert res["init_parameters"]["response_format"] == "csv"
+    assert res["init_parameters"]["raise_on_failure"] is True
+    assert "document_store" in res["init_parameters"]
+
+
+@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
+def test_from_dict(_mock_opensearch_client):
+    document_store = OpenSearchDocumentStore(hosts="some fake host")
+    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
+    data = retriever.to_dict()
+    retriever_from_dict = OpenSearchSQLRetriever.from_dict(data)
+    assert retriever_from_dict._response_format == "csv"
+    assert retriever_from_dict._raise_on_failure is True
+
+
+def test_run():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql.return_value = [{"content": "Test doc", "category": "A"}]
+    retriever = OpenSearchSQLRetriever(document_store=mock_store)
+    res = retriever.run(query="SELECT content, category FROM my_index WHERE category = 'A'")
+    mock_store._query_sql.assert_called_once_with(
+        query="SELECT content, category FROM my_index WHERE category = 'A'",
+        response_format="json",
+    )
+    assert len(res) == 1
+    assert "result" in res
+    assert res["result"] == [{"content": "Test doc", "category": "A"}]
+
+
+def test_run_with_custom_response_format():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql.return_value = "content,category\nTest doc,A"
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, response_format="csv")
+    res = retriever.run(query="SELECT content, category FROM my_index")
+    mock_store._query_sql.assert_called_once_with(query="SELECT content, category FROM my_index", response_format="csv")
+    assert res["result"] == "content,category\nTest doc,A"
+
+
+def test_run_with_runtime_response_format():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql.return_value = "raw response"
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, response_format="json")
+    res = retriever.run(query="SELECT * FROM my_index", response_format="raw")
+    mock_store._query_sql.assert_called_once_with(query="SELECT * FROM my_index", response_format="raw")
+    assert res["result"] == "raw response"
+
+
+def test_run_with_runtime_document_store():
+    mock_store1 = Mock(spec=OpenSearchDocumentStore)
+    mock_store2 = Mock(spec=OpenSearchDocumentStore)
+    mock_store2._query_sql.return_value = [{"result": "from store 2"}]
+    retriever = OpenSearchSQLRetriever(document_store=mock_store1)
+    res = retriever.run(query="SELECT * FROM my_index", document_store=mock_store2)
+    mock_store1._query_sql.assert_not_called()
+    mock_store2._query_sql.assert_called_once_with(query="SELECT * FROM my_index", response_format="json")
+    assert res["result"] == [{"result": "from store 2"}]
+
+
+def test_run_with_error_raise_on_failure():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql.side_effect = Exception("SQL error")
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=True)
+    with pytest.raises(Exception, match="SQL error"):
+        retriever.run(query="SELECT * FROM my_index")
+
+
+def test_run_with_error_no_raise():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql.side_effect = Exception("SQL error")
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=False)
+    res = retriever.run(query="SELECT * FROM my_index")
+    assert res["result"] is None
+
+
+@pytest.mark.integration
+def test_sql_retriever_basic_query(document_store: OpenSearchDocumentStore):
+    """Test basic SQL query execution with JSON format"""
+    docs = [
+        Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
+        Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
+        Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
+        Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
+    ]
+    document_store.write_documents(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store)
+    sql_query = (
+        f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
+        f"WHERE category = 'A' ORDER BY priority"
+    )
+    result = retriever.run(query=sql_query)
+
+    assert "result" in result
+    assert len(result["result"]) == 2
+    assert isinstance(result["result"], list)
+    assert all(isinstance(row, dict) for row in result["result"])
+
+    categories = [row.get("category") for row in result["result"]]
+    assert all(cat == "A" for cat in categories)
+
+    for row in result["result"]:
+        assert "content" in row
+        assert "category" in row
+        assert "status" in row
+        assert "priority" in row
+
+
+@pytest.mark.integration
+def test_sql_retriever_csv_format(document_store: OpenSearchDocumentStore):
+    """Test SQL query with CSV response format"""
+    docs = [
+        Document(content="Python programming", meta={"category": "A", "status": "active"}),
+        Document(content="Java programming", meta={"category": "B", "status": "active"}),
+    ]
+    document_store.write_documents(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
+    sql_query = f"SELECT content, category FROM {document_store._index}"  # noqa: S608
+    result = retriever.run(query=sql_query)
+
+    assert "result" in result
+    assert isinstance(result["result"], str)
+    assert "content" in result["result"]
+    assert "category" in result["result"]
+
+
+@pytest.mark.integration
+def test_sql_retriever_count_query(document_store: OpenSearchDocumentStore):
+    """Test COUNT query execution"""
+    docs = [
+        Document(content="Doc 1", meta={"category": "A"}),
+        Document(content="Doc 2", meta={"category": "B"}),
+        Document(content="Doc 3", meta={"category": "A"}),
+    ]
+    document_store.write_documents(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store)
+    count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
+    result = retriever.run(query=count_query)
+
+    assert "result" in result
+    assert result["result"] is not None
+
+
+@pytest.mark.integration
+def test_sql_retriever_with_filters(document_store: OpenSearchDocumentStore):
+    """Test SQL query with WHERE clause filtering"""
+
+    docs = [
+        Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
+        Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
+        Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
+    ]
+    document_store.write_documents(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store)
+    sql_query = (
+        f"SELECT content, category, status FROM {document_store._index} "  # noqa: S608
+        f"WHERE category = 'A' AND status = 'active'"
+    )
+    result = retriever.run(query=sql_query)
+
+    assert "result" in result
+    assert len(result["result"]) == 1
+    assert result["result"][0]["category"] == "A"
+    assert result["result"][0]["status"] == "active"
+
+
+@pytest.mark.integration
+def test_sql_retriever_runtime_response_format(document_store: OpenSearchDocumentStore):
+    """Test overriding response format at runtime"""
+    docs = [
+        Document(content="Python programming", meta={"category": "A"}),
+        Document(content="Java programming", meta={"category": "B"}),
+    ]
+    document_store.write_documents(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="json")
+    sql_query = f"SELECT content, category FROM {document_store._index}"  # noqa: S608
+
+    # Override with CSV format at runtime
+    result = retriever.run(query=sql_query, response_format="csv")
+    assert isinstance(result["result"], str)
+    assert "content" in result["result"]
+
+    # Use default JSON format
+    result_json = retriever.run(query=sql_query)
+    assert isinstance(result_json["result"], list)
+
+
+@pytest.mark.integration
+def test_sql_retriever_runtime_document_store_switching(
+    document_store: OpenSearchDocumentStore, document_store_2: OpenSearchDocumentStore
+):
+    """Test switching document stores at runtime"""
+    docs1 = [
+        Document(content="Python programming", meta={"category": "A"}),
+        Document(content="Java programming", meta={"category": "B"}),
+    ]
+    document_store.write_documents(docs1, refresh=True)
+
+    docs2 = [
+        Document(content="JavaScript development", meta={"category": "C"}),
+        Document(content="TypeScript development", meta={"category": "D"}),
+    ]
+    document_store_2.write_documents(docs2, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store)
+
+    # Query first store
+    sql_query1 = f"SELECT content, category FROM {document_store._index} WHERE category = 'A'"  # noqa: S608
+    result1 = retriever.run(query=sql_query1)
+    assert len(result1["result"]) == 1
+    assert "Python" in result1["result"][0]["content"]
+
+    # Query second store at runtime
+    sql_query2 = f"SELECT content, category FROM {document_store_2._index} WHERE category = 'C'"  # noqa: S608
+    result2 = retriever.run(query=sql_query2, document_store=document_store_2)
+    assert len(result2["result"]) == 1
+    assert "JavaScript" in result2["result"][0]["content"]
+
+    # Verify results are different
+    assert result1["result"][0]["content"] != result2["result"][0]["content"]
+
+
+@pytest.mark.integration
+def test_sql_retriever_error_handling(document_store: OpenSearchDocumentStore):
+    """Test error handling for invalid SQL queries"""
+    retriever = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=True)
+
+    invalid_query = "SELECT * FROM non_existent_index"
+    with pytest.raises(Exception, match="Failed to execute SQL query"):
+        retriever.run(query=invalid_query)
+
+    # Test with raise_on_failure=False
+    retriever_no_raise = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=False)
+    result = retriever_no_raise.run(query=invalid_query)
+    assert result["result"] is None
+
+
+@pytest.mark.asyncio
+async def test_run_async():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql_async.return_value = [{"content": "Test doc", "category": "A"}]
+    retriever = OpenSearchSQLRetriever(document_store=mock_store)
+    res = await retriever.run_async(query="SELECT content, category FROM my_index WHERE category = 'A'")
+    mock_store._query_sql_async.assert_called_once_with(
+        query="SELECT content, category FROM my_index WHERE category = 'A'",
+        response_format="json",
+    )
+    assert len(res) == 1
+    assert "result" in res
+    assert res["result"] == [{"content": "Test doc", "category": "A"}]
+
+
+@pytest.mark.asyncio
+async def test_run_async_with_error_raise_on_failure():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql_async.side_effect = Exception("SQL error")
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=True)
+    with pytest.raises(Exception, match="SQL error"):
+        await retriever.run_async(query="SELECT * FROM my_index")
+
+
+@pytest.mark.asyncio
+async def test_run_async_with_error_no_raise():
+    mock_store = Mock(spec=OpenSearchDocumentStore)
+    mock_store._query_sql_async.side_effect = Exception("SQL error")
+    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=False)
+    res = await retriever.run_async(query="SELECT * FROM my_index")
+    assert res["result"] is None
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_sql_retriever_async_basic_query(document_store: OpenSearchDocumentStore):
+    """Test basic async SQL query execution"""
+    docs = [
+        Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
+        Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
+        Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
+    ]
+    await document_store.write_documents_async(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store)
+    sql_query = (
+        f"SELECT content, category, status FROM {document_store._index} "  # noqa: S608
+        f"WHERE category = 'A' ORDER BY priority"
+    )
+    result = await retriever.run_async(query=sql_query)
+
+    assert "result" in result
+    assert len(result["result"]) == 2
+    assert isinstance(result["result"], list)
+    assert all(isinstance(row, dict) for row in result["result"])
+
+    categories = [row.get("category") for row in result["result"]]
+    assert all(cat == "A" for cat in categories)
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_sql_retriever_async_csv_format(document_store: OpenSearchDocumentStore):
+    """Test async SQL query with CSV response format"""
+    docs = [
+        Document(content="Python programming", meta={"category": "A"}),
+        Document(content="Java programming", meta={"category": "B"}),
+    ]
+    await document_store.write_documents_async(docs, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
+    sql_query = f"SELECT content, category FROM {document_store._index}"  # noqa: S608
+    result = await retriever.run_async(query=sql_query)
+
+    assert "result" in result
+    assert isinstance(result["result"], str)
+    assert "content" in result["result"]
+    assert "category" in result["result"]
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_sql_retriever_async_runtime_document_store_switching(
+    document_store: OpenSearchDocumentStore, document_store_2: OpenSearchDocumentStore
+):
+    """Test async switching document stores at runtime"""
+    docs1 = [
+        Document(content="Python programming", meta={"category": "A"}),
+        Document(content="Java programming", meta={"category": "B"}),
+    ]
+    await document_store.write_documents_async(docs1, refresh=True)
+
+    docs2 = [
+        Document(content="JavaScript development", meta={"category": "C"}),
+        Document(content="TypeScript development", meta={"category": "D"}),
+    ]
+    await document_store_2.write_documents_async(docs2, refresh=True)
+
+    retriever = OpenSearchSQLRetriever(document_store=document_store)
+
+    # Query first store
+    sql_query1 = f"SELECT content, category FROM {document_store._index} WHERE category = 'A'"  # noqa: S608
+    result1 = await retriever.run_async(query=sql_query1)
+    assert len(result1["result"]) == 1
+    assert "Python" in result1["result"][0]["content"]
+
+    # Query second store at runtime
+    sql_query2 = f"SELECT content, category FROM {document_store_2._index} WHERE category = 'C'"  # noqa: S608
+    result2 = await retriever.run_async(query=sql_query2, document_store=document_store_2)
+    assert len(result2["result"]) == 1
+    assert "JavaScript" in result2["result"][0]["content"]
+
+    # Verify results are different
+    assert result1["result"][0]["content"] != result2["result"][0]["content"]
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+async def test_sql_retriever_async_error_handling(document_store: OpenSearchDocumentStore):
+    """Test async error handling for invalid SQL queries"""
+    retriever = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=True)
+
+    invalid_query = "SELECT * FROM non_existent_index"
+    with pytest.raises(Exception, match="Failed to execute SQL query"):
+        await retriever.run_async(query=invalid_query)
+
+    # Test with raise_on_failure=False
+    retriever_no_raise = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=False)
+    result = await retriever_no_raise.run_async(query=invalid_query)
+    assert result["result"] is None

From 3985984bca54636eb636ea0e49d79f29d1ba1ccf Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 13:33:36 +0100
Subject: [PATCH 28/58] adding missing files

---
 .../retrievers/opensearch/sql_retriever.py    | 190 ++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py

diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
new file mode 100644
index 0000000000..921639042a
--- /dev/null
+++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
@@ -0,0 +1,190 @@
+# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any, Literal, Optional
+
+from haystack import component, default_from_dict, default_to_dict, logging
+
+from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
+
+logger = logging.getLogger(__name__)
+
+ResponseFormat = Literal["json", "jdbc", "csv", "raw"]
+
+
+@component
+class OpenSearchSQLRetriever:
+    """
+    Executes raw OpenSearch SQL queries against an OpenSearchDocumentStore.
+
+    This component allows you to execute SQL queries directly against the OpenSearch index,
+    which is useful for fetching metadata, aggregations, and other structured data at runtime.
+    """
+
+    def __init__(
+        self,
+        *,
+        document_store: OpenSearchDocumentStore,
+        response_format: ResponseFormat = "json",
+        raise_on_failure: bool = True,
+    ):
+        """
+        Creates the OpenSearchSQLRetriever component.
+
+        :param document_store: An instance of OpenSearchDocumentStore to use with the Retriever.
+        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
+            - `json`: Returns a list of dictionaries (the _source from each hit). Default.
+            - `csv`: Returns the response as CSV text.
+            - `jdbc`: Returns the response in JDBC format.
+            - `raw`: Returns the raw response as text.
+        :param raise_on_failure:
+            Whether to raise an exception if the query fails. Otherwise, log a warning and set `result` to None.
+
+        :raises ValueError: If `document_store` is not an instance of OpenSearchDocumentStore.
+        """
+        if not isinstance(document_store, OpenSearchDocumentStore):
+            msg = "document_store must be an instance of OpenSearchDocumentStore"
+            raise ValueError(msg)
+
+        self._document_store = document_store
+        self._response_format = response_format
+        self._raise_on_failure = raise_on_failure
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
+        return default_to_dict(
+            self,
+            document_store=self._document_store.to_dict(),
+            response_format=self._response_format,
+            raise_on_failure=self._raise_on_failure,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "OpenSearchSQLRetriever":
+        """
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+
+        :returns:
+            Deserialized component.
+        """
+        data["init_parameters"]["document_store"] = OpenSearchDocumentStore.from_dict(
+            data["init_parameters"]["document_store"]
+        )
+        return default_from_dict(cls, data)
+
+    @component.output_types(result=Any)
+    def run(
+        self,
+        query: str,
+        response_format: Optional[ResponseFormat] = None,
+        document_store: Optional[OpenSearchDocumentStore] = None,
+    ) -> dict[str, Any]:
+        """
+        Execute a raw OpenSearch SQL query against the index.
+
+        :param query: The OpenSearch SQL query to execute.
+        :param response_format: The format of the response. If not provided, uses the format
+            specified during initialization. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
+        :param document_store: Optionally, an instance of OpenSearchDocumentStore to use with the Retriever.
+
+        :returns:
+            A dictionary containing the query results with the following structure:
+            - result: The query results in the specified format. For JSON format, returns a list of dictionaries
+              (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
+
+        :raises ValueError: If `document_store` is provided but is not an instance of OpenSearchDocumentStore.
+
+        Example:
+            ```python
+            retriever = OpenSearchSQLRetriever(document_store=document_store)
+            result = retriever.run(
+                query="SELECT content, category FROM my_index WHERE category = 'A'"
+            )
+            # result["result"] contains a list of dictionaries with the query results
+            ```
+        """
+        doc_store = self._document_store
+        if document_store is not None:
+            if not isinstance(document_store, OpenSearchDocumentStore):
+                msg = "document_store must be an instance of OpenSearchDocumentStore"
+                raise ValueError(msg)
+            doc_store = document_store
+
+        response_format = response_format or self._response_format
+
+        try:
+            result = doc_store._query_sql(query=query, response_format=response_format)
+        except Exception as e:
+            if self._raise_on_failure:
+                raise  # bare raise re-raises the active exception with its original traceback
+            logger.warning(
+                "An error during SQL query execution occurred and will be ignored by returning None: {error}",
+                error=str(e),
+                exc_info=True,
+            )
+            result = None
+
+        return {"result": result}
+
+    @component.output_types(result=Any)
+    async def run_async(
+        self,
+        query: str,
+        response_format: Optional[ResponseFormat] = None,
+        document_store: Optional[OpenSearchDocumentStore] = None,
+    ) -> dict[str, Any]:
+        """
+        Asynchronously execute a raw OpenSearch SQL query against the index.
+
+        :param query: The OpenSearch SQL query to execute.
+        :param response_format: The format of the response. If not provided, uses the format
+            specified during initialization. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
+        :param document_store: Optionally, an instance of OpenSearchDocumentStore to use with the Retriever.
+
+        :returns:
+            A dictionary containing the query results with the following structure:
+            - result: The query results in the specified format. For JSON format, returns a list of dictionaries
+              (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
+
+        :raises ValueError: If `document_store` is provided but is not an instance of OpenSearchDocumentStore.
+
+        Example:
+            ```python
+            retriever = OpenSearchSQLRetriever(document_store=document_store)
+            result = await retriever.run_async(
+                query="SELECT content, category FROM my_index WHERE category = 'A'"
+            )
+            # result["result"] contains a list of dictionaries with the query results
+            ```
+        """
+        doc_store = self._document_store
+        if document_store is not None:
+            if not isinstance(document_store, OpenSearchDocumentStore):
+                msg = "document_store must be an instance of OpenSearchDocumentStore"
+                raise ValueError(msg)
+            doc_store = document_store
+
+        response_format = response_format or self._response_format
+
+        try:
+            result = await doc_store._query_sql_async(query=query, response_format=response_format)
+        except Exception as e:
+            if self._raise_on_failure:
+                raise  # bare raise re-raises the active exception with its original traceback
+            logger.warning(
+                "An error during SQL query execution occurred and will be ignored by returning None: {error}",
+                error=str(e),
+                exc_info=True,
+            )
+            result = None
+
+        return {"result": result}

From ea69e28f0dd816d026f702cf989594d11b604fd2 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:21:55 +0100
Subject: [PATCH 29/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 82018225c1..920b1da0c9 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1228,7 +1228,7 @@ def _extract_distinct_counts_from_aggregations(
 
     def count_unique_metadata_by_filter(self, filters: dict) -> dict[str, int]:
         """
-        Returns the number of unique values for each meta field of the documents that match the provided filters.
+        Returns the number of unique values for each metadata field of the documents that match the provided filters.
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)

From 7caca707852537f95aea9fc2b55b1346edd29988 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:22:06 +0100
Subject: [PATCH 30/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 920b1da0c9..90fc15e06e 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1232,7 +1232,7 @@ def count_unique_metadata_by_filter(self, filters: dict) -> dict[str, int]:
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :returns: The number of unique values for each meta field of the documents that match the filters.
+        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered documents.
         """
         self._ensure_initialized()
         assert self._client is not None

From 0f86e3b12b2a3c7a83e7b36161b643c8c2fc7b9d Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:22:30 +0100
Subject: [PATCH 31/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 90fc15e06e..0fe7ab4626 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1340,7 +1340,7 @@ def _extract_min_max_from_stats(stats: dict[str, Any]) -> dict[str, Any]:
         max_value = stats.get("max")
         return {"min": min_value, "max": max_value}
 
-    def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, Any]:
+    def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, int | None]:
         """
         Returns the minimum and maximum values for the given metadata field.
 

From 07785d63cc5241c218e6b53d090379d174c837b7 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:22:39 +0100
Subject: [PATCH 32/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 0fe7ab4626..e95429c33f 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1357,7 +1357,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, int | Non
 
         return self._extract_min_max_from_stats(stats)
 
-    async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, Any]:
+    async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[str, int | None]:
         """
         Asynchronously returns the minimum and maximum values for the given metadata field.
 

From f70438660f8eb6ae73e2c2159419fddb685ec388 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:22:48 +0100
Subject: [PATCH 33/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index e95429c33f..147415e66e 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1362,7 +1362,7 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
         Asynchronously returns the minimum and maximum values for the given metadata field.
 
         :param metadata_field: The metadata field to get the minimum and maximum values for.
-        :returns: The minimum and maximum values for the given metadata field.
+        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the metadata field across all documents.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None

From b6e00ea89198da04390ee0ae0441a4a8139bd25f Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:27:52 +0100
Subject: [PATCH 34/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 147415e66e..a28324de85 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1253,7 +1253,7 @@ def count_unique_metadata_by_filter(self, filters: dict) -> dict[str, int]:
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
-    async def count_unique_metadata_by_filter_async(self, filters: dict) -> dict[str, int]:
+    async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -> dict[str, int]:
         """
         Asynchronously returns the number of unique values for each meta field of the documents that match the
         provided filters.

From 386130a87f22b4c910f6fb4d269c7f0b04ba9c6e Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:28:16 +0100
Subject: [PATCH 35/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index a28324de85..92f3768dd1 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1226,7 +1226,7 @@ def _extract_distinct_counts_from_aggregations(
                     distinct_counts[field_name] = aggregations[agg_key]["value"]
         return distinct_counts
 
-    def count_unique_metadata_by_filter(self, filters: dict) -> dict[str, int]:
+    def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str, int]:
         """
         Returns the number of unique values for each metadata field of the documents that match the provided filters.
 

From 743a6a5c88dc44dff1a71e317c983fa8fa7669b0 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:29:15 +0100
Subject: [PATCH 36/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 92f3768dd1..faca193b7e 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1255,7 +1255,7 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str,
 
     async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -> dict[str, int]:
         """
-        Asynchronously returns the number of unique values for each meta field of the documents that match the
+        Asynchronously returns the number of unique values for each metadata field of the documents that match the
         provided filters.
 
         :param filters: The filters to apply to count documents.

From 54511730fed42693e157429ffe24c085c7a1089b Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:29:22 +0100
Subject: [PATCH 37/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index faca193b7e..88f9379ea3 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1260,7 +1260,7 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :returns: The number of unique values for each meta field of the documents that match the filters.
+        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered documents.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None

From 7a95c33b61515558c4d443e2aade2eecec9917f8 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:29:35 +0100
Subject: [PATCH 38/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 88f9379ea3..063a19ce71 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1332,7 +1332,7 @@ def _build_min_max_query_body(field_name: str) -> dict[str, Any]:
         }
 
     @staticmethod
-    def _extract_min_max_from_stats(stats: dict[str, Any]) -> dict[str, Any]:
+    def _extract_min_max_from_stats(stats: dict[str, Any]) -> dict[str, int | None]:
         """
         Extracts min and max values from stats aggregation results.
         """

From 4dc5c02ef90a0ca5e5e70c2e9d4b4fd7746810d1 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 14:29:47 +0100
Subject: [PATCH 39/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 063a19ce71..6a50e641fd 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1345,7 +1345,7 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, int | Non
         Returns the minimum and maximum values for the given metadata field.
 
         :param metadata_field: The metadata field to get the minimum and maximum values for.
-        :returns: The minimum and maximum values for the given metadata field.
+        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the metadata field across all documents.
         """
         self._ensure_initialized()
         assert self._client is not None

From a9f35d2ca3659f45f55e51fce3761279c30e4335 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:16:32 +0100
Subject: [PATCH 40/58] PR comments/fixes

---
 .../opensearch/document_store.py              | 30 ++++++++++++++++---
 .../opensearch/tests/test_document_store.py   |  6 ----
 .../tests/test_document_store_async.py        |  7 -----
 3 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 6a50e641fd..a29c02726c 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1232,7 +1232,8 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str,
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered documents.
+        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
+                 documents.
         """
         self._ensure_initialized()
         assert self._client is not None
@@ -1260,7 +1261,8 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered documents.
+        :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
+                  documents.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None
@@ -1285,6 +1287,20 @@ def get_metadata_fields_info(self) -> dict[str, dict]:
         """
         Returns the information about the fields in the index.
 
+        If we populated the index with documents like:
+
+            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1})
+            Document(content="Doc 2", meta={"category": "B", "status": "inactive"})
+
+        This method would return:
+
+            {
+                # special fields such as 'content' and 'embedding' are not included
+                'category': {'type': 'keyword'},
+                'status': {'type': 'keyword'},
+                'priority': {'type': 'long'},
+            }
+
         :returns: The information about the fields in the index.
         """
         self._ensure_initialized()
@@ -1292,6 +1308,8 @@ def get_metadata_fields_info(self) -> dict[str, dict]:
 
         mapping = self._client.indices.get_mapping(index=self._index)
         index_mapping = mapping[self._index]["mappings"]["properties"]
+        # remove all fields that are not metadata fields
+        index_mapping = {k: v for k, v in index_mapping.items() if k not in SPECIAL_FIELDS}
         return index_mapping
 
     async def get_metadata_fields_info_async(self) -> dict[str, dict]:
@@ -1305,6 +1323,8 @@ async def get_metadata_fields_info_async(self) -> dict[str, dict]:
 
         mapping = await self._async_client.indices.get_mapping(index=self._index)
         index_mapping = mapping[self._index]["mappings"]["properties"]
+        # remove all fields that are not metadata fields
+        index_mapping = {k: v for k, v in index_mapping.items() if k not in SPECIAL_FIELDS}
         return index_mapping
 
     @staticmethod
@@ -1345,7 +1365,8 @@ def get_metadata_field_min_max(self, metadata_field: str) -> dict[str, int | Non
         Returns the minimum and maximum values for the given metadata field.
 
         :param metadata_field: The metadata field to get the minimum and maximum values for.
-        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the metadata field across all documents.
+        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
+                  metadata field across all documents.
         """
         self._ensure_initialized()
         assert self._client is not None
@@ -1362,7 +1383,8 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
         Asynchronously returns the minimum and maximum values for the given metadata field.
 
         :param metadata_field: The metadata field to get the minimum and maximum values for.
-        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the metadata field across all documents.
+        :returns: A dictionary with the keys "min" and "max", where each value is the minimum or maximum value of the
+                  metadata field across all documents.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 665e7e4d38..c67cee5e42 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -676,16 +676,10 @@ def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore)
         fields_info = document_store.get_metadata_fields_info()
 
         # Verify that fields_info contains expected fields
-        assert "content" in fields_info
-        assert "embedding" in fields_info
         assert "category" in fields_info
         assert "status" in fields_info
         assert "priority" in fields_info
 
-        # Verify field types
-        assert fields_info["content"]["type"] == "text"
-        assert fields_info["embedding"]["type"] == "knn_vector"
-        # Metadata fields should be keyword type (from dynamic templates)
         assert fields_info["category"]["type"] == "keyword"
         assert fields_info["status"]["type"] == "keyword"
         assert fields_info["priority"]["type"] == "long"
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 05ae4d94e7..879a8de33b 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -484,17 +484,10 @@ async def test_get_metadata_fields_info_async(self, document_store: OpenSearchDo
         fields_info = await document_store.get_metadata_fields_info_async()
 
         # Verify that fields_info contains expected fields
-        assert "content" in fields_info
-        assert "embedding" in fields_info
         assert "category" in fields_info
         assert "status" in fields_info
         assert "priority" in fields_info
 
-        # Verify field types
-        assert fields_info["content"]["type"] == "text"
-        assert fields_info["embedding"]["type"] == "knn_vector"
-
-        # Metadata fields should be keyword type (from dynamic templates)
         assert fields_info["category"]["type"] == "keyword"
         assert fields_info["status"]["type"] == "keyword"
         assert fields_info["priority"]["type"] == "long"

From eb261f96f4f6b5a4389dbeda4d0ca0f6fb30e0d3 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:18:45 +0100
Subject: [PATCH 41/58] fixes

---
 .../components/retrievers/opensearch/sql_retriever.py       | 5 ++---
 .../document_stores/opensearch/document_store.py            | 6 +++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
index 921639042a..a7ec5dadb7 100644
--- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
+++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
@@ -2,16 +2,15 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Any, Literal, Optional
+from typing import Any, Optional
 
 from haystack import component, default_from_dict, default_to_dict, logging
 
 from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
+from haystack_integrations.document_stores.opensearch.document_store import ResponseFormat
 
 logger = logging.getLogger(__name__)
 
-ResponseFormat = Literal["json", "jdbc", "csv", "raw"]
-
 
 @component
 class OpenSearchSQLRetriever:
diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index a29c02726c..ca1b70eef0 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -26,10 +26,10 @@
 
 SPECIAL_FIELDS = {"content", "embedding", "id", "score", "sparse_embedding", "blob"}
 
-Hosts = Union[str, list[Union[str, Mapping[str, Union[str, int]]]]]
-
 ResponseFormat = Literal["json", "jdbc", "csv", "raw"]
 
+Hosts = Union[str, list[Union[str, Mapping[str, Union[str, int]]]]]
+
 # document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to
 # True. Scaling uses the expit function (inverse of the logit function) after applying a scaling factor
 # (e.g., BM25_SCALING_FACTOR for the bm25_retrieval method).
@@ -1233,7 +1233,7 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str,
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
         :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
-                 documents.
+                  documents.
         """
         self._ensure_initialized()
         assert self._client is not None

From 99a17dbc470195d9f473cdd290dc1104d0f96d8f Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:25:37 +0100
Subject: [PATCH 42/58] improving docstring

---
 .../document_stores/opensearch/document_store.py   | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index ca1b70eef0..7b62fcd5b2 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1316,6 +1316,20 @@ async def get_metadata_fields_info_async(self) -> dict[str, dict]:
         """
         Asynchronously returns the information about the fields in the index.
 
+        If we populated the index with documents like:
+
+            Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1})
+            Document(content="Doc 2", meta={"category": "B", "status": "inactive"})
+
+        This method would return:
+
+            {
+                # special fields such as 'content' and 'embedding' are not included
+                'category': {'type': 'keyword'},
+                'status': {'type': 'keyword'},
+                'priority': {'type': 'long'},
+            }
+
         :returns: The information about the fields in the index.
         """
         await self._ensure_initialized_async()

From f5eaf4bec5dc370ebe0d2a42aeae993cfa61b0bd Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:32:11 +0100
Subject: [PATCH 43/58] updating docs

---
 integrations/opensearch/pydoc/config_docusaurus.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/integrations/opensearch/pydoc/config_docusaurus.yml b/integrations/opensearch/pydoc/config_docusaurus.yml
index 7e711f6f34..d10454000f 100644
--- a/integrations/opensearch/pydoc/config_docusaurus.yml
+++ b/integrations/opensearch/pydoc/config_docusaurus.yml
@@ -5,6 +5,7 @@ loaders:
   - haystack_integrations.components.retrievers.opensearch.bm25_retriever
   - haystack_integrations.components.retrievers.opensearch.embedding_retriever
   - haystack_integrations.components.retrievers.opensearch.open_search_hybrid_retriever
+  - haystack_integrations.components.retrievers.opensearch.sql_retriever
   - haystack_integrations.document_stores.opensearch.document_store
   - haystack_integrations.document_stores.opensearch.filters
   search_path:

From 1f1b83294b41b5db3120c040fd8fb97c5a224b97 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:36:44 +0100
Subject: [PATCH 44/58] removing all SQLRetriever related code

---
 .../opensearch/document_store.py              | 139 ------------------
 .../opensearch/tests/test_document_store.py   |  57 -------
 .../tests/test_document_store_async.py        |  58 --------
 3 files changed, 254 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 7b62fcd5b2..5568d72a49 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1564,142 +1564,3 @@ def _process_sql_response(response_data: Any, response_format: ResponseFormat) -
             return response_data
         else:
             return response_data if isinstance(response_data, str) else str(response_data)
-
-    def _query_sql(self, query: str, response_format: ResponseFormat = "json") -> Any:
-        """
-        Execute a raw OpenSearch SQL query against the index.
-
-        This method is not meant to be part of the public interface of
-        `OpenSearchDocumentStore` nor called directly.
-        `OpenSearchSQLRetriever` uses this method directly and is the public interface for it.
-
-        See `OpenSearchSQLRetriever` for more information.
-
-        :param query: The OpenSearch SQL query to execute
-        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
-        :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
-            (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
-
-        NOTE: For non-JSON formats (csv, jdbc, raw), use requests to make a raw HTTP request and get the text response
-              This avoids deserialization issues with the opensearchpy client.
-        """
-        self._ensure_initialized()
-        assert self._client is not None
-
-        # For non-JSON formats, use requests directly to avoid deserialization issues
-        if response_format != "json":
-            try:
-                # Get connection info from the transport
-                connection = self._client.transport.get_connection()
-                base_url = connection.host
-                url, headers, auth = self._prepare_sql_http_request_params(base_url, response_format)
-
-                verify = self._verify_certs if self._verify_certs is not None else True
-                timeout = self._timeout if self._timeout is not None else 30.0
-                response = requests.post(
-                    url,
-                    json={"query": query},
-                    headers=headers,
-                    auth=auth,
-                    verify=verify,
-                    timeout=timeout,
-                )
-                response.raise_for_status()
-                return response.text
-            except Exception as e:
-                # If requests fails (e.g., AWS auth), fall back to opensearchpy
-                # which will raise SerializationError that we can handle
-                logger.error(f"Failed to execute SQL query in OpenSearch: {e!s}")
-
-        try:
-            body = {"query": query}
-            params = {"format": response_format}
-
-            response_data = self._client.transport.perform_request(
-                method="POST",
-                url="/_plugins/_sql",
-                params=params,
-                body=body,
-            )
-
-            return self._process_sql_response(response_data, response_format)
-        except SerializationError:
-            # If we get here, it means requests failed above (likely AWS auth) and opensearchpy can't deserialize the
-            # response. Re-raise as DocumentStoreError with a helpful message
-            msg = (
-                f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. "
-                f"This format may not be supported with the current authentication method."
-            )
-            raise DocumentStoreError(msg) from None
-        except Exception as e:
-            msg = f"Failed to execute SQL query in OpenSearch: {e!s}"
-            raise DocumentStoreError(msg) from e
-
-    async def _query_sql_async(self, query: str, response_format: ResponseFormat = "json") -> Any:
-        """
-        Asynchronously execute a raw OpenSearch SQL query against the index.
-
-        This method is not meant to be part of the public interface of
-        `OpenSearchDocumentStore` nor called directly.
-        `OpenSearchSQLRetriever` uses this method directly and is the public interface for it.
-
-        See `OpenSearchSQLRetriever` for more information.
-
-        :param query: The OpenSearch SQL query to execute
-        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
-        :returns: The query results in the specified format. For JSON format, returns a list of dictionaries
-            (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
-
-        NOTE: For non-JSON formats (csv, jdbc, raw), use httpx AsyncClient to make a raw HTTP request and get the text
-              response. This avoids deserialization issues with the opensearchpy client.
-        """
-        await self._ensure_initialized_async()
-        assert self._async_client is not None
-
-        # For non-JSON formats, use httpx directly to avoid deserialization issues
-        if response_format != "json":
-            try:
-                # Get connection info from the transport
-                connection = self._async_client.transport.get_connection()
-                base_url = connection.host
-                url, headers, auth = self._prepare_sql_http_request_params(base_url, response_format)
-
-                verify = self._verify_certs if self._verify_certs is not None else True
-                timeout = httpx.Timeout(self._timeout if self._timeout else 30.0)
-
-                async with httpx.AsyncClient(verify=verify, timeout=timeout) as client:
-                    response = await client.post(
-                        url,
-                        json={"query": query},
-                        headers=headers,
-                        auth=auth,
-                    )
-                    response.raise_for_status()
-                    return response.text
-            except Exception as e:
-                logger.error(f"Failed to execute SQL query in OpenSearch: {e!s}")
-
-        try:
-            body = {"query": query}
-            params = {"format": response_format}
-
-            response_data = await self._async_client.transport.perform_request(
-                method="POST",
-                url="/_plugins/_sql",
-                params=params,
-                body=body,
-            )
-
-            return self._process_sql_response(response_data, response_format)
-        except SerializationError:
-            # If we get here, it means httpx failed above (likely AWS auth or not installed) and opensearchpy can't
-            # deserialize the response. Re-raise as DocumentStoreError with a helpful message
-            msg = (
-                f"Failed to execute SQL query in OpenSearch: Unable to deserialize {response_format} response. "
-                f"This format may not be supported with the current authentication method. "
-                f"Consider installing httpx for better support."
-            )
-            raise DocumentStoreError(msg) from None
-        except Exception as e:
-            msg = f"Failed to execute SQL query in OpenSearch: {e!s}"
-            raise DocumentStoreError(msg) from e
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index c67cee5e42..e02d7a1aaf 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -789,63 +789,6 @@ def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocume
         assert set(unique_priorities_filtered) == {"1"}
         assert priority_count == 1
 
-    def test_query_sql(self, document_store: OpenSearchDocumentStore):
-        docs = [
-            Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
-            Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
-            Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
-            Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
-        ]
-        document_store.write_documents(docs, refresh=True)
-
-        # SQL query with JSON format (default)
-        sql_query = (
-            f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
-            f"WHERE category = 'A' ORDER BY priority"
-        )
-        result = document_store._query_sql(sql_query, response_format="json")
-
-        # format returns a list of dictionaries (the _source from each hit)
-        assert len(result) == 2  # Two documents with category A
-        assert isinstance(result, list)
-        assert all(isinstance(row, dict) for row in result)
-
-        categories = [row.get("category") for row in result]
-        assert all(cat == "A" for cat in categories)
-
-        # verify all expected fields are present
-        for row in result:
-            assert "content" in row
-            assert "category" in row
-            assert "status" in row
-            assert "priority" in row
-
-        # SQL query with CSV format
-        result_csv = document_store._query_sql(sql_query, response_format="csv")
-        assert isinstance(result_csv, str)
-        assert "content" in result_csv
-        assert "category" in result_csv
-
-        # SQL query with JDBC format
-        result_jdbc = document_store._query_sql(sql_query, response_format="jdbc")
-        # JDBC format can be dict or str depending on OpenSearch version
-        assert result_jdbc is not None
-
-        # SQL query with RAW format
-        result_raw = document_store._query_sql(sql_query, response_format="raw")
-        assert isinstance(result_raw, str)
-
-        # COUNT query
-        count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
-        count_result = document_store._query_sql(count_query, response_format="json")
-        # COUNT query may return different format, check it's a valid response
-        assert count_result is not None
-
-        # error handling for invalid SQL query
-        invalid_query = "SELECT * FROM non_existent_index"
-        with pytest.raises(DocumentStoreError, match="Failed to execute SQL query"):
-            document_store._query_sql(invalid_query)
-
     @pytest.mark.integration
     def test_write_with_routing(self, document_store: OpenSearchDocumentStore):
         """Test writing documents with routing metadata"""
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 879a8de33b..b18adfab24 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -606,61 +606,3 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Open
         )
         assert set(unique_priorities_filtered) == {"1"}
         assert priority_count == 1
-
-    @pytest.mark.asyncio
-    async def test_query_sql(self, document_store: OpenSearchDocumentStore):
-        docs = [
-            Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
-            Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
-            Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
-            Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
-        ]
-        await document_store.write_documents_async(docs, refresh=True)
-
-        # SQL query with JSON format (default)
-        sql_query = (
-            f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
-            f"WHERE category = 'A' ORDER BY priority"
-        )
-        result = await document_store._query_sql_async(sql_query, response_format="json")
-
-        # returns a list of dictionaries (the _source from each hit)
-        assert len(result) == 2  # Two documents with category A
-        assert isinstance(result, list)
-        assert all(isinstance(row, dict) for row in result)
-
-        categories = [row.get("category") for row in result]
-        assert all(cat == "A" for cat in categories)
-
-        # all expected fields are present
-        for row in result:
-            assert "content" in row
-            assert "category" in row
-            assert "status" in row
-            assert "priority" in row
-
-        # SQL query with CSV format
-        result_csv = await document_store._query_sql_async(sql_query, response_format="csv")
-        assert isinstance(result_csv, str)
-        assert "content" in result_csv
-        assert "category" in result_csv
-
-        # SQL query with JDBC format
-        result_jdbc = await document_store._query_sql_async(sql_query, response_format="jdbc")
-        # JDBC format can be dict or str depending on OpenSearch version
-        assert result_jdbc is not None
-
-        # SQL query with RAW format
-        result_raw = await document_store._query_sql_async(sql_query, response_format="raw")
-        assert isinstance(result_raw, str)
-
-        # COUNT query
-        count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
-        count_result = await document_store._query_sql_async(count_query, response_format="json")
-        # COUNT query may return different format, check it's a valid response
-        assert count_result is not None
-
-        # error handling for invalid SQL query
-        invalid_query = "SELECT * FROM non_existent_index"
-        with pytest.raises(DocumentStoreError, match="Failed to execute SQL query"):
-            await document_store._query_sql_async(invalid_query)

From 3b1d2d917308f32b81821aa33caa17c67fcc7401 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:41:07 +0100
Subject: [PATCH 45/58] removing all SQLRetriever related code

---
 .../retrievers/opensearch/sql_retriever.py    | 189 ------------------
 .../opensearch/document_store.py              |   3 -
 .../opensearch/tests/test_document_store.py   |   2 +-
 .../tests/test_document_store_async.py        |   1 -
 4 files changed, 1 insertion(+), 194 deletions(-)
 delete mode 100644 integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py

diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
deleted file mode 100644
index a7ec5dadb7..0000000000
--- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/sql_retriever.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from typing import Any, Optional
-
-from haystack import component, default_from_dict, default_to_dict, logging
-
-from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
-from haystack_integrations.document_stores.opensearch.document_store import ResponseFormat
-
-logger = logging.getLogger(__name__)
-
-
-@component
-class OpenSearchSQLRetriever:
-    """
-    Executes raw OpenSearch SQL queries against an OpenSearchDocumentStore.
-
-    This component allows you to execute SQL queries directly against the OpenSearch index,
-    which is useful for fetching metadata, aggregations, and other structured data at runtime.
-    """
-
-    def __init__(
-        self,
-        *,
-        document_store: OpenSearchDocumentStore,
-        response_format: ResponseFormat = "json",
-        raise_on_failure: bool = True,
-    ):
-        """
-        Creates the OpenSearchSQLRetriever component.
-
-        :param document_store: An instance of OpenSearchDocumentStore to use with the Retriever.
-        :param response_format: The format of the response. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
-            - `json`: Returns a list of dictionaries (the _source from each hit). Default.
-            - `csv`: Returns the response as CSV text.
-            - `jdbc`: Returns the response in JDBC format.
-            - `raw`: Returns the raw response as text.
-        :param raise_on_failure:
-            Whether to raise an exception if the API call fails. Otherwise, log a warning and return None.
-
-        :raises ValueError: If `document_store` is not an instance of OpenSearchDocumentStore.
-        """
-        if not isinstance(document_store, OpenSearchDocumentStore):
-            msg = "document_store must be an instance of OpenSearchDocumentStore"
-            raise ValueError(msg)
-
-        self._document_store = document_store
-        self._response_format = response_format
-        self._raise_on_failure = raise_on_failure
-
-    def to_dict(self) -> dict[str, Any]:
-        """
-        Serializes the component to a dictionary.
-
-        :returns:
-            Dictionary with serialized data.
-        """
-        return default_to_dict(
-            self,
-            document_store=self._document_store.to_dict(),
-            response_format=self._response_format,
-            raise_on_failure=self._raise_on_failure,
-        )
-
-    @classmethod
-    def from_dict(cls, data: dict[str, Any]) -> "OpenSearchSQLRetriever":
-        """
-        Deserializes the component from a dictionary.
-
-        :param data:
-            Dictionary to deserialize from.
-
-        :returns:
-            Deserialized component.
-        """
-        data["init_parameters"]["document_store"] = OpenSearchDocumentStore.from_dict(
-            data["init_parameters"]["document_store"]
-        )
-        return default_from_dict(cls, data)
-
-    @component.output_types(result=Any)
-    def run(
-        self,
-        query: str,
-        response_format: Optional[ResponseFormat] = None,
-        document_store: Optional[OpenSearchDocumentStore] = None,
-    ) -> dict[str, Any]:
-        """
-        Execute a raw OpenSearch SQL query against the index.
-
-        :param query: The OpenSearch SQL query to execute.
-        :param response_format: The format of the response. If not provided, uses the format
-            specified during initialization. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
-        :param document_store: Optionally, an instance of OpenSearchDocumentStore to use with the Retriever.
-
-        :returns:
-            A dictionary containing the query results with the following structure:
-            - result: The query results in the specified format. For JSON format, returns a list of dictionaries
-              (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
-
-        Example:
-            ```python
-            retriever = OpenSearchSQLRetriever(document_store=document_store)
-            result = retriever.run(
-                query="SELECT content, category FROM my_index WHERE category = 'A'"
-            )
-            # result["result"] contains a list of dictionaries with the query results
-            ```
-        """
-        if document_store is not None:
-            if not isinstance(document_store, OpenSearchDocumentStore):
-                msg = "document_store must be an instance of OpenSearchDocumentStore"
-                raise ValueError(msg)
-            doc_store = document_store
-        else:
-            doc_store = self._document_store
-
-        response_format = response_format or self._response_format
-
-        try:
-            result = doc_store._query_sql(query=query, response_format=response_format)
-        except Exception as e:
-            if self._raise_on_failure:
-                raise e
-            else:
-                logger.warning(
-                    "An error during SQL query execution occurred and will be ignored by returning None: {error}",
-                    error=str(e),
-                    exc_info=True,
-                )
-                result = None
-
-        return {"result": result}
-
-    @component.output_types(result=Any)
-    async def run_async(
-        self,
-        query: str,
-        response_format: Optional[ResponseFormat] = None,
-        document_store: Optional[OpenSearchDocumentStore] = None,
-    ) -> dict[str, Any]:
-        """
-        Asynchronously execute a raw OpenSearch SQL query against the index.
-
-        :param query: The OpenSearch SQL query to execute.
-        :param response_format: The format of the response. If not provided, uses the format
-            specified during initialization. See https://docs.opensearch.org/latest/search-plugins/sql/response-formats/
-        :param document_store: Optionally, an instance of OpenSearchDocumentStore to use with the Retriever.
-
-        :returns:
-            A dictionary containing the query results with the following structure:
-            - result: The query results in the specified format. For JSON format, returns a list of dictionaries
-              (the _source from each hit). For other formats (csv, jdbc, raw), returns the response as text.
-
-        Example:
-            ```python
-            retriever = OpenSearchSQLRetriever(document_store=document_store)
-            result = await retriever.run_async(
-                query="SELECT content, category FROM my_index WHERE category = 'A'"
-            )
-            # result["result"] contains a list of dictionaries with the query results
-            ```
-        """
-        if document_store is not None:
-            if not isinstance(document_store, OpenSearchDocumentStore):
-                msg = "document_store must be an instance of OpenSearchDocumentStore"
-                raise ValueError(msg)
-            doc_store = document_store
-        else:
-            doc_store = self._document_store
-
-        response_format = response_format or self._response_format
-
-        try:
-            result = await doc_store._query_sql_async(query=query, response_format=response_format)
-        except Exception as e:
-            if self._raise_on_failure:
-                raise e
-            else:
-                logger.warning(
-                    "An error during SQL query execution occurred and will be ignored by returning None: {error}",
-                    error=str(e),
-                    exc_info=True,
-                )
-                result = None
-
-        return {"result": result}
diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 5568d72a49..73f8f4428a 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -8,15 +8,12 @@
 from math import exp
 from typing import Any, Literal, Optional, Union
 
-import httpx
-import requests
 from haystack import default_from_dict, default_to_dict, logging
 from haystack.dataclasses import Document
 from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError
 from haystack.document_stores.types import DuplicatePolicy
 from haystack.utils.auth import Secret
 from opensearchpy import AsyncHttpConnection, AsyncOpenSearch, OpenSearch
-from opensearchpy.exceptions import SerializationError
 from opensearchpy.helpers import async_bulk, bulk
 
 from haystack_integrations.document_stores.opensearch.auth import AsyncAWSAuth, AWSAuth
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index e02d7a1aaf..3648b44a1f 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
+gi# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index b18adfab24..a1b5271400 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -4,7 +4,6 @@
 
 import pytest
 from haystack.dataclasses import Document
-from haystack.document_stores.errors import DocumentStoreError
 from haystack.document_stores.types import DuplicatePolicy
 
 from haystack_integrations.document_stores.opensearch.document_store import OpenSearchDocumentStore

From 78019c317ffa47899d850a6441505626ef858071 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:44:51 +0100
Subject: [PATCH 46/58] cleaning up typo

---
 integrations/opensearch/tests/test_document_store.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 3648b44a1f..e02d7a1aaf 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -1,4 +1,4 @@
-gi# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
+# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
 

From 041faa84eeea503323d42e9a7e9d46fded40752d Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:47:19 +0100
Subject: [PATCH 47/58] updating init

---
 .../components/retrievers/opensearch/__init__.py          | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py
index 5f80dbd69f..7641b6a421 100644
--- a/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py
+++ b/integrations/opensearch/src/haystack_integrations/components/retrievers/opensearch/__init__.py
@@ -5,11 +5,5 @@
 from .bm25_retriever import OpenSearchBM25Retriever
 from .embedding_retriever import OpenSearchEmbeddingRetriever
 from .open_search_hybrid_retriever import OpenSearchHybridRetriever
-from .sql_retriever import OpenSearchSQLRetriever
 
-__all__ = [
-    "OpenSearchBM25Retriever",
-    "OpenSearchEmbeddingRetriever",
-    "OpenSearchHybridRetriever",
-    "OpenSearchSQLRetriever",
-]
+__all__ = ["OpenSearchBM25Retriever", "OpenSearchEmbeddingRetriever", "OpenSearchHybridRetriever"]

From 5d45544de9ccaa0da950bd939606994513f1db4b Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:49:42 +0100
Subject: [PATCH 48/58] reverting docs update

---
 integrations/opensearch/pydoc/config_docusaurus.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/integrations/opensearch/pydoc/config_docusaurus.yml b/integrations/opensearch/pydoc/config_docusaurus.yml
index d10454000f..7e711f6f34 100644
--- a/integrations/opensearch/pydoc/config_docusaurus.yml
+++ b/integrations/opensearch/pydoc/config_docusaurus.yml
@@ -5,7 +5,6 @@ loaders:
   - haystack_integrations.components.retrievers.opensearch.bm25_retriever
   - haystack_integrations.components.retrievers.opensearch.embedding_retriever
   - haystack_integrations.components.retrievers.opensearch.open_search_hybrid_retriever
-  - haystack_integrations.components.retrievers.opensearch.sql_retriever
   - haystack_integrations.document_stores.opensearch.document_store
   - haystack_integrations.document_stores.opensearch.filters
   search_path:

From 131e3b5f878288db0ecced63c993ef05bd837d24 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Tue, 13 Jan 2026 15:54:01 +0100
Subject: [PATCH 49/58] removing tests for SQLRetriever

---
 .../opensearch/tests/test_sql_retriever.py    | 409 ------------------
 1 file changed, 409 deletions(-)
 delete mode 100644 integrations/opensearch/tests/test_sql_retriever.py

diff --git a/integrations/opensearch/tests/test_sql_retriever.py b/integrations/opensearch/tests/test_sql_retriever.py
deleted file mode 100644
index dba0b57e01..0000000000
--- a/integrations/opensearch/tests/test_sql_retriever.py
+++ /dev/null
@@ -1,409 +0,0 @@
-# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from unittest.mock import Mock, patch
-
-import pytest
-from haystack.dataclasses import Document
-
-from haystack_integrations.components.retrievers.opensearch import OpenSearchSQLRetriever
-from haystack_integrations.document_stores.opensearch import OpenSearchDocumentStore
-
-
-def test_init_default():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    retriever = OpenSearchSQLRetriever(document_store=mock_store)
-    assert retriever._document_store == mock_store
-    assert retriever._response_format == "json"
-    assert retriever._raise_on_failure is True
-
-
-def test_init_custom():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, response_format="csv", raise_on_failure=False)
-    assert retriever._response_format == "csv"
-    assert retriever._raise_on_failure is False
-
-
-def test_init_invalid_document_store():
-    with pytest.raises(ValueError, match="document_store must be an instance of OpenSearchDocumentStore"):
-        OpenSearchSQLRetriever(document_store="not a document store")
-
-
-@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
-def test_to_dict(_mock_opensearch_client):
-    document_store = OpenSearchDocumentStore(hosts="some fake host")
-    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
-    res = retriever.to_dict()
-    assert res["type"] == "haystack_integrations.components.retrievers.opensearch.sql_retriever.OpenSearchSQLRetriever"
-    assert res["init_parameters"]["response_format"] == "csv"
-    assert res["init_parameters"]["raise_on_failure"] is True
-    assert "document_store" in res["init_parameters"]
-
-
-@patch("haystack_integrations.document_stores.opensearch.document_store.OpenSearch")
-def test_from_dict(_mock_opensearch_client):
-    document_store = OpenSearchDocumentStore(hosts="some fake host")
-    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
-    data = retriever.to_dict()
-    retriever_from_dict = OpenSearchSQLRetriever.from_dict(data)
-    assert retriever_from_dict._response_format == "csv"
-    assert retriever_from_dict._raise_on_failure is True
-
-
-def test_run():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql.return_value = [{"content": "Test doc", "category": "A"}]
-    retriever = OpenSearchSQLRetriever(document_store=mock_store)
-    res = retriever.run(query="SELECT content, category FROM my_index WHERE category = 'A'")
-    mock_store._query_sql.assert_called_once_with(
-        query="SELECT content, category FROM my_index WHERE category = 'A'",
-        response_format="json",
-    )
-    assert len(res) == 1
-    assert "result" in res
-    assert res["result"] == [{"content": "Test doc", "category": "A"}]
-
-
-def test_run_with_custom_response_format():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql.return_value = "content,category\nTest doc,A"
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, response_format="csv")
-    res = retriever.run(query="SELECT content, category FROM my_index")
-    mock_store._query_sql.assert_called_once_with(query="SELECT content, category FROM my_index", response_format="csv")
-    assert res["result"] == "content,category\nTest doc,A"
-
-
-def test_run_with_runtime_response_format():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql.return_value = "raw response"
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, response_format="json")
-    res = retriever.run(query="SELECT * FROM my_index", response_format="raw")
-    mock_store._query_sql.assert_called_once_with(query="SELECT * FROM my_index", response_format="raw")
-    assert res["result"] == "raw response"
-
-
-def test_run_with_runtime_document_store():
-    mock_store1 = Mock(spec=OpenSearchDocumentStore)
-    mock_store2 = Mock(spec=OpenSearchDocumentStore)
-    mock_store2._query_sql.return_value = [{"result": "from store 2"}]
-    retriever = OpenSearchSQLRetriever(document_store=mock_store1)
-    res = retriever.run(query="SELECT * FROM my_index", document_store=mock_store2)
-    mock_store1._query_sql.assert_not_called()
-    mock_store2._query_sql.assert_called_once_with(query="SELECT * FROM my_index", response_format="json")
-    assert res["result"] == [{"result": "from store 2"}]
-
-
-def test_run_with_error_raise_on_failure():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql.side_effect = Exception("SQL error")
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=True)
-    with pytest.raises(Exception, match="SQL error"):
-        retriever.run(query="SELECT * FROM my_index")
-
-
-def test_run_with_error_no_raise():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql.side_effect = Exception("SQL error")
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=False)
-    res = retriever.run(query="SELECT * FROM my_index")
-    assert res["result"] is None
-
-
-@pytest.mark.integration
-def test_sql_retriever_basic_query(document_store: OpenSearchDocumentStore):
-    """Test basic SQL query execution with JSON format"""
-    docs = [
-        Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
-        Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
-        Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
-        Document(content="JavaScript development", meta={"category": "C", "status": "active", "priority": 1}),
-    ]
-    document_store.write_documents(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store)
-    sql_query = (
-        f"SELECT content, category, status, priority FROM {document_store._index} "  # noqa: S608
-        f"WHERE category = 'A' ORDER BY priority"
-    )
-    result = retriever.run(query=sql_query)
-
-    assert "result" in result
-    assert len(result["result"]) == 2
-    assert isinstance(result["result"], list)
-    assert all(isinstance(row, dict) for row in result["result"])
-
-    categories = [row.get("category") for row in result["result"]]
-    assert all(cat == "A" for cat in categories)
-
-    for row in result["result"]:
-        assert "content" in row
-        assert "category" in row
-        assert "status" in row
-        assert "priority" in row
-
-
-@pytest.mark.integration
-def test_sql_retriever_csv_format(document_store: OpenSearchDocumentStore):
-    """Test SQL query with CSV response format"""
-    docs = [
-        Document(content="Python programming", meta={"category": "A", "status": "active"}),
-        Document(content="Java programming", meta={"category": "B", "status": "active"}),
-    ]
-    document_store.write_documents(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
-    sql_query = f"SELECT content, category FROM {document_store._index}"  # noqa: S608
-    result = retriever.run(query=sql_query)
-
-    assert "result" in result
-    assert isinstance(result["result"], str)
-    assert "content" in result["result"]
-    assert "category" in result["result"]
-
-
-@pytest.mark.integration
-def test_sql_retriever_count_query(document_store: OpenSearchDocumentStore):
-    """Test COUNT query execution"""
-    docs = [
-        Document(content="Doc 1", meta={"category": "A"}),
-        Document(content="Doc 2", meta={"category": "B"}),
-        Document(content="Doc 3", meta={"category": "A"}),
-    ]
-    document_store.write_documents(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store)
-    count_query = f"SELECT COUNT(*) as total FROM {document_store._index}"  # noqa: S608
-    result = retriever.run(query=count_query)
-
-    assert "result" in result
-    assert result["result"] is not None
-
-
-@pytest.mark.integration
-def test_sql_retriever_with_filters(document_store: OpenSearchDocumentStore):
-    """Test SQL query with WHERE clause filtering"""
-
-    docs = [
-        Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
-        Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
-        Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
-    ]
-    document_store.write_documents(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store)
-    sql_query = (
-        f"SELECT content, category, status FROM {document_store._index} "  # noqa: S608
-        f"WHERE category = 'A' AND status = 'active'"
-    )
-    result = retriever.run(query=sql_query)
-
-    assert "result" in result
-    assert len(result["result"]) == 1
-    assert result["result"][0]["category"] == "A"
-    assert result["result"][0]["status"] == "active"
-
-
-@pytest.mark.integration
-def test_sql_retriever_runtime_response_format(document_store: OpenSearchDocumentStore):
-    """Test overriding response format at runtime"""
-    docs = [
-        Document(content="Python programming", meta={"category": "A"}),
-        Document(content="Java programming", meta={"category": "B"}),
-    ]
-    document_store.write_documents(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="json")
-    sql_query = f"SELECT content, category FROM {document_store._index}"  # noqa: S608
-
-    # Override with CSV format at runtime
-    result = retriever.run(query=sql_query, response_format="csv")
-    assert isinstance(result["result"], str)
-    assert "content" in result["result"]
-
-    # Use default JSON format
-    result_json = retriever.run(query=sql_query)
-    assert isinstance(result_json["result"], list)
-
-
-@pytest.mark.integration
-def test_sql_retriever_runtime_document_store_switching(
-    document_store: OpenSearchDocumentStore, document_store_2: OpenSearchDocumentStore
-):
-    """Test switching document stores at runtime"""
-    docs1 = [
-        Document(content="Python programming", meta={"category": "A"}),
-        Document(content="Java programming", meta={"category": "B"}),
-    ]
-    document_store.write_documents(docs1, refresh=True)
-
-    docs2 = [
-        Document(content="JavaScript development", meta={"category": "C"}),
-        Document(content="TypeScript development", meta={"category": "D"}),
-    ]
-    document_store_2.write_documents(docs2, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store)
-
-    # Query first store
-    sql_query1 = f"SELECT content, category FROM {document_store._index} WHERE category = 'A'"  # noqa: S608
-    result1 = retriever.run(query=sql_query1)
-    assert len(result1["result"]) == 1
-    assert "Python" in result1["result"][0]["content"]
-
-    # Query second store at runtime
-    sql_query2 = f"SELECT content, category FROM {document_store_2._index} WHERE category = 'C'"  # noqa: S608
-    result2 = retriever.run(query=sql_query2, document_store=document_store_2)
-    assert len(result2["result"]) == 1
-    assert "JavaScript" in result2["result"][0]["content"]
-
-    # Verify results are different
-    assert result1["result"][0]["content"] != result2["result"][0]["content"]
-
-
-@pytest.mark.integration
-def test_sql_retriever_error_handling(document_store: OpenSearchDocumentStore):
-    """Test error handling for invalid SQL queries"""
-    retriever = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=True)
-
-    invalid_query = "SELECT * FROM non_existent_index"
-    with pytest.raises(Exception, match="Failed to execute SQL query"):
-        retriever.run(query=invalid_query)
-
-    # Test with raise_on_failure=False
-    retriever_no_raise = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=False)
-    result = retriever_no_raise.run(query=invalid_query)
-    assert result["result"] is None
-
-
-@pytest.mark.asyncio
-async def test_run_async():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql_async.return_value = [{"content": "Test doc", "category": "A"}]
-    retriever = OpenSearchSQLRetriever(document_store=mock_store)
-    res = await retriever.run_async(query="SELECT content, category FROM my_index WHERE category = 'A'")
-    mock_store._query_sql_async.assert_called_once_with(
-        query="SELECT content, category FROM my_index WHERE category = 'A'",
-        response_format="json",
-    )
-    assert len(res) == 1
-    assert "result" in res
-    assert res["result"] == [{"content": "Test doc", "category": "A"}]
-
-
-@pytest.mark.asyncio
-async def test_run_async_with_error_raise_on_failure():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql_async.side_effect = Exception("SQL error")
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=True)
-    with pytest.raises(Exception, match="SQL error"):
-        await retriever.run_async(query="SELECT * FROM my_index")
-
-
-@pytest.mark.asyncio
-async def test_run_async_with_error_no_raise():
-    mock_store = Mock(spec=OpenSearchDocumentStore)
-    mock_store._query_sql_async.side_effect = Exception("SQL error")
-    retriever = OpenSearchSQLRetriever(document_store=mock_store, raise_on_failure=False)
-    res = await retriever.run_async(query="SELECT * FROM my_index")
-    assert res["result"] is None
-
-
-@pytest.mark.integration
-@pytest.mark.asyncio
-async def test_sql_retriever_async_basic_query(document_store: OpenSearchDocumentStore):
-    """Test basic async SQL query execution"""
-    docs = [
-        Document(content="Python programming", meta={"category": "A", "status": "active", "priority": 1}),
-        Document(content="Java programming", meta={"category": "B", "status": "active", "priority": 2}),
-        Document(content="Python scripting", meta={"category": "A", "status": "inactive", "priority": 3}),
-    ]
-    await document_store.write_documents_async(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store)
-    sql_query = (
-        f"SELECT content, category, status FROM {document_store._index} "  # noqa: S608
-        f"WHERE category = 'A' ORDER BY priority"
-    )
-    result = await retriever.run_async(query=sql_query)
-
-    assert "result" in result
-    assert len(result["result"]) == 2
-    assert isinstance(result["result"], list)
-    assert all(isinstance(row, dict) for row in result["result"])
-
-    categories = [row.get("category") for row in result["result"]]
-    assert all(cat == "A" for cat in categories)
-
-
-@pytest.mark.integration
-@pytest.mark.asyncio
-async def test_sql_retriever_async_csv_format(document_store: OpenSearchDocumentStore):
-    """Test async SQL query with CSV response format"""
-    docs = [
-        Document(content="Python programming", meta={"category": "A"}),
-        Document(content="Java programming", meta={"category": "B"}),
-    ]
-    await document_store.write_documents_async(docs, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store, response_format="csv")
-    sql_query = f"SELECT content, category FROM {document_store._index}"  # noqa: S608
-    result = await retriever.run_async(query=sql_query)
-
-    assert "result" in result
-    assert isinstance(result["result"], str)
-    assert "content" in result["result"]
-    assert "category" in result["result"]
-
-
-@pytest.mark.integration
-@pytest.mark.asyncio
-async def test_sql_retriever_async_runtime_document_store_switching(
-    document_store: OpenSearchDocumentStore, document_store_2: OpenSearchDocumentStore
-):
-    """Test async switching document stores at runtime"""
-    docs1 = [
-        Document(content="Python programming", meta={"category": "A"}),
-        Document(content="Java programming", meta={"category": "B"}),
-    ]
-    await document_store.write_documents_async(docs1, refresh=True)
-
-    docs2 = [
-        Document(content="JavaScript development", meta={"category": "C"}),
-        Document(content="TypeScript development", meta={"category": "D"}),
-    ]
-    await document_store_2.write_documents_async(docs2, refresh=True)
-
-    retriever = OpenSearchSQLRetriever(document_store=document_store)
-
-    # Query first store
-    sql_query1 = f"SELECT content, category FROM {document_store._index} WHERE category = 'A'"  # noqa: S608
-    result1 = await retriever.run_async(query=sql_query1)
-    assert len(result1["result"]) == 1
-    assert "Python" in result1["result"][0]["content"]
-
-    # Query second store at runtime
-    sql_query2 = f"SELECT content, category FROM {document_store_2._index} WHERE category = 'C'"  # noqa: S608
-    result2 = await retriever.run_async(query=sql_query2, document_store=document_store_2)
-    assert len(result2["result"]) == 1
-    assert "JavaScript" in result2["result"][0]["content"]
-
-    # Verify results are different
-    assert result1["result"][0]["content"] != result2["result"][0]["content"]
-
-
-@pytest.mark.integration
-@pytest.mark.asyncio
-async def test_sql_retriever_async_error_handling(document_store: OpenSearchDocumentStore):
-    """Test async error handling for invalid SQL queries"""
-    retriever = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=True)
-
-    invalid_query = "SELECT * FROM non_existent_index"
-    with pytest.raises(Exception, match="Failed to execute SQL query"):
-        await retriever.run_async(query=invalid_query)
-
-    # Test with raise_on_failure=False
-    retriever_no_raise = OpenSearchSQLRetriever(document_store=document_store, raise_on_failure=False)
-    result = await retriever_no_raise.run_async(query=invalid_query)
-    assert result["result"] is None

From a37108c0c1f1f3d3349ebb7f08eb3b57c65e9a51 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Wed, 14 Jan 2026 10:36:46 +0100
Subject: [PATCH 50/58] fixing after sync with main

---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 9f58bf18a3..e7d79d5ceb 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1189,7 +1189,7 @@ def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str,
         return aggs
 
     @staticmethod
-    def _build_distinct_values_query_body(filters: Optional[dict[str, Any]], aggs: dict[str, Any]) -> dict[str, Any]:
+    def _build_distinct_values_query_body(filters: dict[str, Any] | None, aggs: dict[str, Any]) -> dict[str, Any]:
         """
         Builds the query body for distinct values counting with filters and aggregations.
         """

From ad13c32e5e5e6858d2a512940b5a799c65ab8ec5 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Wed, 14 Jan 2026 10:46:39 +0100
Subject: [PATCH 51/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index e7d79d5ceb..61efc7029b 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1309,7 +1309,7 @@ def get_metadata_fields_info(self) -> dict[str, dict]:
         index_mapping = {k: v for k, v in index_mapping.items() if k not in SPECIAL_FIELDS}
         return index_mapping
 
-    async def get_metadata_fields_info_async(self) -> dict[str, dict]:
+    async def get_metadata_fields_info_async(self) -> dict[str, dict[str, str]]:
         """
         Asynchronously returns the information about the fields in the index.
 

From e9ccd9e5fd7527f54f4255b68c8b0290997af682 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Wed, 14 Jan 2026 10:46:50 +0100
Subject: [PATCH 52/58] Update
 integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 61efc7029b..b84e53ac69 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1280,7 +1280,7 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
 
-    def get_metadata_fields_info(self) -> dict[str, dict]:
+    def get_metadata_fields_info(self) -> dict[str, dict[str, str]]:
         """
         Returns the information about the fields in the index.
 

From e5b90f2e481a07936d86b0aeb96f5956c7bdcd3b Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Wed, 14 Jan 2026 10:43:32 +0100
Subject: [PATCH 53/58] removing SQLResponse/Retriever related code

---
 .../opensearch/document_store.py              | 35 +------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index b84e53ac69..6370f8a3ab 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1527,37 +1527,4 @@ async def get_metadata_field_unique_values_async(
         # Extract total count from cardinality aggregation
         total_count = int(aggregations.get("total_count", {}).get("value", 0))
 
-        return unique_values, total_count
-
-    def _prepare_sql_http_request_params(
-        self, base_url: str, response_format: ResponseFormat
-    ) -> tuple[str, dict[str, str], Any]:
-        """
-        Prepares HTTP request parameters for SQL query execution.
-        """
-        url = f"{base_url}/_plugins/_sql?format={response_format}"
-        headers = {"Content-Type": "application/json"}
-        auth = None
-        if self._http_auth:
-            if isinstance(self._http_auth, tuple):
-                auth = self._http_auth
-            elif isinstance(self._http_auth, AWSAuth):
-                # For AWS auth, we need to use the opensearchpy client
-                # Fall through to the try/except below
-                pass
-        return url, headers, auth
-
-    @staticmethod
-    def _process_sql_response(response_data: Any, response_format: ResponseFormat) -> Any:
-        """
-        Processes the SQL query response data.
-        """
-        if response_format == "json":
-            # extract only the query results
-            if isinstance(response_data, dict) and "hits" in response_data:
-                hits = response_data.get("hits", {}).get("hits", [])
-                # extract _source from each hit, which contains the actual document data
-                return [hit.get("_source", {}) for hit in hits]
-            return response_data
-        else:
-            return response_data if isinstance(response_data, str) else str(response_data)
+        return unique_values, total_count
\ No newline at end of file

From 87eaeebf8ced67478d4061ace9a1659bf45ea791 Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Wed, 14 Jan 2026 10:47:29 +0100
Subject: [PATCH 54/58] new line at end of file

---
 .../document_stores/opensearch/document_store.py                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 6370f8a3ab..de6370281c 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1527,4 +1527,4 @@ async def get_metadata_field_unique_values_async(
         # Extract total count from cardinality aggregation
         total_count = int(aggregations.get("total_count", {}).get("value", 0))
 
-        return unique_values, total_count
\ No newline at end of file
+        return unique_values, total_count

From 82f0fc24e1b6610b05c575a701403250073f3b4e Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Thu, 15 Jan 2026 13:12:22 +0100
Subject: [PATCH 55/58] updating return value on get_metadata_field_unique +
 count_unique_metadata_by_filter

---
 .../opensearch/document_store.py              | 86 +++++++++++++------
 .../opensearch/tests/test_document_store.py   | 65 ++++++++------
 .../tests/test_document_store_async.py        | 69 ++++++++-------
 3 files changed, 132 insertions(+), 88 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index de6370281c..ee1e3d8cc4 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1176,15 +1176,19 @@ async def count_documents_by_filter_async(self, filters: dict[str, Any]) -> int:
         return (await self._async_client.count(index=self._index, body=body))["count"]
 
     @staticmethod
-    def _build_cardinality_aggregations(index_mapping: dict[str, Any]) -> dict[str, Any]:
+    def _build_cardinality_aggregations(index_mapping: dict[str, Any], fields: list[str]) -> dict[str, Any]:
         """
-        Builds cardinality aggregations for all metadata fields in the index mapping.
+        Builds cardinality aggregations for specified metadata fields in the index mapping.
+
+        :param index_mapping: The index mapping containing field definitions.
+        :param fields: List of field names to build aggregations for.
+        :returns: Dictionary of cardinality aggregations.
 
         See: https://docs.opensearch.org/latest/aggregations/metric/cardinality/
         """
         aggs = {}
-        for field_name in index_mapping.keys():
-            if field_name not in SPECIAL_FIELDS:
+        for field_name in fields:
+            if field_name not in SPECIAL_FIELDS and field_name in index_mapping:
                 aggs[f"{field_name}_cardinality"] = {"cardinality": {"field": field_name}}
         return aggs
 
@@ -1210,27 +1214,36 @@ def _build_distinct_values_query_body(filters: dict[str, Any] | None, aggs: dict
 
     @staticmethod
     def _extract_distinct_counts_from_aggregations(
-        aggregations: dict[str, Any], index_mapping: dict[str, Any]
+        aggregations: dict[str, Any], index_mapping: dict[str, Any], fields: list[str]
     ) -> dict[str, int]:
         """
         Extracts distinct value counts from search result aggregations.
+
+        :param aggregations: The aggregations result from the search query.
+        :param index_mapping: The index mapping containing field definitions.
+        :param fields: List of field names to extract counts for.
+        :returns: Dictionary mapping field names to their distinct value counts.
         """
         distinct_counts = {}
-        for field_name in index_mapping.keys():
-            if field_name not in SPECIAL_FIELDS:
+        for field_name in fields:
+            if field_name not in SPECIAL_FIELDS and field_name in index_mapping:
                 agg_key = f"{field_name}_cardinality"
                 if agg_key in aggregations:
                     distinct_counts[field_name] = aggregations[agg_key]["value"]
         return distinct_counts
 
-    def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str, int]:
+    def count_unique_metadata_by_filter(self, filters: dict[str, Any], fields: list[str]) -> dict[str, int]:
         """
-        Returns the number of unique values for each metadata field of the documents that match the provided filters.
+        Returns the number of unique values for each specified metadata field of the documents
+        that match the provided filters.
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param fields: List of field names to calculate unique values for.
+            Field names can include or omit the "meta." prefix.
         :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
                   documents.
+        :raises ValueError: If any of the requested fields don't exist in the index mapping.
         """
         self._ensure_initialized()
         assert self._client is not None
@@ -1239,8 +1252,16 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str,
         mapping = self._client.indices.get_mapping(index=self._index)
         index_mapping = mapping[self._index]["mappings"]["properties"]
 
-        # build aggregations for each metadata field
-        aggs = self._build_cardinality_aggregations(index_mapping)
+        # normalize field names
+        normalized_fields = [self._normalize_metadata_field_name(field) for field in fields]
+        # validate that all requested fields exist in the index mapping
+        missing_fields = [f for f in normalized_fields if f not in index_mapping]
+        if missing_fields:
+            msg = f"Fields not found in index mapping: {missing_fields}"
+            raise ValueError(msg)
+
+        # build aggregations for specified metadata fields
+        aggs = self._build_cardinality_aggregations(index_mapping, normalized_fields)
         if not aggs:
             return {}
 
@@ -1249,17 +1270,22 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any]) -> dict[str,
         result = self._client.search(index=self._index, body=body)
 
         # extract cardinality values from aggregations
-        return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
+        return self._extract_distinct_counts_from_aggregations(
+            result.get("aggregations", {}), index_mapping, normalized_fields
+        )
 
-    async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -> dict[str, int]:
+    async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any], fields: list[str]) -> dict[str, int]:
         """
-        Asynchronously returns the number of unique values for each metadata field of the documents that match the
-        provided filters.
+        Asynchronously returns the number of unique values for each specified metadata field of the documents
+        that match the provided filters.
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
+        :param fields: List of field names to calculate unique values for.
+            Field names can include or omit the "meta." prefix.
         :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
                   documents.
+        :raises ValueError: If any of the requested fields don't exist in the index mapping.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None
@@ -1268,8 +1294,16 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -
         mapping = await self._async_client.indices.get_mapping(index=self._index)
         index_mapping = mapping[self._index]["mappings"]["properties"]
 
-        # build aggregations for each metadata field
-        aggs = self._build_cardinality_aggregations(index_mapping)
+        # normalize field names
+        normalized_fields = [self._normalize_metadata_field_name(field) for field in fields]
+        # validate that all requested fields exist in the index mapping
+        missing_fields = [f for f in normalized_fields if f not in index_mapping]
+        if missing_fields:
+            msg = f"Fields not found in index mapping: {missing_fields}"
+            raise ValueError(msg)
+
+        # build aggregations for specified metadata fields
+        aggs = self._build_cardinality_aggregations(index_mapping, normalized_fields)
         if not aggs:
             return {}
 
@@ -1278,7 +1312,9 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any]) -
         result = await self._async_client.search(index=self._index, body=body)
 
         # extract cardinality values from aggregations
-        return self._extract_distinct_counts_from_aggregations(result.get("aggregations", {}), index_mapping)
+        return self._extract_distinct_counts_from_aggregations(
+            result.get("aggregations", {}), index_mapping, normalized_fields
+        )
 
     def get_metadata_fields_info(self) -> dict[str, dict[str, str]]:
         """
@@ -1409,7 +1445,7 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
 
     def get_metadata_field_unique_values(
         self, metadata_field: str, search_term: str | None, from_: int, size: int
-    ) -> tuple[list[str], int]:
+    ) -> list[str]:
         """
         Returns unique values for a metadata field, optionally filtered by a search term in the content.
 
@@ -1463,14 +1499,11 @@ def get_metadata_field_unique_values(
         paginated_buckets = unique_values_buckets[from_ : from_ + size]
         unique_values = [str(bucket["key"]) for bucket in paginated_buckets]
 
-        # Extract total count from cardinality aggregation
-        total_count = int(aggregations.get("total_count", {}).get("value", 0))
-
-        return unique_values, total_count
+        return unique_values
 
     async def get_metadata_field_unique_values_async(
         self, metadata_field: str, search_term: str | None, from_: int, size: int
-    ) -> tuple[list[str], int]:
+    ) -> list[str]:
         """
         Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.
 
@@ -1524,7 +1557,4 @@ async def get_metadata_field_unique_values_async(
         paginated_buckets = unique_values_buckets[from_ : from_ + size]
         unique_values = [str(bucket["key"]) for bucket in paginated_buckets]
 
-        # Extract total count from cardinality aggregation
-        total_count = int(aggregations.get("total_count", {}).get("value", 0))
-
-        return unique_values, total_count
+        return unique_values
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index e02d7a1aaf..85dedf98fa 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -631,14 +631,17 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
         assert document_store.count_documents() == 5
 
         # Count distinct values for all documents
-        distinct_counts = document_store.count_unique_metadata_by_filter(filters={})
+        distinct_counts = document_store.count_unique_metadata_by_filter(
+            filters={}, fields=["category", "status", "priority"]
+        )
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
         assert distinct_counts["priority"] == 3  # 1, 2, 3
 
         # Count distinct values for documents with category="A"
         distinct_counts_a = document_store.count_unique_metadata_by_filter(
-            filters={"field": "meta.category", "operator": "==", "value": "A"}
+            filters={"field": "meta.category", "operator": "==", "value": "A"},
+            fields=["category", "status", "priority"],
         )
         assert distinct_counts_a["category"] == 1  # Only A
         assert distinct_counts_a["status"] == 2  # active, inactive
@@ -646,7 +649,8 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
 
         # Count distinct values for documents with status="active"
         distinct_counts_active = document_store.count_unique_metadata_by_filter(
-            filters={"field": "meta.status", "operator": "==", "value": "active"}
+            filters={"field": "meta.status", "operator": "==", "value": "active"},
+            fields=["category", "status", "priority"],
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
         assert distinct_counts_active["status"] == 1  # Only active
@@ -660,12 +664,33 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
                     {"field": "meta.category", "operator": "==", "value": "A"},
                     {"field": "meta.status", "operator": "==", "value": "active"},
                 ],
-            }
+            },
+            fields=["category", "status", "priority"],
         )
         assert distinct_counts_a_active["category"] == 1  # Only A
         assert distinct_counts_a_active["status"] == 1  # Only active
         assert distinct_counts_a_active["priority"] == 2  # 1, 3
 
+        # Test with only a subset of fields
+        distinct_counts_subset = document_store.count_unique_metadata_by_filter(
+            filters={}, fields=["category", "status"]
+        )
+        assert distinct_counts_subset["category"] == 3
+        assert distinct_counts_subset["status"] == 2
+        assert "priority" not in distinct_counts_subset
+
+        # Test field name normalization (with "meta." prefix)
+        distinct_counts_normalized = document_store.count_unique_metadata_by_filter(
+            filters={}, fields=["meta.category", "status", "meta.priority"]
+        )
+        assert distinct_counts_normalized["category"] == 3
+        assert distinct_counts_normalized["status"] == 2
+        assert distinct_counts_normalized["priority"] == 3
+
+        # Test error handling when field doesn't exist
+        with pytest.raises(ValueError, match="Fields not found in index mapping"):
+            document_store.count_unique_metadata_by_filter(filters={}, fields=["nonexistent_field"])
+
     def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore):
         docs = [
             Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}),
@@ -733,40 +758,30 @@ def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocume
         document_store.write_documents(docs)
 
         # Test getting all unique values without search term
-        unique_values, total_count = document_store.get_metadata_field_unique_values("meta.category", None, 0, 10)
+        unique_values = document_store.get_metadata_field_unique_values("meta.category", None, 0, 10)
         assert set(unique_values) == {"A", "B", "C"}
-        assert total_count == 3
 
         # Test with "meta." prefix
-        unique_languages, lang_count = document_store.get_metadata_field_unique_values("meta.language", None, 0, 10)
+        unique_languages = document_store.get_metadata_field_unique_values("meta.language", None, 0, 10)
         assert set(unique_languages) == {"Python", "Java", "JavaScript"}
-        assert lang_count == 3
 
         # Test pagination - first page
-        unique_values_page1, total_count = document_store.get_metadata_field_unique_values("meta.category", None, 0, 2)
+        unique_values_page1 = document_store.get_metadata_field_unique_values("meta.category", None, 0, 2)
         assert len(unique_values_page1) == 2
-        assert total_count == 3
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
 
         # Test pagination - second page
-        unique_values_page2, total_count = document_store.get_metadata_field_unique_values("meta.category", None, 2, 2)
+        unique_values_page2 = document_store.get_metadata_field_unique_values("meta.category", None, 2, 2)
         assert len(unique_values_page2) == 1
-        assert total_count == 3
         assert unique_values_page2[0] in ["A", "B", "C"]
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered, total_count = document_store.get_metadata_field_unique_values(
-            "meta.category", "Python", 0, 10
-        )
+        unique_values_filtered = document_store.get_metadata_field_unique_values("meta.category", "Python", 0, 10)
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
-        assert total_count == 1
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java, total_count = document_store.get_metadata_field_unique_values(
-            "meta.category", "Java", 0, 10
-        )
+        unique_values_java = document_store.get_metadata_field_unique_values("meta.category", "Java", 0, 10)
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
-        assert total_count == 1
 
         # Test with integer values
         int_docs = [
@@ -776,18 +791,12 @@ def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocume
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         document_store.write_documents(int_docs)
-        unique_priorities, priority_count = document_store.get_metadata_field_unique_values(
-            "meta.priority", None, 0, 10
-        )
+        unique_priorities = document_store.get_metadata_field_unique_values("meta.priority", None, 0, 10)
         assert set(unique_priorities) == {"1", "2", "3"}
-        assert priority_count == 3
 
         # Test with search term on integer field
-        unique_priorities_filtered, priority_count = document_store.get_metadata_field_unique_values(
-            "meta.priority", "Doc 1", 0, 10
-        )
+        unique_priorities_filtered = document_store.get_metadata_field_unique_values("meta.priority", "Doc 1", 0, 10)
         assert set(unique_priorities_filtered) == {"1"}
-        assert priority_count == 1
 
     @pytest.mark.integration
     def test_write_with_routing(self, document_store: OpenSearchDocumentStore):
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index a1b5271400..6d438c86ba 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -281,14 +281,17 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
         assert await document_store.count_documents_async() == 5
 
         # count distinct values for all documents
-        distinct_counts = await document_store.count_unique_metadata_by_filter_async(filters={})
+        distinct_counts = await document_store.count_unique_metadata_by_filter_async(
+            filters={}, fields=["category", "status", "priority"]
+        )
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
         assert distinct_counts["priority"] == 3  # 1, 2, 3
 
         # count distinct values for documents with category="A"
         distinct_counts_a = await document_store.count_unique_metadata_by_filter_async(
-            filters={"field": "meta.category", "operator": "==", "value": "A"}
+            filters={"field": "meta.category", "operator": "==", "value": "A"},
+            fields=["category", "status", "priority"],
         )
         assert distinct_counts_a["category"] == 1  # Only A
         assert distinct_counts_a["status"] == 2  # active, inactive
@@ -296,7 +299,8 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
 
         # count distinct values for documents with status="active"
         distinct_counts_active = await document_store.count_unique_metadata_by_filter_async(
-            filters={"field": "meta.status", "operator": "==", "value": "active"}
+            filters={"field": "meta.status", "operator": "==", "value": "active"},
+            fields=["category", "status", "priority"],
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
         assert distinct_counts_active["status"] == 1  # Only active
@@ -310,12 +314,33 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
                     {"field": "meta.category", "operator": "==", "value": "A"},
                     {"field": "meta.status", "operator": "==", "value": "active"},
                 ],
-            }
+            },
+            fields=["category", "status", "priority"],
         )
         assert distinct_counts_a_active["category"] == 1  # Only A
         assert distinct_counts_a_active["status"] == 1  # Only active
         assert distinct_counts_a_active["priority"] == 2  # 1, 3
 
+        # Test with only a subset of fields
+        distinct_counts_subset = await document_store.count_unique_metadata_by_filter_async(
+            filters={}, fields=["category", "status"]
+        )
+        assert distinct_counts_subset["category"] == 3
+        assert distinct_counts_subset["status"] == 2
+        assert "priority" not in distinct_counts_subset
+
+        # Test field name normalization (with "meta." prefix)
+        distinct_counts_normalized = await document_store.count_unique_metadata_by_filter_async(
+            filters={}, fields=["meta.category", "status", "meta.priority"]
+        )
+        assert distinct_counts_normalized["category"] == 3
+        assert distinct_counts_normalized["status"] == 2
+        assert distinct_counts_normalized["priority"] == 3
+
+        # Test error handling when field doesn't exist
+        with pytest.raises(ValueError, match="Fields not found in index mapping"):
+            await document_store.count_unique_metadata_by_filter_async(filters={}, fields=["nonexistent_field"])
+
     @pytest.mark.asyncio
     async def test_delete_documents(self, document_store: OpenSearchDocumentStore):
         doc = Document(content="test doc")
@@ -542,48 +567,32 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Open
         await document_store.write_documents_async(docs)
 
         # Test getting all unique values without search term
-        unique_values, total_count = await document_store.get_metadata_field_unique_values_async(
-            "meta.category", None, 0, 10
-        )
+        unique_values = await document_store.get_metadata_field_unique_values_async("meta.category", None, 0, 10)
         assert set(unique_values) == {"A", "B", "C"}
-        assert total_count == 3
 
         # Test with "meta." prefix
-        unique_languages, lang_count = await document_store.get_metadata_field_unique_values_async(
-            "meta.language", None, 0, 10
-        )
+        unique_languages = await document_store.get_metadata_field_unique_values_async("meta.language", None, 0, 10)
         assert set(unique_languages) == {"Python", "Java", "JavaScript"}
-        assert lang_count == 3
 
         # Test pagination - first page
-        unique_values_page1, total_count = await document_store.get_metadata_field_unique_values_async(
-            "meta.category", None, 0, 2
-        )
+        unique_values_page1 = await document_store.get_metadata_field_unique_values_async("meta.category", None, 0, 2)
         assert len(unique_values_page1) == 2
-        assert total_count == 3
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
 
         # Test pagination - second page
-        unique_values_page2, total_count = await document_store.get_metadata_field_unique_values_async(
-            "meta.category", None, 2, 2
-        )
+        unique_values_page2 = await document_store.get_metadata_field_unique_values_async("meta.category", None, 2, 2)
         assert len(unique_values_page2) == 1
-        assert total_count == 3
         assert unique_values_page2[0] in ["A", "B", "C"]
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered, total_count = await document_store.get_metadata_field_unique_values_async(
+        unique_values_filtered = await document_store.get_metadata_field_unique_values_async(
             "meta.category", "Python", 0, 10
         )
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
-        assert total_count == 1
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java, total_count = await document_store.get_metadata_field_unique_values_async(
-            "meta.category", "Java", 0, 10
-        )
+        unique_values_java = await document_store.get_metadata_field_unique_values_async("meta.category", "Java", 0, 10)
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
-        assert total_count == 1
 
         # Test with integer values
         int_docs = [
@@ -593,15 +602,11 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Open
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         await document_store.write_documents_async(int_docs)
-        unique_priorities, priority_count = await document_store.get_metadata_field_unique_values_async(
-            "meta.priority", None, 0, 10
-        )
+        unique_priorities = await document_store.get_metadata_field_unique_values_async("meta.priority", None, 0, 10)
         assert set(unique_priorities) == {"1", "2", "3"}
-        assert priority_count == 3
 
         # Test with search term on integer field
-        unique_priorities_filtered, priority_count = await document_store.get_metadata_field_unique_values_async(
+        unique_priorities_filtered = await document_store.get_metadata_field_unique_values_async(
             "meta.priority", "Doc 1", 0, 10
         )
         assert set(unique_priorities_filtered) == {"1"}
-        assert priority_count == 1

From 1f1dc00791bea568e0291a720aaf31db43a2ba0b Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Thu, 15 Jan 2026 14:37:45 +0100
Subject: [PATCH 56/58] updating params name

---
 .../opensearch/document_store.py              | 26 ++++++++++---------
 .../opensearch/tests/test_document_store.py   | 14 +++++-----
 .../tests/test_document_store_async.py        | 16 +++++++-----
 3 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index ee1e3d8cc4..71a19ffe3a 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1232,14 +1232,14 @@ def _extract_distinct_counts_from_aggregations(
                     distinct_counts[field_name] = aggregations[agg_key]["value"]
         return distinct_counts
 
-    def count_unique_metadata_by_filter(self, filters: dict[str, Any], fields: list[str]) -> dict[str, int]:
+    def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
         """
         Returns the number of unique values for each specified metadata field of the documents
         that match the provided filters.
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :param fields: List of field names to calculate unique values for.
+        :param metadata_fields: List of field names to calculate unique values for.
             Field names can include or omit the "meta." prefix.
         :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
                   documents.
@@ -1253,15 +1253,15 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any], fields: list[
         index_mapping = mapping[self._index]["mappings"]["properties"]
 
         # normalize field names
-        normalized_fields = [self._normalize_metadata_field_name(field) for field in fields]
+        normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
         # validate that all requested fields exist in the index mapping
-        missing_fields = [f for f in normalized_fields if f not in index_mapping]
+        missing_fields = [f for f in normalized_metadata_fields if f not in index_mapping]
         if missing_fields:
             msg = f"Fields not found in index mapping: {missing_fields}"
             raise ValueError(msg)
 
         # build aggregations for specified metadata fields
-        aggs = self._build_cardinality_aggregations(index_mapping, normalized_fields)
+        aggs = self._build_cardinality_aggregations(index_mapping, normalized_metadata_fields)
         if not aggs:
             return {}
 
@@ -1271,17 +1271,19 @@ def count_unique_metadata_by_filter(self, filters: dict[str, Any], fields: list[
 
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(
-            result.get("aggregations", {}), index_mapping, normalized_fields
+            result.get("aggregations", {}), index_mapping, normalized_metadata_fields
         )
 
-    async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any], fields: list[str]) -> dict[str, int]:
+    async def count_unique_metadata_by_filter_async(
+        self, filters: dict[str, Any], metadata_fields: list[str]
+    ) -> dict[str, int]:
         """
         Asynchronously returns the number of unique values for each specified metadata field of the documents
         that match the provided filters.
 
         :param filters: The filters to apply to count documents.
             For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
-        :param fields: List of field names to calculate unique values for.
+        :param metadata_fields: List of field names to calculate unique values for.
             Field names can include or omit the "meta." prefix.
         :returns: A dictionary mapping each metadata field name to the count of its unique values among the filtered
                   documents.
@@ -1295,15 +1297,15 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any], f
         index_mapping = mapping[self._index]["mappings"]["properties"]
 
         # normalize field names
-        normalized_fields = [self._normalize_metadata_field_name(field) for field in fields]
+        normalized_metadata_fields = [self._normalize_metadata_field_name(field) for field in metadata_fields]
         # validate that all requested fields exist in the index mapping
-        missing_fields = [f for f in normalized_fields if f not in index_mapping]
+        missing_fields = [f for f in normalized_metadata_fields if f not in index_mapping]
         if missing_fields:
             msg = f"Fields not found in index mapping: {missing_fields}"
             raise ValueError(msg)
 
         # build aggregations for specified metadata fields
-        aggs = self._build_cardinality_aggregations(index_mapping, normalized_fields)
+        aggs = self._build_cardinality_aggregations(index_mapping, normalized_metadata_fields)
         if not aggs:
             return {}
 
@@ -1313,7 +1315,7 @@ async def count_unique_metadata_by_filter_async(self, filters: dict[str, Any], f
 
         # extract cardinality values from aggregations
         return self._extract_distinct_counts_from_aggregations(
-            result.get("aggregations", {}), index_mapping, normalized_fields
+            result.get("aggregations", {}), index_mapping, normalized_metadata_fields
         )
 
     def get_metadata_fields_info(self) -> dict[str, dict[str, str]]:
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index 85dedf98fa..bad19c4dbe 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -632,7 +632,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
 
         # Count distinct values for all documents
         distinct_counts = document_store.count_unique_metadata_by_filter(
-            filters={}, fields=["category", "status", "priority"]
+            filters={}, metadata_fields=["category", "status", "priority"]
         )
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
@@ -641,7 +641,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
         # Count distinct values for documents with category="A"
         distinct_counts_a = document_store.count_unique_metadata_by_filter(
             filters={"field": "meta.category", "operator": "==", "value": "A"},
-            fields=["category", "status", "priority"],
+            metadata_fields=["category", "status", "priority"],
         )
         assert distinct_counts_a["category"] == 1  # Only A
         assert distinct_counts_a["status"] == 2  # active, inactive
@@ -650,7 +650,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
         # Count distinct values for documents with status="active"
         distinct_counts_active = document_store.count_unique_metadata_by_filter(
             filters={"field": "meta.status", "operator": "==", "value": "active"},
-            fields=["category", "status", "priority"],
+            metadata_fields=["category", "status", "priority"],
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
         assert distinct_counts_active["status"] == 1  # Only active
@@ -665,7 +665,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
                     {"field": "meta.status", "operator": "==", "value": "active"},
                 ],
             },
-            fields=["category", "status", "priority"],
+            metadata_fields=["category", "status", "priority"],
         )
         assert distinct_counts_a_active["category"] == 1  # Only A
         assert distinct_counts_a_active["status"] == 1  # Only active
@@ -673,7 +673,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
 
         # Test with only a subset of fields
         distinct_counts_subset = document_store.count_unique_metadata_by_filter(
-            filters={}, fields=["category", "status"]
+            filters={}, metadata_fields=["category", "status"]
         )
         assert distinct_counts_subset["category"] == 3
         assert distinct_counts_subset["status"] == 2
@@ -681,7 +681,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
 
         # Test field name normalization (with "meta." prefix)
         distinct_counts_normalized = document_store.count_unique_metadata_by_filter(
-            filters={}, fields=["meta.category", "status", "meta.priority"]
+            filters={}, metadata_fields=["meta.category", "status", "meta.priority"]
         )
         assert distinct_counts_normalized["category"] == 3
         assert distinct_counts_normalized["status"] == 2
@@ -689,7 +689,7 @@ def test_count_unique_metadata_by_filter(self, document_store: OpenSearchDocumen
 
         # Test error handling when field doesn't exist
         with pytest.raises(ValueError, match="Fields not found in index mapping"):
-            document_store.count_unique_metadata_by_filter(filters={}, fields=["nonexistent_field"])
+            document_store.count_unique_metadata_by_filter(filters={}, metadata_fields=["nonexistent_field"])
 
     def test_get_metadata_fields_info(self, document_store: OpenSearchDocumentStore):
         docs = [
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index 6d438c86ba..b33ee2677e 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -282,7 +282,7 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
 
         # count distinct values for all documents
         distinct_counts = await document_store.count_unique_metadata_by_filter_async(
-            filters={}, fields=["category", "status", "priority"]
+            filters={}, metadata_fields=["category", "status", "priority"]
         )
         assert distinct_counts["category"] == 3  # A, B, C
         assert distinct_counts["status"] == 2  # active, inactive
@@ -291,7 +291,7 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
         # count distinct values for documents with category="A"
         distinct_counts_a = await document_store.count_unique_metadata_by_filter_async(
             filters={"field": "meta.category", "operator": "==", "value": "A"},
-            fields=["category", "status", "priority"],
+            metadata_fields=["category", "status", "priority"],
         )
         assert distinct_counts_a["category"] == 1  # Only A
         assert distinct_counts_a["status"] == 2  # active, inactive
@@ -300,7 +300,7 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
         # count distinct values for documents with status="active"
         distinct_counts_active = await document_store.count_unique_metadata_by_filter_async(
             filters={"field": "meta.status", "operator": "==", "value": "active"},
-            fields=["category", "status", "priority"],
+            metadata_fields=["category", "status", "priority"],
         )
         assert distinct_counts_active["category"] == 3  # A, B, C
         assert distinct_counts_active["status"] == 1  # Only active
@@ -315,7 +315,7 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
                     {"field": "meta.status", "operator": "==", "value": "active"},
                 ],
             },
-            fields=["category", "status", "priority"],
+            metadata_fields=["category", "status", "priority"],
         )
         assert distinct_counts_a_active["category"] == 1  # Only A
         assert distinct_counts_a_active["status"] == 1  # Only active
@@ -323,7 +323,7 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
 
         # Test with only a subset of fields
         distinct_counts_subset = await document_store.count_unique_metadata_by_filter_async(
-            filters={}, fields=["category", "status"]
+            filters={}, metadata_fields=["category", "status"]
         )
         assert distinct_counts_subset["category"] == 3
         assert distinct_counts_subset["status"] == 2
@@ -331,7 +331,7 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
 
         # Test field name normalization (with "meta." prefix)
         distinct_counts_normalized = await document_store.count_unique_metadata_by_filter_async(
-            filters={}, fields=["meta.category", "status", "meta.priority"]
+            filters={}, metadata_fields=["meta.category", "status", "meta.priority"]
         )
         assert distinct_counts_normalized["category"] == 3
         assert distinct_counts_normalized["status"] == 2
@@ -339,7 +339,9 @@ async def test_count_unique_metadata_by_filter(self, document_store: OpenSearchD
 
         # Test error handling when field doesn't exist
         with pytest.raises(ValueError, match="Fields not found in index mapping"):
-            await document_store.count_unique_metadata_by_filter_async(filters={}, fields=["nonexistent_field"])
+            await document_store.count_unique_metadata_by_filter_async(
+                filters={}, metadata_fields=["nonexistent_field"]
+            )
 
     @pytest.mark.asyncio
     async def test_delete_documents(self, document_store: OpenSearchDocumentStore):

From da07149743177904dbf6f5473316501733b1bc6c Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Thu, 15 Jan 2026 15:07:22 +0100
Subject: [PATCH 57/58] updating
 document_store.get_metadata_field_unique_values

---
 .../opensearch/document_store.py              | 122 ++++++++++--------
 .../opensearch/tests/test_document_store.py   |  26 ++--
 .../tests/test_document_store_async.py        |  34 +++--
 3 files changed, 108 insertions(+), 74 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 71a19ffe3a..3bfe7a3675 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -1446,16 +1446,24 @@ async def get_metadata_field_min_max_async(self, metadata_field: str) -> dict[st
         return self._extract_min_max_from_stats(stats)
 
     def get_metadata_field_unique_values(
-        self, metadata_field: str, search_term: str | None, from_: int, size: int
-    ) -> list[str]:
+        self,
+        metadata_field: str,
+        search_term: str | None = None,
+        size: int | None = 10000,
+        after: dict[str, Any] | None = None,
+    ) -> tuple[list[str], dict[str, Any] | None]:
         """
         Returns unique values for a metadata field, optionally filtered by a search term in the content.
+        Uses composite aggregations for proper pagination beyond 10k results.
 
         :param metadata_field: The metadata field to get unique values for.
         :param search_term: Optional search term to filter documents by matching in the content field.
-        :param from_: The starting index for pagination.
-        :param size: The number of unique values to return.
-        :returns: A tuple containing (list of unique values, total count of unique values).
+        :param size: The number of unique values to return per page. Defaults to 10000.
+        :param after: Optional pagination key from the previous response. Use None for the first page.
+            For subsequent pages, pass the `after_key` from the previous response.
+        :returns: A tuple containing (list of unique values, after_key for pagination).
+            The after_key is None once a page holds fewer than `size` values (no further pages); otherwise
+            pass it as the `after` parameter to request the next page, which may be empty.
         """
         self._ensure_initialized()
         assert self._client is not None
@@ -1468,26 +1476,20 @@ def get_metadata_field_unique_values(
             # Use match_phrase for exact phrase matching to avoid tokenization issues
             query = {"match_phrase": {"content": search_term}}
 
-        # Build aggregations
-        # Terms aggregation for paginated unique values
-        # Note: Terms aggregation doesn't support 'from' parameter directly,
-        # so we fetch from_ + size results and slice them
-        # Cardinality aggregation for total count
-        terms_size = from_ + size if from_ > 0 else size
+        # Build composite aggregation for proper pagination
+        composite_agg: dict[str, Any] = {
+            "size": size,
+            "sources": [{field_name: {"terms": {"field": field_name}}}],
+        }
+        if after is not None:
+            composite_agg["after"] = after
+
         body = {
             "query": query,
             "aggs": {
                 "unique_values": {
-                    "terms": {
-                        "field": field_name,
-                        "size": terms_size,
-                    }
-                },
-                "total_count": {
-                    "cardinality": {
-                        "field": field_name,
-                    }
-                },
+                    "composite": composite_agg,
+                }
             },
             "size": 0,  # we only need aggregations, not documents
         }
@@ -1495,25 +1497,38 @@ def get_metadata_field_unique_values(
         result = self._client.search(index=self._index, body=body)
         aggregations = result.get("aggregations", {})
 
-        # Extract unique values from terms aggregation buckets
-        unique_values_buckets = aggregations.get("unique_values", {}).get("buckets", [])
-        # Apply pagination by slicing the results
-        paginated_buckets = unique_values_buckets[from_ : from_ + size]
-        unique_values = [str(bucket["key"]) for bucket in paginated_buckets]
+        # Extract unique values from composite aggregation buckets
+        unique_values_agg = aggregations.get("unique_values", {})
+        unique_values_buckets = unique_values_agg.get("buckets", [])
+        unique_values = [str(bucket["key"][field_name]) for bucket in unique_values_buckets]
+
+        # Extract after_key for pagination
+        # If we got fewer results than requested, we've reached the end
+        after_key = unique_values_agg.get("after_key")
+        if after_key is not None and size is not None and len(unique_values_buckets) < size:
+            after_key = None
 
-        return unique_values
+        return unique_values, after_key
 
     async def get_metadata_field_unique_values_async(
-        self, metadata_field: str, search_term: str | None, from_: int, size: int
-    ) -> list[str]:
+        self,
+        metadata_field: str,
+        search_term: str | None = None,
+        size: int | None = 10000,
+        after: dict[str, Any] | None = None,
+    ) -> tuple[list[str], dict[str, Any] | None]:
         """
         Asynchronously returns unique values for a metadata field, optionally filtered by a search term in the content.
+        Uses composite aggregations for proper pagination beyond 10k results.
 
         :param metadata_field: The metadata field to get unique values for.
         :param search_term: Optional search term to filter documents by matching in the content field.
-        :param from_: The starting index for pagination.
-        :param size: The number of unique values to return.
-        :returns: A tuple containing (list of unique values, total count of unique values).
+        :param size: The number of unique values to return per page. Defaults to 10000.
+        :param after: Optional pagination key from the previous response. Use None for the first page.
+            For subsequent pages, pass the `after_key` from the previous response.
+        :returns: A tuple containing (list of unique values, after_key for pagination).
+            The after_key is None once a page holds fewer than `size` values (no further pages); otherwise
+            pass it as the `after` parameter to request the next page, which may be empty.
         """
         await self._ensure_initialized_async()
         assert self._async_client is not None
@@ -1526,26 +1541,20 @@ async def get_metadata_field_unique_values_async(
             # Use match_phrase for exact phrase matching to avoid tokenization issues
             query = {"match_phrase": {"content": search_term}}
 
-        # Build aggregations
-        # Terms aggregation for paginated unique values
-        # Note: Terms aggregation doesn't support 'from' parameter directly,
-        # so we fetch from_ + size results and slice them
-        # Cardinality aggregation for total count
-        terms_size = from_ + size if from_ > 0 else size
+        # Build composite aggregation for proper pagination
+        composite_agg: dict[str, Any] = {
+            "size": size,
+            "sources": [{field_name: {"terms": {"field": field_name}}}],
+        }
+        if after is not None:
+            composite_agg["after"] = after
+
         body = {
             "query": query,
             "aggs": {
                 "unique_values": {
-                    "terms": {
-                        "field": field_name,
-                        "size": terms_size,
-                    }
-                },
-                "total_count": {
-                    "cardinality": {
-                        "field": field_name,
-                    }
-                },
+                    "composite": composite_agg,
+                }
             },
             "size": 0,  # we only need aggregations, not documents
         }
@@ -1553,10 +1562,15 @@ async def get_metadata_field_unique_values_async(
         result = await self._async_client.search(index=self._index, body=body)
         aggregations = result.get("aggregations", {})
 
-        # Extract unique values from terms aggregation buckets
-        unique_values_buckets = aggregations.get("unique_values", {}).get("buckets", [])
-        # Apply pagination by slicing the results
-        paginated_buckets = unique_values_buckets[from_ : from_ + size]
-        unique_values = [str(bucket["key"]) for bucket in paginated_buckets]
+        # Extract unique values from composite aggregation buckets
+        unique_values_agg = aggregations.get("unique_values", {})
+        unique_values_buckets = unique_values_agg.get("buckets", [])
+        unique_values = [str(bucket["key"][field_name]) for bucket in unique_values_buckets]
+
+        # Extract after_key for pagination
+        # If we got fewer results than requested, we've reached the end
+        after_key = unique_values_agg.get("after_key")
+        if after_key is not None and size is not None and len(unique_values_buckets) < size:
+            after_key = None
 
-        return unique_values
+        return unique_values, after_key
diff --git a/integrations/opensearch/tests/test_document_store.py b/integrations/opensearch/tests/test_document_store.py
index bad19c4dbe..c45288b3d7 100644
--- a/integrations/opensearch/tests/test_document_store.py
+++ b/integrations/opensearch/tests/test_document_store.py
@@ -758,29 +758,37 @@ def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocume
         document_store.write_documents(docs)
 
         # Test getting all unique values without search term
-        unique_values = document_store.get_metadata_field_unique_values("meta.category", None, 0, 10)
+        unique_values, after_key = document_store.get_metadata_field_unique_values("meta.category", None, 10)
         assert set(unique_values) == {"A", "B", "C"}
+        # after_key should be None when all results are returned
+        assert after_key is None
 
         # Test with "meta." prefix
-        unique_languages = document_store.get_metadata_field_unique_values("meta.language", None, 0, 10)
+        unique_languages, _ = document_store.get_metadata_field_unique_values("meta.language", None, 10)
         assert set(unique_languages) == {"Python", "Java", "JavaScript"}
 
         # Test pagination - first page
-        unique_values_page1 = document_store.get_metadata_field_unique_values("meta.category", None, 0, 2)
+        unique_values_page1, after_key_page1 = document_store.get_metadata_field_unique_values("meta.category", None, 2)
         assert len(unique_values_page1) == 2
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
+        # Should have an after_key for pagination
+        assert after_key_page1 is not None
 
-        # Test pagination - second page
-        unique_values_page2 = document_store.get_metadata_field_unique_values("meta.category", None, 2, 2)
+        # Test pagination - second page using after_key
+        unique_values_page2, after_key_page2 = document_store.get_metadata_field_unique_values(
+            "meta.category", None, 2, after=after_key_page1
+        )
         assert len(unique_values_page2) == 1
         assert unique_values_page2[0] in ["A", "B", "C"]
+        # Should have no more results
+        assert after_key_page2 is None
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered = document_store.get_metadata_field_unique_values("meta.category", "Python", 0, 10)
+        unique_values_filtered, _ = document_store.get_metadata_field_unique_values("meta.category", "Python", 10)
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java = document_store.get_metadata_field_unique_values("meta.category", "Java", 0, 10)
+        unique_values_java, _ = document_store.get_metadata_field_unique_values("meta.category", "Java", 10)
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
 
         # Test with integer values
@@ -791,11 +799,11 @@ def test_get_metadata_field_unique_values(self, document_store: OpenSearchDocume
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         document_store.write_documents(int_docs)
-        unique_priorities = document_store.get_metadata_field_unique_values("meta.priority", None, 0, 10)
+        unique_priorities, _ = document_store.get_metadata_field_unique_values("meta.priority", None, 10)
         assert set(unique_priorities) == {"1", "2", "3"}
 
         # Test with search term on integer field
-        unique_priorities_filtered = document_store.get_metadata_field_unique_values("meta.priority", "Doc 1", 0, 10)
+        unique_priorities_filtered, _ = document_store.get_metadata_field_unique_values("meta.priority", "Doc 1", 10)
         assert set(unique_priorities_filtered) == {"1"}
 
     @pytest.mark.integration
diff --git a/integrations/opensearch/tests/test_document_store_async.py b/integrations/opensearch/tests/test_document_store_async.py
index b33ee2677e..18ceb3c4de 100644
--- a/integrations/opensearch/tests/test_document_store_async.py
+++ b/integrations/opensearch/tests/test_document_store_async.py
@@ -569,31 +569,43 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Open
         await document_store.write_documents_async(docs)
 
         # Test getting all unique values without search term
-        unique_values = await document_store.get_metadata_field_unique_values_async("meta.category", None, 0, 10)
+        unique_values, after_key = await document_store.get_metadata_field_unique_values_async(
+            "meta.category", None, 10
+        )
         assert set(unique_values) == {"A", "B", "C"}
+        # after_key should be None when all results are returned
+        assert after_key is None
 
         # Test with "meta." prefix
-        unique_languages = await document_store.get_metadata_field_unique_values_async("meta.language", None, 0, 10)
+        unique_languages, _ = await document_store.get_metadata_field_unique_values_async("meta.language", None, 10)
         assert set(unique_languages) == {"Python", "Java", "JavaScript"}
 
         # Test pagination - first page
-        unique_values_page1 = await document_store.get_metadata_field_unique_values_async("meta.category", None, 0, 2)
+        unique_values_page1, after_key_page1 = await document_store.get_metadata_field_unique_values_async(
+            "meta.category", None, 2
+        )
         assert len(unique_values_page1) == 2
         assert all(val in ["A", "B", "C"] for val in unique_values_page1)
+        # Should have an after_key for pagination
+        assert after_key_page1 is not None
 
-        # Test pagination - second page
-        unique_values_page2 = await document_store.get_metadata_field_unique_values_async("meta.category", None, 2, 2)
+        # Test pagination - second page using after_key
+        unique_values_page2, after_key_page2 = await document_store.get_metadata_field_unique_values_async(
+            "meta.category", None, 2, after=after_key_page1
+        )
         assert len(unique_values_page2) == 1
         assert unique_values_page2[0] in ["A", "B", "C"]
+        # Should have no more results
+        assert after_key_page2 is None
 
         # Test with search term - filter by content matching "Python"
-        unique_values_filtered = await document_store.get_metadata_field_unique_values_async(
-            "meta.category", "Python", 0, 10
+        unique_values_filtered, _ = await document_store.get_metadata_field_unique_values_async(
+            "meta.category", "Python", 10
         )
         assert set(unique_values_filtered) == {"A"}  # Only category A has documents with "Python" in content
 
         # Test with search term - filter by content matching "Java"
-        unique_values_java = await document_store.get_metadata_field_unique_values_async("meta.category", "Java", 0, 10)
+        unique_values_java, _ = await document_store.get_metadata_field_unique_values_async("meta.category", "Java", 10)
         assert set(unique_values_java) == {"B"}  # Only category B has documents with "Java" in content
 
         # Test with integer values
@@ -604,11 +616,11 @@ async def test_get_metadata_field_unique_values_async(self, document_store: Open
             Document(content="Doc 4", meta={"priority": 3}),
         ]
         await document_store.write_documents_async(int_docs)
-        unique_priorities = await document_store.get_metadata_field_unique_values_async("meta.priority", None, 0, 10)
+        unique_priorities, _ = await document_store.get_metadata_field_unique_values_async("meta.priority", None, 10)
         assert set(unique_priorities) == {"1", "2", "3"}
 
         # Test with search term on integer field
-        unique_priorities_filtered = await document_store.get_metadata_field_unique_values_async(
-            "meta.priority", "Doc 1", 0, 10
+        unique_priorities_filtered, _ = await document_store.get_metadata_field_unique_values_async(
+            "meta.priority", "Doc 1", 10
         )
         assert set(unique_priorities_filtered) == {"1"}

From 27e1eaee166aba285f0b527f386cb38f4466fbad Mon Sep 17 00:00:00 2001
From: "David S. Batista" <dsbatista@gmail.com>
Date: Fri, 16 Jan 2026 10:57:13 +0100
Subject: [PATCH 58/58] removing ResponseFormat

---
 .../document_stores/opensearch/document_store.py                | 2 --
 1 file changed, 2 deletions(-)

diff --git a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
index 3bfe7a3675..41b9c9abf3 100644
--- a/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
+++ b/integrations/opensearch/src/haystack_integrations/document_stores/opensearch/document_store.py
@@ -23,8 +23,6 @@
 
 SPECIAL_FIELDS = {"content", "embedding", "id", "score", "sparse_embedding", "blob"}
 
-ResponseFormat = Literal["json", "jdbc", "csv", "raw"]
-
 Hosts = str | list[str | Mapping[str, str | int]]
 
 # document scores are essentially unbounded and will be scaled to values between 0 and 1 if scale_score is set to