deepset-ai
diff --git a/‎integrations/elasticsearch/CHANGELOG.md‎
Lines changed: 7 additions & 0 deletions b/‎integrations/elasticsearch/CHANGELOG.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎integrations/elasticsearch/pydoc/config_docusaurus.yml‎
Lines changed: 1 addition & 0 deletions b/‎integrations/elasticsearch/pydoc/config_docusaurus.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py‎
Lines changed: 6 additions & 1 deletion b/‎integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/__init__.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/sql_retriever.py‎
Lines changed: 203 additions & 0 deletions b/‎integrations/elasticsearch/src/haystack_integrations/components/retrievers/elasticsearch/sql_retriever.py‎
Lines changed: 203 additions & 0 deletions
diff --git a/‎integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py‎
Lines changed: 58 additions & 0 deletions b/‎integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎integrations/elasticsearch/tests/conftest.py‎
Lines changed: 60 additions & 0 deletions b/‎integrations/elasticsearch/tests/conftest.py‎
Lines changed: 60 additions & 0 deletions
@@ -1,5 +1,12 @@
 # Changelog
 
+## [integrations/elasticsearch-v5.2.0] - 2026-02-02
+
+### 🚀 Features
+
+- Add SQLRetriever to ElasticsearchDocumentStore (#2801)
+
+
 ## [integrations/elasticsearch-v5.1.1] - 2026-01-29
 
 ### 🐛 Bug Fixes
 
@@ -4,6 +4,7 @@ loaders:
   modules:
   - haystack_integrations.components.retrievers.elasticsearch.bm25_retriever
   - haystack_integrations.components.retrievers.elasticsearch.embedding_retriever
+  - haystack_integrations.components.retrievers.elasticsearch.sql_retriever
   - haystack_integrations.document_stores.elasticsearch.document_store
   - haystack_integrations.document_stores.elasticsearch.filters
   search_path:
 
@@ -3,5 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 from .bm25_retriever import ElasticsearchBM25Retriever
 from .embedding_retriever import ElasticsearchEmbeddingRetriever
+from .sql_retriever import ElasticsearchSQLRetriever
 
-__all__ = ["ElasticsearchBM25Retriever", "ElasticsearchEmbeddingRetriever"]
+__all__ = [
+    "ElasticsearchBM25Retriever",
+    "ElasticsearchEmbeddingRetriever",
+    "ElasticsearchSQLRetriever",
+]
@@ -0,0 +1,203 @@
+# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any
+
+from haystack import component, default_from_dict, default_to_dict, logging
+
+from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
+
+logger = logging.getLogger(__name__)
+
+
+@component
+class ElasticsearchSQLRetriever:
+    """
+    Executes raw Elasticsearch SQL queries against an ElasticsearchDocumentStore.
+
+    This component allows you to execute SQL queries directly against the Elasticsearch index,
+    which is useful for fetching metadata, aggregations, and other structured data at runtime.
+
+    Returns the raw JSON response from the Elasticsearch SQL API.
+
+    Usage example:
+    ```python
+    from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
+    from haystack_integrations.components.retrievers.elasticsearch import ElasticsearchSQLRetriever
+
+    document_store = ElasticsearchDocumentStore(hosts="http://localhost:9200")
+    retriever = ElasticsearchSQLRetriever(document_store=document_store)
+
+    result = retriever.run(
+        query="SELECT content, category FROM \\"my_index\\" WHERE category = 'A'"
+    )
+    # result["result"] contains the raw Elasticsearch JSON response
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        document_store: ElasticsearchDocumentStore,
+        raise_on_failure: bool = True,
+        fetch_size: int | None = None,
+    ):
+        """
+        Creates the ElasticsearchSQLRetriever component.
+
+        :param document_store: An instance of ElasticsearchDocumentStore to use with the Retriever.
+        :param raise_on_failure:
+            Whether to raise an exception if the API call fails. Otherwise, log a warning and return an empty dict.
+        :param fetch_size: Optional number of results to fetch per page. If not provided, the default
+            fetch size set in Elasticsearch is used.
+
+        :raises ValueError: If `document_store` is not an instance of ElasticsearchDocumentStore.
+        """
+        if not isinstance(document_store, ElasticsearchDocumentStore):
+            msg = "document_store must be an instance of ElasticsearchDocumentStore"
+            raise ValueError(msg)
+
+        self._document_store = document_store
+        self._raise_on_failure = raise_on_failure
+        self._fetch_size = fetch_size
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Serializes the component to a dictionary.
+
+        :returns:
+            Dictionary with serialized data.
+        """
+        return default_to_dict(
+            self,
+            document_store=self._document_store.to_dict(),
+            raise_on_failure=self._raise_on_failure,
+            fetch_size=self._fetch_size,
+        )
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "ElasticsearchSQLRetriever":
+        """
+        Deserializes the component from a dictionary.
+
+        :param data:
+            Dictionary to deserialize from.
+
+        :returns:
+            Deserialized component.
+        """
+        data["init_parameters"]["document_store"] = ElasticsearchDocumentStore.from_dict(
+            data["init_parameters"]["document_store"]
+        )
+        return default_from_dict(cls, data)
+
+    @component.output_types(result=dict[str, Any])
+    def run(
+        self,
+        query: str,
+        document_store: ElasticsearchDocumentStore | None = None,
+        fetch_size: int | None = None,
+    ) -> dict[str, dict[str, Any]]:
+        """
+        Execute a raw Elasticsearch SQL query against the index.
+
+        :param query: The Elasticsearch SQL query to execute.
+        :param document_store: Optionally, an instance of ElasticsearchDocumentStore to use with the Retriever.
+        :param fetch_size: Optional number of results to fetch per page. If not provided, uses the value
+            specified during initialization, or the default fetch size set in Elasticsearch.
+
+        :returns:
+            A dictionary containing the raw JSON response from Elasticsearch SQL API:
+            - result: The raw JSON response from Elasticsearch (dict) or empty dict on error.
+
+        Example:
+            ```python
+            retriever = ElasticsearchSQLRetriever(document_store=document_store)
+            result = retriever.run(
+                query="SELECT content, category FROM \\"my_index\\" WHERE category = 'A'"
+            )
+            # result["result"] contains the raw Elasticsearch JSON response
+            # result["result"]["columns"] contains column metadata
+            # result["result"]["rows"] contains the data rows
+            ```
+        """
+        if document_store is not None:
+            if not isinstance(document_store, ElasticsearchDocumentStore):
+                msg = "document_store must be an instance of ElasticsearchDocumentStore"
+                raise ValueError(msg)
+            doc_store = document_store
+        else:
+            doc_store = self._document_store
+
+        fetch_size = fetch_size if fetch_size is not None else self._fetch_size
+
+        try:
+            result = doc_store._query_sql(query=query, fetch_size=fetch_size)
+        except Exception as e:
+            if self._raise_on_failure:
+                raise e
+            else:
+                logger.warning(
+                    "An error during SQL query execution occurred and will be ignored by returning empty dict: {error}",
+                    error=str(e),
+                    exc_info=True,
+                )
+                result = {}
+
+        return {"result": result}
+
+    @component.output_types(result=dict[str, Any])
+    async def run_async(
+        self,
+        query: str,
+        document_store: ElasticsearchDocumentStore | None = None,
+        fetch_size: int | None = None,
+    ) -> dict[str, dict[str, Any]]:
+        """
+        Asynchronously execute a raw Elasticsearch SQL query against the index.
+
+        :param query: The Elasticsearch SQL query to execute.
+        :param document_store: Optionally, an instance of ElasticsearchDocumentStore to use with the Retriever.
+        :param fetch_size: Optional number of results to fetch per page. If not provided, uses the value
+            specified during initialization, or the default fetch size set in Elasticsearch.
+
+        :returns:
+            A dictionary containing the raw JSON response from Elasticsearch SQL API:
+            - result: The raw JSON response from Elasticsearch (dict) or empty dict on error.
+
+        Example:
+            ```python
+            retriever = ElasticsearchSQLRetriever(document_store=document_store)
+            result = await retriever.run_async(
+                query="SELECT content, category FROM \\"my_index\\" WHERE category = 'A'"
+            )
+            # result["result"] contains the raw Elasticsearch JSON response
+            # result["result"]["columns"] contains column metadata
+            # result["result"]["rows"] contains the data rows
+            ```
+        """
+        if document_store is not None:
+            if not isinstance(document_store, ElasticsearchDocumentStore):
+                msg = "document_store must be an instance of ElasticsearchDocumentStore"
+                raise ValueError(msg)
+            doc_store = document_store
+        else:
+            doc_store = self._document_store
+
+        fetch_size = fetch_size if fetch_size is not None else self._fetch_size
+
+        try:
+            result = await doc_store._query_sql_async(query=query, fetch_size=fetch_size)
+        except Exception as e:
+            if self._raise_on_failure:
+                raise e
+            else:
+                logger.warning(
+                    "An error during SQL query execution occurred and will be ignored by returning empty dict: {error}",
+                    error=str(e),
+                    exc_info=True,
+                )
+                result = {}
+
+        return {"result": result}
@@ -1475,3 +1475,61 @@ async def get_metadata_field_unique_values_async(
             after_key = None
 
         return unique_values, after_key
+
+    def _query_sql(self, query: str, fetch_size: int | None = None) -> dict[str, Any]:
+        """
+        Execute a raw Elasticsearch SQL query against the index.
+
+        This method is not meant to be part of the public interface of
+        `ElasticsearchDocumentStore` nor called directly.
+        `ElasticsearchSQLRetriever` uses this method directly and is the public interface for it.
+
+        See `ElasticsearchSQLRetriever` for more information.
+
+        :param query: The Elasticsearch SQL query to execute
+        :param fetch_size: Optional number of results to fetch per page.
+        :returns: The raw JSON response from Elasticsearch SQL API.
+        """
+        self._ensure_initialized()
+        assert self._client is not None
+
+        try:
+            body: dict[str, Any] = {"query": query}
+            if fetch_size is not None:
+                body["fetch_size"] = fetch_size
+
+            response = self._client.sql.query(body=body)
+
+            return dict(response)
+        except Exception as e:
+            msg = f"Failed to execute SQL query in Elasticsearch: {e!s}"
+            raise DocumentStoreError(msg) from e
+
+    async def _query_sql_async(self, query: str, fetch_size: int | None = None) -> dict[str, Any]:
+        """
+        Asynchronously execute a raw Elasticsearch SQL query against the index.
+
+        This method is not meant to be part of the public interface of
+        `ElasticsearchDocumentStore` nor called directly.
+        `ElasticsearchSQLRetriever` uses this method directly and is the public interface for it.
+
+        See `ElasticsearchSQLRetriever` for more information.
+
+        :param query: The Elasticsearch SQL query to execute
+        :param fetch_size: Optional number of results to fetch per page.
+        :returns: The raw JSON response from Elasticsearch SQL API.
+        """
+        self._ensure_initialized()
+        assert self._async_client is not None
+
+        try:
+            body: dict[str, Any] = {"query": query}
+            if fetch_size is not None:
+                body["fetch_size"] = fetch_size
+
+            response = await self._async_client.sql.query(body=body)
+
+            return dict(response)
+        except Exception as e:
+            msg = f"Failed to execute SQL query in Elasticsearch: {e!s}"
+            raise DocumentStoreError(msg) from e
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import uuid
+
+import pytest
+
+from haystack_integrations.document_stores.elasticsearch import ElasticsearchDocumentStore
+
+
+def _get_unique_index_name() -> str:
+    """
+    Generate a unique, valid Elasticsearch index name for test isolation.
+
+    Each test gets its own index to enable parallel test execution without conflicts.
+    """
+    return f"test_sql_{uuid.uuid4().hex}"
+
+
+@pytest.fixture
+def document_store():
+    """
+    Document store fixture for SQL retriever integration tests.
+    """
+    hosts = ["http://localhost:9200"]
+    index = _get_unique_index_name()
+    embedding_similarity_function = "max_inner_product"
+
+    store = ElasticsearchDocumentStore(
+        hosts=hosts,
+        index=index,
+        embedding_similarity_function=embedding_similarity_function,
+    )
+    yield store
+
+    store._ensure_initialized()
+    store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
+    store.client.close()
+
+
+@pytest.fixture
+def document_store_2():
+    """
+    Second document store fixture for runtime document store switching tests.
+    """
+    hosts = ["http://localhost:9200"]
+    index = f"test_sql_2_{uuid.uuid4().hex}"
+    embedding_similarity_function = "max_inner_product"
+
+    store = ElasticsearchDocumentStore(
+        hosts=hosts,
+        index=index,
+        embedding_similarity_function=embedding_similarity_function,
+    )
+    yield store
+
+    store._ensure_initialized()
+    store.client.options(ignore_status=[400, 404]).indices.delete(index=index)
+    store.client.close()