diff --git a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py index c0177bc334..423e766e1c 100644 --- a/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py +++ b/integrations/elasticsearch/src/haystack_integrations/document_stores/elasticsearch/document_store.py @@ -1,12 +1,11 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 +import copy # ruff: noqa: FBT002, FBT001 boolean-type-hint-positional-argument and boolean-default-value-positional-argument # ruff: noqa: B008 function-call-in-default-argument # ruff: noqa: S101 disable checks for uses of the assert keyword - - from collections.abc import Mapping from dataclasses import replace from typing import Any, Literal @@ -86,6 +85,7 @@ def __init__( api_key: Secret | str | None = Secret.from_env_var("ELASTIC_API_KEY", strict=False), api_key_id: Secret | str | None = Secret.from_env_var("ELASTIC_API_KEY_ID", strict=False), embedding_similarity_function: Literal["cosine", "dot_product", "l2_norm", "max_inner_product"] = "cosine", + sparse_vector_field: str | None = None, **kwargs: Any, ) -> None: """ @@ -117,6 +117,9 @@ def __init__( To choose the most appropriate function, look for information about your embedding model. To understand how document scores are computed, see the Elasticsearch [documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html#dense-vector-params) + :param sparse_vector_field: If set, the name of the Elasticsearch field where sparse embeddings + will be stored using the `sparse_vector` field type. When not set, any `sparse_embedding` + data on Documents is dropped during writes and a warning is logged. :param **kwargs: Optional arguments that `Elasticsearch` takes. 
""" self._hosts = hosts @@ -126,16 +129,26 @@ def __init__( self._api_key = api_key self._api_key_id = api_key_id self._embedding_similarity_function = embedding_similarity_function + self._sparse_vector_field = sparse_vector_field self._custom_mapping = custom_mapping self._kwargs = kwargs self._initialized = False + if self._sparse_vector_field and self._sparse_vector_field in SPECIAL_FIELDS: + msg = f"sparse_vector_field '{self._sparse_vector_field}' conflicts with a reserved field name." + raise ValueError(msg) + if self._custom_mapping and not isinstance(self._custom_mapping, dict): msg = "custom_mapping must be a dictionary" raise ValueError(msg) + if self._custom_mapping and self._sparse_vector_field: + self._custom_mapping = copy.deepcopy(custom_mapping) # original custom_mapping dict is left unchanged + self._custom_mapping.setdefault("properties", {}) # type: ignore # can't be None here + self._custom_mapping["properties"][self._sparse_vector_field] = {"type": "sparse_vector"} # type: ignore # can't be None here + if not self._custom_mapping: - self._default_mappings = { + self._default_mappings: dict[str, Any] = { "properties": { "embedding": { "type": "dense_vector", @@ -156,6 +169,8 @@ def __init__( } ], } + if self._sparse_vector_field: + self._default_mappings["properties"][self._sparse_vector_field] = {"type": "sparse_vector"} def _ensure_initialized(self) -> None: """ @@ -277,6 +292,7 @@ def to_dict(self) -> dict[str, Any]: api_key=self._api_key.to_dict() if isinstance(self._api_key, Secret) else None, api_key_id=self._api_key_id.to_dict() if isinstance(self._api_key_id, Secret) else None, embedding_similarity_function=self._embedding_similarity_function, + sparse_vector_field=self._sparse_vector_field, **self._kwargs, ) @@ -404,12 +420,11 @@ async def filter_documents_async(self, filters: dict[str, Any] | None = None) -> documents = await self._search_documents_async(query=query) return documents - @staticmethod - def _deserialize_document(hit: 
dict[str, Any]) -> Document: + def _deserialize_document(self, hit: dict[str, Any]) -> Document: """ Creates a `Document` from the search hit provided. - This is mostly useful in self.filter_documents(). + This is mostly useful in self.filter_documents() and self.filter_documents_async(). :param hit: A search hit from Elasticsearch. :returns: `Document` created from the search hit. @@ -420,8 +435,40 @@ def _deserialize_document(hit: dict[str, Any]) -> Document: data["metadata"]["highlighted"] = hit["highlight"] data["score"] = hit["_score"] + if self._sparse_vector_field and self._sparse_vector_field in data: + es_sparse = data.pop(self._sparse_vector_field) + sorted_items = sorted(es_sparse.items(), key=lambda x: int(x[0])) + data["sparse_embedding"] = { + "indices": [int(k) for k, _ in sorted_items], + "values": [v for _, v in sorted_items], + } + return Document.from_dict(data) + def _handle_sparse_embedding(self, doc_dict: dict[str, Any], doc_id: str) -> None: + """ + Extracts the sparse_embedding from a document dict and converts it to the Elasticsearch sparse_vector format. + + :param doc_dict: The dictionary representation of the document. + :param doc_id: The document ID, used for warning messages. + """ + if "sparse_embedding" not in doc_dict: + return + sparse_embedding = doc_dict.pop("sparse_embedding") + if not sparse_embedding: + return + if self._sparse_vector_field: + doc_dict[self._sparse_vector_field] = { + str(idx): val for idx, val in zip(sparse_embedding["indices"], sparse_embedding["values"], strict=True) + } + else: + logger.warning( + "Document {doc_id} has the `sparse_embedding` field set, " + "but `sparse_vector_field` is not configured for this ElasticsearchDocumentStore. 
" + "The `sparse_embedding` field will be ignored.", + doc_id=doc_id, + ) + def write_documents( self, documents: list[Document], @@ -457,16 +504,7 @@ def write_documents( elasticsearch_actions = [] for doc in documents: doc_dict = doc.to_dict() - - if "sparse_embedding" in doc_dict: - sparse_embedding = doc_dict.pop("sparse_embedding", None) - if sparse_embedding: - logger.warning( - "Document {doc_id} has the `sparse_embedding` field set," - "but storing sparse embeddings in Elasticsearch is not currently supported." - "The `sparse_embedding` field will be ignored.", - doc_id=doc.id, - ) + self._handle_sparse_embedding(doc_dict, doc.id) elasticsearch_actions.append( { "_op_type": action, @@ -544,16 +582,7 @@ async def write_documents_async( actions = [] for doc in documents: doc_dict = doc.to_dict() - - if "sparse_embedding" in doc_dict: - sparse_embedding = doc_dict.pop("sparse_embedding", None) - if sparse_embedding: - logger.warning( - "Document {doc_id} has the `sparse_embedding` field set," - "but storing sparse embeddings in Elasticsearch is not currently supported." 
- "The `sparse_embedding` field will be ignored.", - doc_id=doc.id, - ) + self._handle_sparse_embedding(doc_dict, doc.id) action = { "_op_type": "create" if policy == DuplicatePolicy.FAIL else "index", diff --git a/integrations/elasticsearch/tests/test_bm25_retriever.py b/integrations/elasticsearch/tests/test_bm25_retriever.py index 4be26c7b35..92b7e82794 100644 --- a/integrations/elasticsearch/tests/test_bm25_retriever.py +++ b/integrations/elasticsearch/tests/test_bm25_retriever.py @@ -56,6 +56,7 @@ def test_to_dict(_mock_elasticsearch_client): "custom_mapping": None, "index": "default", "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", }, @@ -74,7 +75,7 @@ def test_from_dict(_mock_elasticsearch_client): "type": "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever.ElasticsearchBM25Retriever", "init_parameters": { "document_store": { - "init_parameters": {"hosts": "some fake host", "index": "default"}, + "init_parameters": {"hosts": "some fake host", "index": "default", "sparse_vector_field": None}, "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", }, "filters": {}, @@ -99,7 +100,7 @@ def test_from_dict_no_filter_policy(_mock_elasticsearch_client): "type": "haystack_integrations.components.retrievers.elasticsearch.bm25_retriever.ElasticsearchBM25Retriever", "init_parameters": { "document_store": { - "init_parameters": {"hosts": "some fake host", "index": "default"}, + "init_parameters": {"hosts": "some fake host", "index": "default", "sparse_vector_field": None}, "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", }, "filters": {}, diff --git a/integrations/elasticsearch/tests/test_document_store.py b/integrations/elasticsearch/tests/test_document_store.py index 993472dc11..c37a7518a7 100644 --- 
a/integrations/elasticsearch/tests/test_document_store.py +++ b/integrations/elasticsearch/tests/test_document_store.py @@ -8,6 +8,7 @@ import pytest from elasticsearch.exceptions import BadRequestError # type: ignore[import-not-found] from haystack.dataclasses.document import Document +from haystack.dataclasses.sparse_embedding import SparseEmbedding from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy from haystack.testing.document_store import ( @@ -30,6 +31,48 @@ def test_init_is_lazy(_mock_es_client): _mock_es_client.assert_not_called() +def test_init_with_special_fields_raises_error(): + with pytest.raises(ValueError, match=r"sparse_vector_field 'content' conflicts with a reserved field name\."): + ElasticsearchDocumentStore(sparse_vector_field="content") + + +def test_init_with_custom_mapping_injects_sparse_vector(): + custom_mapping = {"properties": {"some_field": {"type": "text"}}} + store = ElasticsearchDocumentStore(custom_mapping=custom_mapping, sparse_vector_field="my_sparse_vec") + assert "my_sparse_vec" in store._custom_mapping["properties"] + assert store._custom_mapping["properties"]["my_sparse_vec"] == {"type": "sparse_vector"} + + +def test_handle_sparse_embedding_no_op_when_absent(): + store = ElasticsearchDocumentStore(hosts="testhost") + doc_dict = {"id": "doc-1", "content": "hello"} + store._handle_sparse_embedding(doc_dict, "doc-1") + assert doc_dict == {"id": "doc-1", "content": "hello"} + + +def test_handle_sparse_embedding_converts_to_es_format(): + store = ElasticsearchDocumentStore(hosts="testhost", sparse_vector_field="my_sparse") + doc_dict = { + "id": "doc-1", + "sparse_embedding": {"indices": [0, 5], "values": [0.3, 0.7]}, + } + store._handle_sparse_embedding(doc_dict, "doc-1") + assert "sparse_embedding" not in doc_dict + assert doc_dict["my_sparse"] == {"0": 0.3, "5": 0.7} + + +def 
test_handle_sparse_embedding_warns_when_no_field_configured(caplog): + store = ElasticsearchDocumentStore(hosts="testhost") + doc_dict = { + "id": "doc-1", + "content": "hello", + "sparse_embedding": {"indices": [0, 1], "values": [0.5, 0.5]}, + } + store._handle_sparse_embedding(doc_dict, "doc-1") + assert "but `sparse_vector_field` is not configured" in caplog.text + assert "sparse_embedding" not in doc_dict + + @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") def test_headers_are_supported(_mock_es_client): _ = ElasticsearchDocumentStore( @@ -69,6 +112,7 @@ def test_to_dict(): "custom_mapping": None, "index": "default", "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, } @@ -83,6 +127,7 @@ def test_from_dict(): "api_key": None, "api_key_id": None, "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, } document_store = ElasticsearchDocumentStore.from_dict(data) @@ -90,6 +135,7 @@ def test_from_dict(): assert document_store._index == "default" assert document_store._custom_mapping is None assert document_store._api_key is None + assert document_store._sparse_vector_field is None assert document_store._api_key_id is None assert document_store._embedding_similarity_function == "cosine" @@ -134,6 +180,7 @@ def test_from_dict_with_api_keys_env_vars(): "api_key": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY"], "strict": False}, "api_key_id": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY_ID"], "strict": False}, "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, } @@ -152,6 +199,7 @@ def test_from_dict_with_api_keys_str(): "api_key": "my_api_key", "api_key_id": "my_api_key_id", "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, } @@ -160,6 +208,23 @@ def test_from_dict_with_api_keys_str(): assert document_store._api_key_id == "my_api_key_id" +def test_from_dict_without_sparse_vector_field(): + data = { + "type": 
"haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", + "init_parameters": { + "hosts": "some hosts", + "custom_mapping": None, + "index": "default", + "api_key": "my_api_key", + "api_key_id": "my_api_key_id", + "embedding_similarity_function": "cosine", + }, + } + + document_store = ElasticsearchDocumentStore.from_dict(data) + assert document_store._sparse_vector_field is None + + def test_api_key_validation_only_api_key(): api_key = Secret.from_token("test_api_key") document_store = ElasticsearchDocumentStore(hosts="https://localhost:9200", api_key=api_key) @@ -332,6 +397,81 @@ def test_write_documents(self, document_store: ElasticsearchDocumentStore): with pytest.raises(DuplicateDocumentError): document_store.write_documents(docs, DuplicatePolicy.FAIL) + def test_write_documents_with_sparse_vectors(self): + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_sync_sparse", sparse_vector_field="sparse_vec" + ) + store.client.options(ignore_status=[400, 404]).indices.delete(index="test_sync_sparse") + + doc = Document(id="1", content="test", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5])) + store.write_documents([doc]) + + # check ES natively + raw_doc = store.client.get(index="test_sync_sparse", id="1") + assert raw_doc["_source"]["sparse_vec"] == {"0": 0.5, "1": 0.5} + + # check retrieval reconstruction + results = store.filter_documents() + assert len(results) == 1 + assert results[0].sparse_embedding is not None + assert results[0].sparse_embedding.indices == [0, 1] + assert results[0].sparse_embedding.values == [0.5, 0.5] + + store.client.indices.delete(index="test_sync_sparse") + + def test_write_documents_with_non_contiguous_sparse_indices(self): + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_sync_sparse_noncontiguous", sparse_vector_field="sparse_vec" + ) + store.client.options(ignore_status=[400, 
404]).indices.delete(index="test_sync_sparse_noncontiguous") + + doc = Document( + id="1", content="test", sparse_embedding=SparseEmbedding(indices=[100, 5, 42], values=[0.1, 0.9, 0.5]) + ) + store.write_documents([doc]) + + results = store.filter_documents() + assert len(results) == 1 + assert results[0].sparse_embedding is not None + assert results[0].sparse_embedding.indices == [5, 42, 100] + assert results[0].sparse_embedding.values == [0.9, 0.5, 0.1] + + store.client.indices.delete(index="test_sync_sparse_noncontiguous") + + def test_write_documents_mixed_sparse_and_non_sparse(self): + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_sync_sparse_mixed", sparse_vector_field="sparse_vec" + ) + store.client.options(ignore_status=[400, 404]).indices.delete(index="test_sync_sparse_mixed") + + docs = [ + Document( + id="1", content="with sparse", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5]) + ), + Document(id="2", content="without sparse"), + ] + store.write_documents(docs) + + results = sorted(store.filter_documents(), key=lambda d: d.id) + assert len(results) == 2 + assert results[0].sparse_embedding is not None + assert results[0].sparse_embedding.indices == [0, 1] + assert results[1].sparse_embedding is None + + store.client.indices.delete(index="test_sync_sparse_mixed") + + def test_write_documents_with_sparse_embedding_warning(self, document_store, caplog): + """Test write_documents with document containing sparse_embedding field""" + doc = Document(id="1", content="test", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5])) + + document_store.write_documents([doc]) + assert "but `sparse_vector_field` is not configured" in caplog.text + + results = document_store.filter_documents() + assert len(results) == 1 + assert results[0].id == "1" + assert not hasattr(results[0], "sparse_embedding") or results[0].sparse_embedding is None + def test_bm25_retrieval(self, document_store: 
ElasticsearchDocumentStore): document_store.write_documents( [ @@ -507,6 +647,43 @@ def test_write_documents_different_embedding_sizes_fail(self, document_store: El with pytest.raises(DocumentStoreError): document_store.write_documents(docs) + def test_init_with_sparse_vector_field(self): + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_init_sparse", sparse_vector_field="sparse_vec" + ) + assert "sparse_vec" in store._default_mappings["properties"] + assert store._default_mappings["properties"]["sparse_vec"]["type"] == "sparse_vector" + + @patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch") + def test_init_with_custom_mapping(self, mock_elasticsearch): + custom_mapping = { + "properties": { + "embedding": {"type": "dense_vector", "index": True, "similarity": "dot_product"}, + "content": {"type": "text"}, + }, + "dynamic_templates": [ + { + "strings": { + "path_match": "*", + "match_mapping_type": "string", + "mapping": { + "type": "keyword", + }, + } + } + ], + } + mock_client = Mock( + indices=Mock(create=Mock(), exists=Mock(return_value=False)), + ) + mock_elasticsearch.return_value = mock_client + + _ = ElasticsearchDocumentStore(hosts="http://testhost:9200", custom_mapping=custom_mapping).client + mock_client.indices.create.assert_called_once_with( + index="default", + mappings=custom_mapping, + ) + def test_delete_all_documents_index_recreation(self, document_store: ElasticsearchDocumentStore): # populate the index with some documents docs = [Document(id="1", content="A first document"), Document(id="2", content="Second document")] diff --git a/integrations/elasticsearch/tests/test_document_store_async.py b/integrations/elasticsearch/tests/test_document_store_async.py index 3aa0552f86..a08cea8995 100644 --- a/integrations/elasticsearch/tests/test_document_store_async.py +++ b/integrations/elasticsearch/tests/test_document_store_async.py @@ -152,13 +152,82 @@ async def 
test_write_documents_async_with_sparse_embedding_warning(self, documen doc = Document(id="1", content="test", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5])) await document_store.write_documents_async([doc]) - assert "but storing sparse embeddings in Elasticsearch is not currently supported." in caplog.text + assert "but `sparse_vector_field` is not configured" in caplog.text results = await document_store.filter_documents_async() assert len(results) == 1 assert results[0].id == "1" assert not hasattr(results[0], "sparse_embedding") or results[0].sparse_embedding is None + @pytest.mark.asyncio + async def test_write_documents_async_with_sparse_vectors(self): + """Test write_documents with document containing sparse_embedding field""" + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_async_sparse", sparse_vector_field="sparse_vec" + ) + await store.async_client.options(ignore_status=[400, 404]).indices.delete(index="test_async_sparse") + + doc = Document(id="1", content="test", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5])) + await store.write_documents_async([doc]) + + # check ES natively + raw_doc = await store.async_client.get(index="test_async_sparse", id="1") + assert raw_doc["_source"]["sparse_vec"] == {"0": 0.5, "1": 0.5} + + # check retrieval + results = await store.filter_documents_async() + assert len(results) == 1 + assert results[0].sparse_embedding is not None + assert results[0].sparse_embedding.indices == [0, 1] + assert results[0].sparse_embedding.values == [0.5, 0.5] + + await store.async_client.indices.delete(index="test_async_sparse") + + @pytest.mark.asyncio + async def test_write_documents_async_with_non_contiguous_sparse_indices(self): + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_async_sparse_noncontiguous", sparse_vector_field="sparse_vec" + ) + await store.async_client.options(ignore_status=[400, 404]).indices.delete( + 
index="test_async_sparse_noncontiguous" + ) + + doc = Document( + id="1", content="test", sparse_embedding=SparseEmbedding(indices=[100, 5, 42], values=[0.1, 0.9, 0.5]) + ) + await store.write_documents_async([doc]) + + results = await store.filter_documents_async() + assert len(results) == 1 + assert results[0].sparse_embedding is not None + assert results[0].sparse_embedding.indices == [5, 42, 100] + assert results[0].sparse_embedding.values == [0.9, 0.5, 0.1] + + await store.async_client.indices.delete(index="test_async_sparse_noncontiguous") + + @pytest.mark.asyncio + async def test_write_documents_async_mixed_sparse_and_non_sparse(self): + store = ElasticsearchDocumentStore( + hosts=["http://localhost:9200"], index="test_async_sparse_mixed", sparse_vector_field="sparse_vec" + ) + await store.async_client.options(ignore_status=[400, 404]).indices.delete(index="test_async_sparse_mixed") + + docs = [ + Document( + id="1", content="with sparse", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5]) + ), + Document(id="2", content="without sparse"), + ] + await store.write_documents_async(docs) + + results = sorted(await store.filter_documents_async(), key=lambda d: d.id) + assert len(results) == 2 + assert results[0].sparse_embedding is not None + assert results[0].sparse_embedding.indices == [0, 1] + assert results[1].sparse_embedding is None + + await store.async_client.indices.delete(index="test_async_sparse_mixed") + @pytest.mark.asyncio async def test_delete_all_documents_async(self, document_store): docs = [ diff --git a/integrations/elasticsearch/tests/test_elasticsearch_hybrid_retriever.py b/integrations/elasticsearch/tests/test_elasticsearch_hybrid_retriever.py index 4803ff5c64..1abd62fdb8 100644 --- a/integrations/elasticsearch/tests/test_elasticsearch_hybrid_retriever.py +++ b/integrations/elasticsearch/tests/test_elasticsearch_hybrid_retriever.py @@ -37,6 +37,7 @@ class TestElasticsearchHybridRetriever: "api_key": {"type": "env_var", 
"env_vars": ["ELASTIC_API_KEY"], "strict": False}, "api_key_id": {"type": "env_var", "env_vars": ["ELASTIC_API_KEY_ID"], "strict": False}, "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, }, "embedder": { diff --git a/integrations/elasticsearch/tests/test_embedding_retriever.py b/integrations/elasticsearch/tests/test_embedding_retriever.py index abb00050f0..555ddce727 100644 --- a/integrations/elasticsearch/tests/test_embedding_retriever.py +++ b/integrations/elasticsearch/tests/test_embedding_retriever.py @@ -55,6 +55,7 @@ def test_to_dict(_mock_elasticsearch_client): "custom_mapping": None, "index": "default", "embedding_similarity_function": "cosine", + "sparse_vector_field": None, }, "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", }, @@ -73,7 +74,7 @@ def test_from_dict(_mock_elasticsearch_client): "type": t, "init_parameters": { "document_store": { - "init_parameters": {"hosts": "some fake host", "index": "default"}, + "init_parameters": {"hosts": "some fake host", "index": "default", "sparse_vector_field": None}, "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", }, "filters": {}, @@ -96,7 +97,7 @@ def test_from_dict_no_filter_policy(_mock_elasticsearch_client): "type": t, "init_parameters": { "document_store": { - "init_parameters": {"hosts": "some fake host", "index": "default"}, + "init_parameters": {"hosts": "some fake host", "index": "default", "sparse_vector_field": None}, "type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore", }, "filters": {},