Skip to content

Commit 766def9

Browse files
committed
test: address PR review feedback for sparse vector tests
- Add SPECIAL_FIELDS validation test
- Add custom_mapping injection test
- Add legacy from_dict backward compat test
- Fix async test to use async_client for index deletion
- Add retrieval reconstruction assertions to sync and async sparse tests
1 parent: b8f77c1 · commit: 766def9

File tree

2 files changed

+45
-2
lines changed

2 files changed

+45
-2
lines changed

integrations/elasticsearch/tests/test_document_store.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,18 @@ def test_init_is_lazy(_mock_es_client):
2424
_mock_es_client.assert_not_called()
2525

2626

27+
def test_init_with_special_fields_raises_error():
28+
with pytest.raises(ValueError, match=r"sparse_vector_field 'content' conflicts with a reserved field name\."):
29+
ElasticsearchDocumentStore(sparse_vector_field="content")
30+
31+
32+
def test_init_with_custom_mapping_injects_sparse_vector():
33+
custom_mapping = {"properties": {"some_field": {"type": "text"}}}
34+
store = ElasticsearchDocumentStore(custom_mapping=custom_mapping, sparse_vector_field="my_sparse_vec")
35+
assert "my_sparse_vec" in store._custom_mapping["properties"]
36+
assert store._custom_mapping["properties"]["my_sparse_vec"] == {"type": "sparse_vector"}
37+
38+
2739
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
2840
def test_headers_are_supported(_mock_es_client):
2941
_ = ElasticsearchDocumentStore(
@@ -159,6 +171,23 @@ def test_from_dict_with_api_keys_str():
159171
assert document_store._api_key_id == "my_api_key_id"
160172

161173

174+
def test_from_dict_without_sparse_vector_field():
175+
data = {
176+
"type": "haystack_integrations.document_stores.elasticsearch.document_store.ElasticsearchDocumentStore",
177+
"init_parameters": {
178+
"hosts": "some hosts",
179+
"custom_mapping": None,
180+
"index": "default",
181+
"api_key": "my_api_key",
182+
"api_key_id": "my_api_key_id",
183+
"embedding_similarity_function": "cosine",
184+
},
185+
}
186+
187+
document_store = ElasticsearchDocumentStore.from_dict(data)
188+
assert document_store._sparse_vector_field is None
189+
190+
162191
def test_api_key_validation_only_api_key():
163192
api_key = Secret.from_token("test_api_key")
164193
document_store = ElasticsearchDocumentStore(hosts="https://localhost:9200", api_key=api_key)
@@ -306,6 +335,13 @@ def test_write_documents_with_sparse_vectors(self):
306335
raw_doc = store.client.get(index="test_sync_sparse", id="1")
307336
assert raw_doc["_source"]["sparse_vec"] == {"0": 0.5, "1": 0.5}
308337

338+
# check retrieval reconstruction
339+
results = store.filter_documents()
340+
assert len(results) == 1
341+
assert results[0].sparse_embedding is not None
342+
assert results[0].sparse_embedding.indices == [0, 1]
343+
assert results[0].sparse_embedding.values == [0.5, 0.5]
344+
309345
store.client.indices.delete(index="test_sync_sparse")
310346

311347
def test_write_documents_with_sparse_embedding_warning(self, document_store, caplog):

integrations/elasticsearch/tests/test_document_store_async.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ async def test_write_documents_async_with_sparse_vectors(self):
165165
store = ElasticsearchDocumentStore(
166166
hosts=["http://localhost:9200"], index="test_async_sparse", sparse_vector_field="sparse_vec"
167167
)
168-
store.client.options(ignore_status=[400, 404]).indices.delete(index="test_async_sparse")
168+
await store.async_client.options(ignore_status=[400, 404]).indices.delete(index="test_async_sparse")
169169

170170
doc = Document(id="1", content="test", sparse_embedding=SparseEmbedding(indices=[0, 1], values=[0.5, 0.5]))
171171
await store.write_documents_async([doc])
@@ -174,7 +174,14 @@ async def test_write_documents_async_with_sparse_vectors(self):
174174
raw_doc = await store.async_client.get(index="test_async_sparse", id="1")
175175
assert raw_doc["_source"]["sparse_vec"] == {"0": 0.5, "1": 0.5}
176176

177-
store.client.indices.delete(index="test_async_sparse")
177+
# check retrieval
178+
results = await store.filter_documents_async()
179+
assert len(results) == 1
180+
assert results[0].sparse_embedding is not None
181+
assert results[0].sparse_embedding.indices == [0, 1]
182+
assert results[0].sparse_embedding.values == [0.5, 0.5]
183+
184+
await store.async_client.indices.delete(index="test_async_sparse")
178185

179186
@pytest.mark.asyncio
180187
async def test_delete_all_documents_async(self, document_store):

0 commit comments

Comments
 (0)