Skip to content

Commit 471d174

Browse files
authored
test: better categorize some Document Stores tests (#3085)
1 parent 4771e83 commit 471d174

8 files changed

Lines changed: 177 additions & 162 deletions

File tree

integrations/chroma/tests/test_document_store.py

Lines changed: 62 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -42,53 +42,7 @@ def clear_chroma_system_cache():
4242
SharedSystemClient.clear_system_cache()
4343

4444

45-
class TestDocumentStore(
46-
CountDocumentsTest,
47-
DeleteDocumentsTest,
48-
FilterDocumentsTest,
49-
FilterableDocsFixtureMixin,
50-
UpdateByFilterTest,
51-
DeleteAllTest,
52-
DeleteByFilterTest,
53-
CountDocumentsByFilterTest,
54-
CountUniqueMetadataByFilterTest,
55-
GetMetadataFieldsInfoTest,
56-
GetMetadataFieldMinMaxTest,
57-
GetMetadataFieldUniqueValuesTest,
58-
):
59-
"""
60-
Common test cases will be provided by `DocumentStoreBaseTests` but
61-
you can add more to this class.
62-
"""
63-
64-
@pytest.fixture
65-
def document_store(self, embedding_function) -> ChromaDocumentStore:
66-
"""
67-
This is the most basic requirement for the child class: provide
68-
an instance of this document store so the base class can use it.
69-
"""
70-
with mock.patch(
71-
"haystack_integrations.document_stores.chroma.document_store.get_embedding_function"
72-
) as get_func:
73-
get_func.return_value = embedding_function
74-
return ChromaDocumentStore(embedding_function="test_function", collection_name=str(uuid.uuid1()))
75-
76-
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
77-
"""
78-
Assert that two lists of Documents are equal.
79-
This is used in every test, if a Document Store implementation has a different behaviour
80-
it should override this method.
81-
82-
This can happen for example when the Document Store sets a score to returned Documents.
83-
Since we can't know what the score will be, we can't compare the Documents reliably.
84-
"""
85-
received.sort(key=operator.attrgetter("id"))
86-
expected.sort(key=operator.attrgetter("id"))
87-
88-
for doc_received, doc_expected in zip(received, expected, strict=True):
89-
assert doc_received.content == doc_expected.content
90-
assert doc_received.meta == doc_expected.meta
91-
45+
class TestDocumentStoreUnit:
9246
def test_init_in_memory(self):
9347
store = ChromaDocumentStore()
9448

@@ -122,17 +76,6 @@ def test_invalid_initialization_both_host_and_persist_path(self):
12276
store = ChromaDocumentStore(persist_path="./path/to/local/store", host="localhost")
12377
store._ensure_initialized()
12478

125-
def test_client_settings_applied(self, clear_chroma_system_cache):
126-
"""
127-
Chroma's in-memory client uses a singleton pattern with an internal cache.
128-
Once a client is created with certain settings, Chroma rejects creating another
129-
with different settings in the same process. We clear the cache before and after
130-
this test to avoid conflicts with other tests that use default settings.
131-
"""
132-
store = ChromaDocumentStore(client_settings={"anonymized_telemetry": False})
133-
store._ensure_initialized()
134-
assert store._client.get_settings().anonymized_telemetry is False
135-
13679
def test_to_dict(self, request):
13780
ds = ChromaDocumentStore(
13881
collection_name=request.node.name,
@@ -182,6 +125,66 @@ def test_same_collection_name_reinitialization(self):
182125
ChromaDocumentStore("test_1")
183126
ChromaDocumentStore("test_1")
184127

128+
129+
@pytest.mark.integration
130+
class TestDocumentStore(
131+
CountDocumentsTest,
132+
DeleteDocumentsTest,
133+
FilterDocumentsTest,
134+
FilterableDocsFixtureMixin,
135+
UpdateByFilterTest,
136+
DeleteAllTest,
137+
DeleteByFilterTest,
138+
CountDocumentsByFilterTest,
139+
CountUniqueMetadataByFilterTest,
140+
GetMetadataFieldsInfoTest,
141+
GetMetadataFieldMinMaxTest,
142+
GetMetadataFieldUniqueValuesTest,
143+
):
144+
"""
145+
Common test cases will be provided by `DocumentStoreBaseTests` but
146+
you can add more to this class.
147+
"""
148+
149+
@pytest.fixture
150+
def document_store(self, embedding_function) -> ChromaDocumentStore:
151+
"""
152+
This is the most basic requirement for the child class: provide
153+
an instance of this document store so the base class can use it.
154+
"""
155+
with mock.patch(
156+
"haystack_integrations.document_stores.chroma.document_store.get_embedding_function"
157+
) as get_func:
158+
get_func.return_value = embedding_function
159+
return ChromaDocumentStore(embedding_function="test_function", collection_name=str(uuid.uuid1()))
160+
161+
def assert_documents_are_equal(self, received: list[Document], expected: list[Document]):
162+
"""
163+
Assert that two lists of Documents are equal.
164+
This is used in every test, if a Document Store implementation has a different behaviour
165+
it should override this method.
166+
167+
This can happen for example when the Document Store sets a score to returned Documents.
168+
Since we can't know what the score will be, we can't compare the Documents reliably.
169+
"""
170+
received.sort(key=operator.attrgetter("id"))
171+
expected.sort(key=operator.attrgetter("id"))
172+
173+
for doc_received, doc_expected in zip(received, expected, strict=True):
174+
assert doc_received.content == doc_expected.content
175+
assert doc_received.meta == doc_expected.meta
176+
177+
def test_client_settings_applied(self, clear_chroma_system_cache):
178+
"""
179+
Chroma's in-memory client uses a singleton pattern with an internal cache.
180+
Once a client is created with certain settings, Chroma rejects creating another
181+
with different settings in the same process. We clear the cache before and after
182+
this test to avoid conflicts with other tests that use default settings.
183+
"""
184+
store = ChromaDocumentStore(client_settings={"anonymized_telemetry": False})
185+
store._ensure_initialized()
186+
assert store._client.get_settings().anonymized_telemetry is False
187+
185188
def test_distance_metric_initialization(self):
186189
store = ChromaDocumentStore("test_2", distance_function="cosine")
187190
store._ensure_initialized()
@@ -445,7 +448,6 @@ def test_comparison_less_than_equal_with_none(self, document_store, filterable_d
445448
def test_not_operator(self, document_store, filterable_docs):
446449
pass
447450

448-
@pytest.mark.integration
449451
def test_search(self):
450452
document_store = ChromaDocumentStore()
451453
documents = [
@@ -491,7 +493,6 @@ def test_delete_all_documents_index_recreation(self, document_store: ChromaDocum
491493
document_store.write_documents(docs)
492494
assert document_store.count_documents() == 2
493495

494-
@pytest.mark.integration
495496
def test_search_embeddings(self, document_store: ChromaDocumentStore):
496497
query_embedding = TEST_EMBEDDING_1
497498
documents = [
@@ -515,6 +516,7 @@ def test_search_embeddings(self, document_store: ChromaDocumentStore):
515516
assert len(result_empty_filters[0]) == 2
516517

517518

519+
@pytest.mark.integration
518520
class TestMetadataOperations:
519521
"""Test new metadata query operations for ChromaDocumentStore"""
520522

integrations/chroma/tests/test_document_store_async.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,6 @@ async def test_update_by_filter_async_no_matches(self, document_store: ChromaDoc
264264
assert updated_count == 0
265265
assert await document_store.count_documents_async() == 2
266266

267-
@pytest.mark.integration
268267
async def test_search_embeddings_async(self, document_store: ChromaDocumentStore):
269268
query_embedding = TEST_EMBEDDING_1
270269
documents = [

integrations/elasticsearch/tests/test_document_store.py

Lines changed: 31 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,37 @@ def test_client_initialization_with_api_key_string(_mock_async_es, _mock_es):
232232
assert async_call_args[1]["api_key"] == "test_api_key"
233233

234234

235+
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
236+
def test_init_with_custom_mapping(mock_elasticsearch):
237+
custom_mapping = {
238+
"properties": {
239+
"embedding": {"type": "dense_vector", "index": True, "similarity": "dot_product"},
240+
"content": {"type": "text"},
241+
},
242+
"dynamic_templates": [
243+
{
244+
"strings": {
245+
"path_match": "*",
246+
"match_mapping_type": "string",
247+
"mapping": {
248+
"type": "keyword",
249+
},
250+
}
251+
}
252+
],
253+
}
254+
mock_client = Mock(
255+
indices=Mock(create=Mock(), exists=Mock(return_value=False)),
256+
)
257+
mock_elasticsearch.return_value = mock_client
258+
259+
_ = ElasticsearchDocumentStore(hosts="http://testhost:9200", custom_mapping=custom_mapping).client
260+
mock_client.indices.create.assert_called_once_with(
261+
index="default",
262+
mappings=custom_mapping,
263+
)
264+
265+
235266
@pytest.mark.integration
236267
class TestDocumentStore(
237268
DocumentStoreBaseExtendedTests,
@@ -476,36 +507,6 @@ def test_write_documents_different_embedding_sizes_fail(self, document_store: El
476507
with pytest.raises(DocumentStoreError):
477508
document_store.write_documents(docs)
478509

479-
@patch("haystack_integrations.document_stores.elasticsearch.document_store.Elasticsearch")
480-
def test_init_with_custom_mapping(self, mock_elasticsearch):
481-
custom_mapping = {
482-
"properties": {
483-
"embedding": {"type": "dense_vector", "index": True, "similarity": "dot_product"},
484-
"content": {"type": "text"},
485-
},
486-
"dynamic_templates": [
487-
{
488-
"strings": {
489-
"path_match": "*",
490-
"match_mapping_type": "string",
491-
"mapping": {
492-
"type": "keyword",
493-
},
494-
}
495-
}
496-
],
497-
}
498-
mock_client = Mock(
499-
indices=Mock(create=Mock(), exists=Mock(return_value=False)),
500-
)
501-
mock_elasticsearch.return_value = mock_client
502-
503-
_ = ElasticsearchDocumentStore(hosts="http://testhost:9200", custom_mapping=custom_mapping).client
504-
mock_client.indices.create.assert_called_once_with(
505-
index="default",
506-
mappings=custom_mapping,
507-
)
508-
509510
def test_delete_all_documents_index_recreation(self, document_store: ElasticsearchDocumentStore):
510511
# populate the index with some documents
511512
docs = [Document(id="1", content="A first document"), Document(id="2", content="Second document")]

integrations/mongodb_atlas/tests/test_document_store.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -272,7 +272,6 @@ def test_get_metadata_field_unique_values(self, document_store: MongoDBAtlasDocu
272272
assert len(values_page) == 1
273273
assert values_page[0] in ["alpha", "beta", "gamma"]
274274

275-
@pytest.mark.integration
276275
def test_custom_embedding_field(self):
277276
"""Test that the custom embedding field is correctly used in the document store."""
278277
# Create a document store with a custom embedding field
@@ -315,7 +314,6 @@ def test_custom_embedding_field(self):
315314
finally:
316315
database[collection_name].drop()
317316

318-
@pytest.mark.integration
319317
def test_custom_content_field(self):
320318
"""Test that the custom content field is correctly used in the document store."""
321319
# Create a document store with a custom content field

integrations/mongodb_atlas/tests/test_fulltext_retrieval.py

Lines changed: 41 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import os
66
from time import sleep
7-
from unittest.mock import MagicMock
7+
from unittest.mock import MagicMock, patch
88

99
import pytest
1010
from haystack import Document
@@ -13,9 +13,11 @@
1313
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
1414

1515

16-
def get_document_store(**kwargs):
16+
def get_document_store(connection_string=None, **kwargs):
17+
if connection_string is None:
18+
connection_string = Secret.from_env_var("MONGO_CONNECTION_STRING_2")
1719
return MongoDBAtlasDocumentStore(
18-
mongo_connection_string=Secret.from_env_var("MONGO_CONNECTION_STRING_2"),
20+
mongo_connection_string=connection_string,
1921
database_name="haystack_test",
2022
collection_name="test_collection",
2123
vector_search_index="cosine_index",
@@ -24,36 +26,12 @@ def get_document_store(**kwargs):
2426
)
2527

2628

27-
@pytest.mark.skipif(
28-
not os.environ.get("MONGO_CONNECTION_STRING_2"),
29-
reason="No MongoDB Atlas connection string provided",
29+
@patch(
30+
"haystack_integrations.document_stores.mongodb_atlas.document_store.MongoDBAtlasDocumentStore._ensure_connection_setup"
3031
)
31-
@pytest.mark.integration
32-
class TestFullTextRetrieval:
33-
@pytest.fixture(scope="class")
34-
def document_store(self) -> MongoDBAtlasDocumentStore:
35-
return get_document_store()
36-
37-
@pytest.fixture(autouse=True, scope="class")
38-
def setup_teardown(self, document_store):
39-
document_store._ensure_connection_setup()
40-
document_store._collection.delete_many({})
41-
document_store.write_documents(
42-
[
43-
Document(content="The quick brown fox chased the dog", meta={"meta_field": "right_value"}),
44-
Document(content="The fox was brown", meta={"meta_field": "right_value"}),
45-
Document(content="The lazy dog"),
46-
Document(content="fox fox fox"),
47-
]
48-
)
49-
50-
# Wait for documents to be indexed
51-
sleep(5)
52-
53-
yield
54-
55-
def test_pipeline_correctly_passes_parameters(self, document_store):
56-
document_store = get_document_store()
32+
class TestFullTextRetrievalUnit:
33+
def test_pipeline_correctly_passes_parameters(self, _mock_setup):
34+
document_store = get_document_store(connection_string=Secret.from_token("test"))
5735
mock_collection = MagicMock()
5836
document_store._collection = mock_collection
5937
mock_collection.aggregate.return_value = []
@@ -98,9 +76,9 @@ def test_pipeline_correctly_passes_parameters(self, document_store):
9876
# Explicitly verify that the path in the text search is using the content_field
9977
assert actual_pipeline[0]["$search"]["compound"]["must"][0]["text"]["path"] == document_store.content_field
10078

101-
def test_pipeline_with_custom_content_field(self, document_store):
79+
def test_pipeline_with_custom_content_field(self, _mock_setup):
10280
# Create a document store with a custom content field
103-
document_store = get_document_store(content_field="custom_text")
81+
document_store = get_document_store(connection_string=Secret.from_token("test"), content_field="custom_text")
10482
mock_collection = MagicMock()
10583
document_store._collection = mock_collection
10684
mock_collection.aggregate.return_value = []
@@ -125,6 +103,35 @@ def test_pipeline_with_custom_content_field(self, document_store):
125103
assert "$addFields" in actual_pipeline[3]
126104
assert "$project" in actual_pipeline[4]
127105

106+
107+
@pytest.mark.skipif(
108+
not os.environ.get("MONGO_CONNECTION_STRING_2"),
109+
reason="No MongoDB Atlas connection string provided",
110+
)
111+
@pytest.mark.integration
112+
class TestFullTextRetrieval:
113+
@pytest.fixture(scope="class")
114+
def document_store(self) -> MongoDBAtlasDocumentStore:
115+
return get_document_store()
116+
117+
@pytest.fixture(autouse=True, scope="class")
118+
def setup_teardown(self, document_store):
119+
document_store._ensure_connection_setup()
120+
document_store._collection.delete_many({})
121+
document_store.write_documents(
122+
[
123+
Document(content="The quick brown fox chased the dog", meta={"meta_field": "right_value"}),
124+
Document(content="The fox was brown", meta={"meta_field": "right_value"}),
125+
Document(content="The lazy dog"),
126+
Document(content="fox fox fox"),
127+
]
128+
)
129+
130+
# Wait for documents to be indexed
131+
sleep(5)
132+
133+
yield
134+
128135
def test_query_retrieval(self, document_store: MongoDBAtlasDocumentStore):
129136
results = document_store._fulltext_retrieval(query="fox", top_k=2)
130137
assert len(results) == 2

0 commit comments

Comments
 (0)