Skip to content

Commit 35c9bef

Browse files
authored
test: Chroma - add unit tests (#3175)
* chroma: add unit tests
* mv test
1 parent bb29392 commit 35c9bef

4 files changed

Lines changed: 227 additions & 1 deletion

File tree

integrations/chroma/tests/test_document_store.py

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import pytest
1111
from chromadb.api.shared_system_client import SharedSystemClient
12-
from haystack.dataclasses import ByteStream, Document
12+
from haystack.dataclasses import ByteStream, Document, SparseEmbedding
1313
from haystack.testing.document_store import (
1414
TEST_EMBEDDING_1,
1515
CountDocumentsByFilterTest,
@@ -27,6 +27,8 @@
2727
)
2828

2929
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
30+
from haystack_integrations.document_stores.chroma.errors import ChromaDocumentStoreConfigError
31+
from haystack_integrations.document_stores.chroma.utils import get_embedding_function
3032

3133

3234
@pytest.fixture
@@ -42,6 +44,11 @@ def clear_chroma_system_cache():
4244
SharedSystemClient.clear_system_cache()
4345

4446

47+
def test_get_embedding_function_invalid_name_raises():
    """Looking up an unknown embedding function name raises a config error."""
    unknown_name = "NonExistentEmbeddingFunction"
    expected_failure = pytest.raises(ChromaDocumentStoreConfigError, match="Invalid function name")
    with expected_failure:
        get_embedding_function(unknown_name)
50+
51+
4552
class TestDocumentStoreUnit:
4653
def test_init_in_memory(self):
4754
store = ChromaDocumentStore()
@@ -125,6 +132,92 @@ def test_same_collection_name_reinitialization(self):
125132
ChromaDocumentStore("test_1")
126133
ChromaDocumentStore("test_1")
127134

135+
def test_ensure_initialized_invalid_client_settings_raises(self):
    """A ``ValueError`` from the patched ``Settings`` surfaces as "Invalid client_settings"."""
    settings_target = "haystack_integrations.document_stores.chroma.document_store.Settings"
    failing_settings = mock.patch(settings_target, side_effect=ValueError("bad setting"))
    with failing_settings:
        store = ChromaDocumentStore(client_settings={"foo": "bar"})
        with pytest.raises(ValueError, match="Invalid client_settings"):
            store._ensure_initialized()
143+
144+
def test_infer_type_from_value_fallback_for_unknown_type(self):
    """``None`` and a list value are both inferred as ``"keyword"``."""
    for sample in (None, ["a", "b"]):
        assert ChromaDocumentStore._infer_type_from_value(sample) == "keyword"
147+
148+
def test_count_unique_metadata_empty_returns_zero_counts(self):
    """``None`` or empty metadata yields a zero count for every requested field."""
    no_metadata_counts = ChromaDocumentStore._count_unique_metadata(None, ["a", "b"])
    empty_list_counts = ChromaDocumentStore._count_unique_metadata([], ["x"])
    assert no_metadata_counts == {"a": 0, "b": 0}
    assert empty_list_counts == {"x": 0}
151+
152+
def test_compute_field_min_max_skips_non_scalar_values(self):
    """A list-valued entry does not take part in the min/max of the ``cat`` field."""
    rows = [{"cat": ["a", "b"]}, {"cat": "X"}, {"cat": "Z"}]
    bounds = ChromaDocumentStore._compute_field_min_max(rows, "cat")
    assert bounds == {"min": "X", "max": "Z"}
156+
157+
@pytest.mark.parametrize(
    "result",
    [
        pytest.param(
            {"ids": ["1"], "documents": None, "metadatas": [{"cat": "A"}]},
            id="documents_none",
        ),
        pytest.param(
            {"ids": ["1"], "documents": ["hello world"], "metadatas": [{"cat": "A"}]},
            id="no_matches",
        ),
    ],
)
def test_compute_field_unique_values_with_search_term_edge_cases(self, result):
    """Searching for the term ``"absent"`` yields no values and a total of zero."""
    unique_values, match_count = ChromaDocumentStore._compute_field_unique_values(result, "cat", "absent", 0, 10)
    assert unique_values == []
    assert match_count == 0
169+
170+
def test_filter_metadata_discards_unsupported_types(self, caplog):
    """Unsupported metadata values are dropped and their keys appear in the captured log.

    The ``dict`` and bare ``object`` values must be absent from the filtered
    result, while the string and ``None`` values are kept. The two log checks
    are separate assertions so a failure shows which key was not logged.
    """
    meta = {"ok": "x", "also_ok": None, "bad": {"nested": 1}, "worse": object()}
    with caplog.at_level(logging.WARNING):
        result = ChromaDocumentStore._filter_metadata(meta)
    assert result == {"ok": "x", "also_ok": None}
    assert "bad" in caplog.text
    assert "worse" in caplog.text
176+
177+
def test_convert_document_to_chroma_rejects_non_document(self):
    """Passing a plain string instead of a ``Document`` raises ``ValueError``."""
    not_a_document = "not a document"
    with pytest.raises(ValueError, match="must contain a list of objects of type Document"):
        ChromaDocumentStore._convert_document_to_chroma(not_a_document)  # type: ignore[arg-type]
180+
181+
def test_convert_document_to_chroma_warns_on_sparse_embedding(self, caplog):
    """A document with a sparse embedding still converts, and the log mentions ``sparse_embedding``."""
    sparse = SparseEmbedding(indices=[0, 1], values=[0.1, 0.2])
    doc = Document(content="hello", sparse_embedding=sparse)
    with caplog.at_level(logging.WARNING):
        converted = ChromaDocumentStore._convert_document_to_chroma(doc)
    assert converted is not None
    assert "sparse_embedding" in caplog.text
187+
188+
@pytest.mark.parametrize(
    ("result", "expected_embedding"),
    [
        pytest.param(
            {"ids": ["1"], "documents": ["c"], "metadatas": [{"k": "v"}], "embeddings": [[0.1, 0.2]]},
            [0.1, 0.2],
            id="list_embeddings",
        ),
        pytest.param(
            {"ids": ["1"], "documents": ["c"], "metadatas": [None]},
            None,
            id="no_embeddings",
        ),
    ],
)
def test_get_result_to_documents_embedding_variants(self, result, expected_embedding):
    """The first converted document carries the expected embedding (or ``None``)."""
    converted = ChromaDocumentStore._get_result_to_documents(result)  # type: ignore[arg-type]
    first_doc = converted[0]
    assert first_doc.embedding == expected_embedding
202+
203+
@pytest.mark.parametrize(
    ("result", "check"),
    [
        pytest.param(
            {"documents": None},
            lambda docs: docs == [],
            id="documents_none",
        ),
        pytest.param(
            {"ids": [["a", "b"]], "documents": [["c1", "c2"]], "metadatas": [[{"k": "v"}]]},
            lambda docs: docs[0][0].meta == {"k": "v"} and docs[0][1].meta == {},
            id="metadata_index_error",
        ),
        pytest.param(
            {"ids": [["a"]], "documents": [["c"]], "metadatas": [[{"k": "v"}]]},
            lambda docs: docs[0][0].embedding is None and docs[0][0].score is None,
            id="no_embeddings_no_distances",
        ),
    ],
)
def test_query_result_to_documents_edge_cases(self, result, check):
    """Each raw query result converts to documents that satisfy its paired predicate."""
    converted = ChromaDocumentStore._query_result_to_documents(result)  # type: ignore[arg-type]
    assert check(converted)
220+
128221

129222
@pytest.mark.integration
130223
class TestDocumentStore(

integrations/chroma/tests/test_document_store_async.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,23 @@
1414
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
1515

1616

17+
@pytest.mark.asyncio
class TestDocumentStoreAsyncUnit:
    """Checks of the error paths of ``ChromaDocumentStore._ensure_initialized_async``."""

    async def test_ensure_initialized_async_requires_host_and_port(self):
        """A store built without host and port raises ``ValueError`` matching "Async support"."""
        store_without_endpoint = ChromaDocumentStore()
        with pytest.raises(ValueError, match="Async support"):
            await store_without_endpoint._ensure_initialized_async()

    async def test_ensure_initialized_async_invalid_client_settings_raises(self):
        """A ``ValueError`` from the patched ``Settings`` surfaces as "Invalid client_settings"."""
        settings_target = "haystack_integrations.document_stores.chroma.document_store.Settings"
        failing_settings = mock.patch(settings_target, side_effect=ValueError("bad setting"))
        with failing_settings:
            store = ChromaDocumentStore(host="localhost", port=8000, client_settings={"foo": "bar"})
            with pytest.raises(ValueError, match="Invalid client_settings"):
                await store._ensure_initialized_async()
32+
33+
1734
@pytest.mark.skipif(
1835
sys.platform == "win32",
1936
reason="We do not run the Chroma server on Windows and async is only supported with HTTP connections",
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import pytest
6+
7+
from haystack_integrations.document_stores.chroma.errors import ChromaDocumentStoreFilterError
8+
from haystack_integrations.document_stores.chroma.filters import (
9+
_convert_filters,
10+
_parse_comparison_condition,
11+
_parse_logical_condition,
12+
)
13+
14+
15+
def test_id_filter_with_empty_value_raises():
    """An ``id`` filter whose value is the empty string raises a filter error."""
    empty_id_filter = {"field": "id", "operator": "==", "value": ""}
    with pytest.raises(ChromaDocumentStoreFilterError, match="id filter only supports"):
        _convert_filters(empty_id_filter)
18+
19+
20+
@pytest.mark.parametrize(
    ("condition", "match"),
    [
        pytest.param({"conditions": []}, "'operator' key missing", id="missing_operator"),
        pytest.param({"operator": "AND"}, "'conditions' key missing", id="missing_conditions"),
        pytest.param({"operator": "XOR", "conditions": []}, "Unknown operator", id="unknown_operator"),
    ],
)
def test_parse_logical_condition_errors(condition, match):
    """Each malformed logical condition raises a filter error with the expected message."""
    expected_failure = pytest.raises(ChromaDocumentStoreFilterError, match=match)
    with expected_failure:
        _parse_logical_condition(condition)
32+
33+
34+
@pytest.mark.parametrize(
    ("condition", "match"),
    [
        pytest.param({"operator": "==", "value": "x"}, "'field' key missing", id="missing_field"),
        pytest.param({"field": "meta.a", "value": "x"}, "'operator' key missing", id="missing_operator"),
        pytest.param({"field": "meta.a", "operator": "=="}, "'value' key missing", id="missing_value"),
        pytest.param(
            {"field": "meta.a", "operator": "~~", "value": "x"},
            "Unknown operator",
            id="unknown_operator",
        ),
    ],
)
def test_parse_comparison_condition_errors(condition, match):
    """Each malformed comparison condition raises a filter error with the expected message."""
    expected_failure = pytest.raises(ChromaDocumentStoreFilterError, match=match)
    with expected_failure:
        _parse_comparison_condition(condition)

integrations/chroma/tests/test_retriever.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
from unittest import mock
6+
57
import pytest
8+
from haystack.dataclasses import Document
69
from haystack.document_stores.types import FilterPolicy
710

811
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever, ChromaQueryTextRetriever
@@ -102,6 +105,29 @@ def test_from_dict_no_filter_policy(self, request):
102105
assert retriever.top_k == 42
103106
assert retriever.filter_policy == FilterPolicy.REPLACE # default even if not specified
104107

108+
def test_run_delegates_to_document_store_search(self):
    """``run`` calls ``search`` once and returns the first result list under ``documents``."""
    hits = [Document(content="hit")]
    store = mock.Mock(spec=ChromaDocumentStore)
    store.search.return_value = [hits]

    output = ChromaQueryTextRetriever(store, top_k=5).run(query="q")

    store.search.assert_called_once_with(["q"], 5, {})
    assert output == {"documents": hits}
118+
119+
@pytest.mark.asyncio
async def test_run_async_delegates_to_document_store_search_async(self):
    """``run_async`` awaits ``search_async`` once and returns the first result list under ``documents``."""
    hits = [Document(content="hit")]
    store = mock.Mock(spec=ChromaDocumentStore)
    store.search_async = mock.AsyncMock(return_value=[hits])

    output = await ChromaQueryTextRetriever(store, top_k=3).run_async(query="q")

    store.search_async.assert_awaited_once_with(["q"], 3, {})
    assert output == {"documents": hits}
130+
105131

106132
class TestChromaEmbeddingRetriever:
107133
def test_init(self, request):
@@ -165,3 +191,47 @@ def test_from_dict(self, request):
165191
assert retriever.filters == {"bar": "baz"}
166192
assert retriever.top_k == 42
167193
assert retriever.filter_policy == FilterPolicy.REPLACE
194+
195+
def test_from_dict_no_filter_policy(self, request):
    """Deserialising without ``filter_policy`` falls back to ``FilterPolicy.REPLACE``.

    The collection name comes from the ``request`` fixture, so the fixture is
    actually used and the name stays in sync with the test name. ``top_k``
    and ``filters`` are also checked, so a regression in the other
    deserialised fields is not hidden behind the default-policy check.
    """
    data = {
        "type": "haystack_integrations.components.retrievers.chroma.retriever.ChromaEmbeddingRetriever",
        "init_parameters": {
            "filters": {"bar": "baz"},
            "top_k": 42,
            "document_store": {
                "type": "haystack_integrations.document_stores.chroma.document_store.ChromaDocumentStore",
                "init_parameters": {
                    # For this non-parametrized test this is "test_from_dict_no_filter_policy".
                    "collection_name": request.node.name,
                    "embedding_function": "HuggingFaceEmbeddingFunction",
                    "persist_path": ".",
                    "api_key": "1234567890",
                    "distance_function": "l2",
                },
            },
        },
    }
    retriever = ChromaEmbeddingRetriever.from_dict(data)
    assert retriever.filters == {"bar": "baz"}
    assert retriever.top_k == 42
    assert retriever.filter_policy == FilterPolicy.REPLACE  # default even if not specified
215+
216+
def test_run_delegates_to_document_store_search_embeddings(self):
    """``run`` calls ``search_embeddings`` once and returns the first result list under ``documents``."""
    hits = [Document(content="hit")]
    store = mock.Mock(spec=ChromaDocumentStore)
    store.search_embeddings.return_value = [hits]

    output = ChromaEmbeddingRetriever(store, top_k=7).run(query_embedding=[0.1, 0.2])

    store.search_embeddings.assert_called_once_with([[0.1, 0.2]], 7, {})
    assert output == {"documents": hits}
226+
227+
@pytest.mark.asyncio
async def test_run_async_delegates_to_document_store_search_embeddings_async(self):
    """``run_async`` awaits ``search_embeddings_async`` once and returns the first result list."""
    hits = [Document(content="hit")]
    store = mock.Mock(spec=ChromaDocumentStore)
    store.search_embeddings_async = mock.AsyncMock(return_value=[hits])

    output = await ChromaEmbeddingRetriever(store, top_k=4).run_async(query_embedding=[0.5])

    store.search_embeddings_async.assert_awaited_once_with([[0.5]], 4, {})
    assert output == {"documents": hits}

0 commit comments

Comments
 (0)