99
1010import pytest
1111from chromadb .api .shared_system_client import SharedSystemClient
12- from haystack .dataclasses import ByteStream , Document
12+ from haystack .dataclasses import ByteStream , Document , SparseEmbedding
1313from haystack .testing .document_store import (
1414 TEST_EMBEDDING_1 ,
1515 CountDocumentsByFilterTest ,
2727)
2828
2929from haystack_integrations .document_stores .chroma import ChromaDocumentStore
30+ from haystack_integrations .document_stores .chroma .errors import ChromaDocumentStoreConfigError
31+ from haystack_integrations .document_stores .chroma .utils import get_embedding_function
3032
3133
3234@pytest .fixture
@@ -42,6 +44,11 @@ def clear_chroma_system_cache():
4244 SharedSystemClient .clear_system_cache ()
4345
4446
def test_get_embedding_function_invalid_name_raises():
    """Looking up an unknown embedding function name raises a config error."""
    unknown_name = "NonExistentEmbeddingFunction"
    with pytest.raises(ChromaDocumentStoreConfigError, match="Invalid function name"):
        get_embedding_function(unknown_name)
50+
51+
4552class TestDocumentStoreUnit :
4653 def test_init_in_memory (self ):
4754 store = ChromaDocumentStore ()
@@ -125,6 +132,92 @@ def test_same_collection_name_reinitialization(self):
125132 ChromaDocumentStore ("test_1" )
126133 ChromaDocumentStore ("test_1" )
127134
def test_ensure_initialized_invalid_client_settings_raises(self):
    """With ``Settings`` patched to raise, ``_ensure_initialized`` raises ``ValueError``."""
    settings_target = "haystack_integrations.document_stores.chroma.document_store.Settings"
    failing_settings = mock.patch(settings_target, side_effect=ValueError("bad setting"))
    with failing_settings:
        # Both construction and initialization run while the patch is active.
        store = ChromaDocumentStore(client_settings={"foo": "bar"})
        with pytest.raises(ValueError, match="Invalid client_settings"):
            store._ensure_initialized()
143+
def test_infer_type_from_value_fallback_for_unknown_type(self):
    """``None`` and a list of strings are both reported as ``"keyword"``."""
    for unsupported_value in (None, ["a", "b"]):
        assert ChromaDocumentStore._infer_type_from_value(unsupported_value) == "keyword"
147+
def test_count_unique_metadata_empty_returns_zero_counts(self):
    """``None`` or empty metadata yields a zero count for every requested field."""
    count_unique = ChromaDocumentStore._count_unique_metadata
    assert count_unique(None, ["a", "b"]) == {"a": 0, "b": 0}
    assert count_unique([], ["x"]) == {"x": 0}
151+
def test_compute_field_min_max_skips_non_scalar_values(self):
    """Min/max for ``cat`` come from the string values; the list value is left out."""
    records = [{"cat": ["a", "b"]}, {"cat": "X"}, {"cat": "Z"}]
    expected_bounds = {"min": "X", "max": "Z"}
    assert ChromaDocumentStore._compute_field_min_max(records, "cat") == expected_bounds
156+
@pytest.mark.parametrize(
    "result",
    [
        pytest.param(
            {"ids": ["1"], "documents": None, "metadatas": [{"cat": "A"}]},
            id="documents_none",
        ),
        pytest.param(
            {"ids": ["1"], "documents": ["hello world"], "metadatas": [{"cat": "A"}]},
            id="no_matches",
        ),
    ],
)
def test_compute_field_unique_values_with_search_term_edge_cases(self, result):
    """Searching for ``"absent"`` yields no values and a zero total in both cases."""
    unique_values, total_count = ChromaDocumentStore._compute_field_unique_values(
        result, "cat", "absent", 0, 10
    )
    assert unique_values == []
    assert total_count == 0
169+
def test_filter_metadata_discards_unsupported_types(self, caplog):
    """Check that ``_filter_metadata`` drops unsupported values and logs their keys.

    The string and ``None`` entries must be kept; the nested dict (``"bad"``)
    and the bare ``object()`` (``"worse"``) must be removed, and both key
    names must appear in the captured WARNING-level log text.
    """
    meta = {"ok": "x", "also_ok": None, "bad": {"nested": 1}, "worse": object()}
    with caplog.at_level(logging.WARNING):
        result = ChromaDocumentStore._filter_metadata(meta)
    assert result == {"ok": "x", "also_ok": None}
    # One assertion per discarded key, so a failure names the key that was not logged.
    assert "bad" in caplog.text
    assert "worse" in caplog.text
176+
def test_convert_document_to_chroma_rejects_non_document(self):
    """Passing a plain string instead of a ``Document`` raises ``ValueError``."""
    not_a_document = "not a document"
    with pytest.raises(ValueError, match="must contain a list of objects of type Document"):
        ChromaDocumentStore._convert_document_to_chroma(not_a_document)  # type: ignore[arg-type]
180+
def test_convert_document_to_chroma_warns_on_sparse_embedding(self, caplog):
    """A document with a sparse embedding still converts, and the log mentions it."""
    sparse = SparseEmbedding(indices=[0, 1], values=[0.1, 0.2])
    doc = Document(content="hello", sparse_embedding=sparse)
    with caplog.at_level(logging.WARNING):
        converted = ChromaDocumentStore._convert_document_to_chroma(doc)
    assert converted is not None
    assert "sparse_embedding" in caplog.text
187+
@pytest.mark.parametrize(
    ("result", "expected_embedding"),
    [
        pytest.param(
            {"ids": ["1"], "documents": ["c"], "metadatas": [{"k": "v"}], "embeddings": [[0.1, 0.2]]},
            [0.1, 0.2],
            id="list_embeddings",
        ),
        pytest.param(
            {"ids": ["1"], "documents": ["c"], "metadatas": [None]},
            None,
            id="no_embeddings",
        ),
    ],
)
def test_get_result_to_documents_embedding_variants(self, result, expected_embedding):
    """The first converted document carries the expected embedding (or ``None``)."""
    converted = ChromaDocumentStore._get_result_to_documents(result)  # type: ignore[arg-type]
    first_document = converted[0]
    assert first_document.embedding == expected_embedding
202+
@pytest.mark.parametrize(
    ("result", "check"),
    [
        # No documents at all: the conversion yields an empty list.
        pytest.param({"documents": None}, lambda docs: docs == [], id="documents_none"),
        # Two documents but a single metadata entry: the second document gets empty meta.
        pytest.param(
            {"ids": [["a", "b"]], "documents": [["c1", "c2"]], "metadatas": [[{"k": "v"}]]},
            lambda docs: docs[0][0].meta == {"k": "v"} and docs[0][1].meta == {},
            id="metadata_index_error",
        ),
        # Neither embeddings nor distances supplied: both fields stay None.
        pytest.param(
            {"ids": [["a"]], "documents": [["c"]], "metadatas": [[{"k": "v"}]]},
            lambda docs: docs[0][0].embedding is None and docs[0][0].score is None,
            id="no_embeddings_no_distances",
        ),
    ],
)
def test_query_result_to_documents_edge_cases(self, result, check):
    """Run each case's predicate against the converted query result."""
    converted = ChromaDocumentStore._query_result_to_documents(result)  # type: ignore[arg-type]
    assert check(converted)
220+
128221
129222@pytest .mark .integration
130223class TestDocumentStore (
0 commit comments