@@ -43,6 +43,93 @@ def test_to_dict(mock_auth): # noqa
4343 }
4444
4545
@pytest.mark.usefixtures("mock_auth")
@mock.patch("haystack_integrations.document_stores.astra.document_store.AstraClient")
def test_count_documents_by_filter(mock_astra_client):
    """Check that a filtered count is delegated to the (mocked) Astra client.

    The Haystack-style filter is expected to reach `count_documents` as an
    Astra-style `$eq` filter, and the client's return value is expected to be
    returned unchanged.
    """
    client = mock_astra_client.return_value
    client.count_documents.return_value = 2
    haystack_filter = {"field": "meta.status", "operator": "==", "value": "draft"}

    result = AstraDocumentStore().count_documents_by_filter(haystack_filter)

    assert result == 2
    client.count_documents.assert_called_once_with(
        filters={"meta.status": {"$eq": "draft"}},
        upper_bound=1_000_000_000,
    )
60+
61+
@pytest.mark.usefixtures("mock_auth")
@mock.patch("haystack_integrations.document_stores.astra.document_store.AstraClient")
def test_count_unique_metadata_by_filter(mock_astra_client):
    """Check per-field unique-value counts computed from the client's `distinct` results.

    The expected counts below imply that nested lists contribute their
    elements and that `None` is not counted as a value.
    """
    client = mock_astra_client.return_value
    # One canned `distinct` result per requested field, consumed in call order.
    client.distinct.side_effect = [
        ["news", "docs", ["docs", "faq"], None],
        [1, 2, 2],
    ]
    astra_filter = {"meta.status": {"$eq": "published"}}

    store = AstraDocumentStore()
    unique_counts = store.count_unique_metadata_by_filter(
        {"field": "meta.status", "operator": "==", "value": "published"},
        ["category", "priority"],
    )

    assert unique_counts == {"category": 3, "priority": 2}
    expected_calls = [
        mock.call("meta.category", filters=astra_filter),
        mock.call("meta.priority", filters=astra_filter),
    ]
    assert client.distinct.call_args_list == expected_calls
79+
80+
@pytest.mark.usefixtures("mock_auth")
@mock.patch("haystack_integrations.document_stores.astra.document_store.AstraClient")
def test_get_metadata_fields_info(mock_astra_client):
    """Check the field-type mapping derived from documents returned by the mocked client.

    Also verifies that the client is queried once, with an empty filter and a
    projection restricted to `content` and `meta`.
    """
    client = mock_astra_client.return_value
    raw_documents = [
        {"content": "Doc 1", "meta": {"category": "news", "priority": 1, "active": True}},
        {"content": "Doc 2", "meta": {"category": "docs", "priority": 2.5, "tags": ["a", "b"]}},
    ]
    client.find_documents.return_value = raw_documents
    expected_info = {
        "content": {"type": "text"},
        "category": {"type": "keyword"},
        "priority": {"type": "long"},
        "active": {"type": "boolean"},
        "tags": {"type": "keyword"},
    }

    store = AstraDocumentStore()
    actual_info = store.get_metadata_fields_info()

    assert actual_info == expected_info
    client.find_documents.assert_called_once_with({}, projection={"content": 1, "meta": 1})
102+
103+
@pytest.mark.usefixtures("mock_auth")
@mock.patch("haystack_integrations.document_stores.astra.document_store.AstraClient")
def test_get_metadata_field_min_max(mock_astra_client):
    """Check that min/max are taken over the distinct values reported by the mocked client."""
    client = mock_astra_client.return_value
    # Deliberately unsorted so the result cannot come from positional access.
    client.distinct.return_value = [10, 3, 7]

    bounds = AstraDocumentStore().get_metadata_field_min_max("priority")

    assert bounds == {"min": 3, "max": 10}
    # The bare field name is expected to be queried under the `meta.` prefix.
    client.distinct.assert_called_once_with("meta.priority")
116+
117+
@pytest.mark.usefixtures("mock_auth")
@mock.patch("haystack_integrations.document_stores.astra.document_store.AstraClient")
def test_get_metadata_field_unique_values(mock_astra_client):
    """Check search-term filtering of the unique values reported by the mocked client.

    The canned `distinct` result mixes plain strings, a nested list and
    `None`; only the values matching "alp" are expected back, together with
    their total count.
    """
    client = mock_astra_client.return_value
    client.distinct.return_value = ["Beta", "alpha", ["gamma", "alphabet"], None]

    store = AstraDocumentStore()
    page = store.get_metadata_field_unique_values("category", search_term="alp", from_=0, size=5)
    values, total_count = page

    assert values == ["alpha", "alphabet"]
    assert total_count == 2
    client.distinct.assert_called_once_with("meta.category")
131+
132+
46133@pytest .mark .integration
47134@pytest .mark .skipif (
48135 os .environ .get ("ASTRA_DB_APPLICATION_TOKEN" , "" ) == "" , reason = "ASTRA_DB_APPLICATION_TOKEN env var not set"
@@ -204,6 +291,80 @@ def test_filter_documents_by_in_operator(self, document_store):
204291 self .assert_documents_are_equal ([result [0 ]], [docs [0 ]])
205292 self .assert_documents_are_equal ([result [1 ]], [docs [1 ]])
206293
def test_count_documents_by_filter(self, document_store: AstraDocumentStore):
    """Write three documents, two of them published, and count the published ones."""
    published_filter = {"field": "meta.status", "operator": "==", "value": "published"}
    document_store.write_documents(
        [
            Document(id="1", content="Doc 1", meta={"category": "news", "status": "published", "priority": 3}),
            Document(id="2", content="Doc 2", meta={"category": "docs", "status": "draft", "priority": 1}),
            Document(id="3", content="Doc 3", meta={"category": "news", "status": "published", "priority": 5}),
        ]
    )

    matching = document_store.count_documents_by_filter(published_filter)

    assert matching == 2
307+
def test_count_unique_metadata_by_filter(self, document_store: AstraDocumentStore):
    """Count distinct `category` and `priority` values among the published documents only."""
    published = [
        Document(id="1", content="Doc 1", meta={"category": "news", "status": "published", "priority": 1}),
        Document(id="2", content="Doc 2", meta={"category": "docs", "status": "published", "priority": 2}),
        Document(id="3", content="Doc 3", meta={"category": "news", "status": "published", "priority": 2}),
    ]
    # The draft carries values no published document has; they must not be counted.
    draft = Document(id="4", content="Doc 4", meta={"category": "faq", "status": "draft", "priority": 3})
    document_store.write_documents([*published, draft])

    status_filter = {"field": "meta.status", "operator": "==", "value": "published"}
    unique_counts = document_store.count_unique_metadata_by_filter(status_filter, ["category", "priority"])

    assert unique_counts == {"category": 2, "priority": 2}
323+
def test_get_metadata_fields_info(self, document_store: AstraDocumentStore):
    """Write two documents and check the reported type of `content` and each metadata field."""
    document_store.write_documents(
        [
            Document(id="1", content="Doc 1", meta={"category": "news", "status": "published", "priority": 1}),
            Document(id="2", content="Doc 2", meta={"category": "docs", "status": "draft", "priority": 2}),
        ]
    )
    expected_info = {
        "content": {"type": "text"},
        "category": {"type": "keyword"},
        "status": {"type": "keyword"},
        "priority": {"type": "long"},
    }

    actual_info = document_store.get_metadata_fields_info()

    assert actual_info == expected_info
339+
def test_get_metadata_field_min_max(self, document_store: AstraDocumentStore):
    """Write documents with priorities 3, 1 and 7 and expect min 1 / max 7."""
    priorities_by_id = (("1", 3), ("2", 1), ("3", 7))
    document_store.write_documents(
        [
            Document(id=doc_id, content=f"Doc {doc_id}", meta={"priority": priority})
            for doc_id, priority in priorities_by_id
        ]
    )

    bounds = document_store.get_metadata_field_min_max("priority")

    assert bounds == {"min": 1, "max": 7}
351+
def test_get_metadata_field_unique_values(self, document_store: AstraDocumentStore):
    """Search the unique `category` values for "alp" and expect only the two matching ones."""
    categories_by_id = (("1", "alpha"), ("2", "beta"), ("3", "alphabet"), ("4", "gamma"))
    document_store.write_documents(
        [
            Document(id=doc_id, content=f"Doc {doc_id}", meta={"category": category})
            for doc_id, category in categories_by_id
        ]
    )

    page = document_store.get_metadata_field_unique_values("category", search_term="alp", from_=0, size=10)
    values, total_count = page

    assert values == ["alpha", "alphabet"]
    assert total_count == 2
367+
@pytest.mark.skip(reason="Unsupported filter operator not.")
def test_not_operator(self, document_store, filterable_docs):
    """Intentionally empty: skipped because the `not` filter operator is reported as unsupported."""
0 commit comments