1010from haystack .dataclasses .document import Document
1111from haystack .document_stores .errors import DocumentStoreError , DuplicateDocumentError
1212from haystack .document_stores .types import DuplicatePolicy
13- from haystack .testing .document_store import DocumentStoreBaseExtendedTests
13+ from haystack .testing .document_store import (
14+ CountDocumentsByFilterTest ,
15+ CountUniqueMetadataByFilterTest ,
16+ DocumentStoreBaseExtendedTests ,
17+ GetMetadataFieldMinMaxTest ,
18+ GetMetadataFieldsInfoTest ,
19+ GetMetadataFieldUniqueValuesTest ,
20+ )
1421from haystack .utils import Secret
1522from haystack .utils .auth import TokenSecret
1623
@@ -226,7 +233,14 @@ def test_client_initialization_with_api_key_string(_mock_async_es, _mock_es):
226233
227234
228235@pytest .mark .integration
229- class TestDocumentStore (DocumentStoreBaseExtendedTests ):
236+ class TestDocumentStore (
237+ DocumentStoreBaseExtendedTests ,
238+ CountDocumentsByFilterTest ,
239+ CountUniqueMetadataByFilterTest ,
240+ GetMetadataFieldsInfoTest ,
241+ GetMetadataFieldMinMaxTest ,
242+ GetMetadataFieldUniqueValuesTest ,
243+ ):
230244 """
231245 Common test cases will be provided by `DocumentStoreBaseExtendedTests` but
232246 you can add more to this class.
@@ -528,31 +542,6 @@ def test_delete_all_documents_index_recreation(self, document_store: Elasticsear
528542 assert len (results ) == 1
529543 assert results [0 ].content == "New document after delete all"
530544
531- def test_count_documents_by_filter (self , document_store : ElasticsearchDocumentStore ):
532- docs = [
533- Document (content = "Doc 1" , meta = {"category" : "A" , "status" : "active" }),
534- Document (content = "Doc 2" , meta = {"category" : "B" , "status" : "active" }),
535- Document (content = "Doc 3" , meta = {"category" : "A" , "status" : "inactive" }),
536- Document (content = "Doc 4" , meta = {"category" : "A" , "status" : "active" }),
537- ]
538- document_store .write_documents (docs )
539-
540- count_a = document_store .count_documents_by_filter (
541- filters = {"field" : "category" , "operator" : "==" , "value" : "A" }
542- )
543- assert count_a == 3
544-
545- count_a_active = document_store .count_documents_by_filter (
546- filters = {
547- "operator" : "AND" ,
548- "conditions" : [
549- {"field" : "category" , "operator" : "==" , "value" : "A" },
550- {"field" : "status" , "operator" : "==" , "value" : "active" },
551- ],
552- }
553- )
554- assert count_a_active == 2
555-
556545 def test_count_unique_metadata_by_filter (self , document_store : ElasticsearchDocumentStore ):
557546 docs = [
558547 Document (content = "Doc 1" , meta = {"category" : "A" , "status" : "active" , "priority" : 1 }),
@@ -563,47 +552,6 @@ def test_count_unique_metadata_by_filter(self, document_store: ElasticsearchDocu
563552 ]
564553 document_store .write_documents (docs )
565554
566- # count distinct values for all documents
567- distinct_counts = document_store .count_unique_metadata_by_filter (
568- filters = {}, metadata_fields = ["category" , "status" , "priority" ]
569- )
570- assert distinct_counts ["category" ] == 3 # A, B, C
571- assert distinct_counts ["status" ] == 2 # active, inactive
572- assert distinct_counts ["priority" ] == 3 # 1, 2, 3
573-
574- # distinct values for documents with category="A"
575- distinct_counts_a = document_store .count_unique_metadata_by_filter (
576- filters = {"field" : "category" , "operator" : "==" , "value" : "A" },
577- metadata_fields = ["category" , "status" , "priority" ],
578- )
579- assert distinct_counts_a ["category" ] == 1 # Only A
580- assert distinct_counts_a ["status" ] == 2 # active, inactive
581- assert distinct_counts_a ["priority" ] == 2 # 1, 3
582-
583- # distinct values for documents with status="active"
584- distinct_counts_active = document_store .count_unique_metadata_by_filter (
585- filters = {"field" : "status" , "operator" : "==" , "value" : "active" },
586- metadata_fields = ["category" , "status" , "priority" ],
587- )
588- assert distinct_counts_active ["category" ] == 3 # A, B, C
589- assert distinct_counts_active ["status" ] == 1 # Only active
590- assert distinct_counts_active ["priority" ] == 3 # 1, 2, 3
591-
592- # distinct values with complex filter (category="A" AND status="active")
593- distinct_counts_a_active = document_store .count_unique_metadata_by_filter (
594- filters = {
595- "operator" : "AND" ,
596- "conditions" : [
597- {"field" : "category" , "operator" : "==" , "value" : "A" },
598- {"field" : "status" , "operator" : "==" , "value" : "active" },
599- ],
600- },
601- metadata_fields = ["category" , "status" , "priority" ],
602- )
603- assert distinct_counts_a_active ["category" ] == 1 # Only A
604- assert distinct_counts_a_active ["status" ] == 1 # Only active
605- assert distinct_counts_a_active ["priority" ] == 2 # 1, 3
606-
607555 # test with only a subset of fields
608556 distinct_counts_subset = document_store .count_unique_metadata_by_filter (
609557 filters = {}, metadata_fields = ["category" , "status" ]
@@ -642,36 +590,6 @@ def test_get_metadata_fields_info(self, document_store: ElasticsearchDocumentSto
642590 assert fields_info ["status" ]["type" ] == "keyword"
643591 assert fields_info ["priority" ]["type" ] == "long"
644592
645- def test_get_metadata_field_min_max (self , document_store : ElasticsearchDocumentStore ):
646- docs = [
647- Document (content = "Doc 1" , meta = {"priority" : 1 , "age" : 10 }),
648- Document (content = "Doc 2" , meta = {"priority" : 5 , "age" : 20 }),
649- Document (content = "Doc 3" , meta = {"priority" : 3 , "age" : 15 }),
650- Document (content = "Doc 4" , meta = {"priority" : 10 , "age" : 5 }),
651- Document (content = "Doc 6" , meta = {"rating" : 10.5 }),
652- Document (content = "Doc 7" , meta = {"rating" : 20.3 }),
653- Document (content = "Doc 8" , meta = {"rating" : 15.7 }),
654- Document (content = "Doc 9" , meta = {"rating" : 5.2 }),
655- ]
656- document_store .write_documents (docs )
657-
658- # test with "meta." prefix for integer field
659- min_max_priority = document_store .get_metadata_field_min_max ("meta.priority" )
660- assert min_max_priority ["min" ] == 1
661- assert min_max_priority ["max" ] == 10
662-
663- # test with single value
664- single_doc = [Document (content = "Doc 5" , meta = {"single_value" : 42 })]
665- document_store .write_documents (single_doc )
666- min_max_single = document_store .get_metadata_field_min_max ("meta.single_value" )
667- assert min_max_single ["min" ] == 42
668- assert min_max_single ["max" ] == 42
669-
670- # test with float values
671- min_max_score = document_store .get_metadata_field_min_max ("meta.rating" )
672- assert min_max_score ["min" ] == pytest .approx (5.2 )
673- assert min_max_score ["max" ] == pytest .approx (20.3 )
674-
675593 def test_get_metadata_field_unique_values (self , document_store : ElasticsearchDocumentStore ):
676594 docs = [
677595 Document (content = "Python programming" , meta = {"category" : "A" , "language" : "Python" }),
0 commit comments