1414from haystack .dataclasses .document import Document
1515from haystack .document_stores .errors import DocumentStoreError
1616from haystack .testing .document_store import (
17+ CountDocumentsByFilterTest ,
18+ CountUniqueMetadataByFilterTest ,
1719 DocumentStoreBaseExtendedTests ,
20+ GetMetadataFieldMinMaxTest ,
21+ GetMetadataFieldsInfoTest ,
22+ GetMetadataFieldUniqueValuesTest ,
1823 create_filterable_docs ,
1924)
2025from haystack .utils .auth import Secret
@@ -45,7 +50,14 @@ def test_init_is_lazy(_mock_client):
4550
4651
4752@pytest .mark .integration
48- class TestWeaviateDocumentStore (DocumentStoreBaseExtendedTests ):
53+ class TestWeaviateDocumentStore (
54+ DocumentStoreBaseExtendedTests ,
55+ CountDocumentsByFilterTest ,
56+ CountUniqueMetadataByFilterTest ,
57+ GetMetadataFieldsInfoTest ,
58+ GetMetadataFieldMinMaxTest ,
59+ GetMetadataFieldUniqueValuesTest ,
60+ ):
4961 @pytest .fixture
5062 def document_store (self , request ) -> Generator [WeaviateDocumentStore , None , None ]:
5163 # Use a different index for each test so we can run them in parallel
@@ -876,31 +888,6 @@ def test_update_by_filter_with_pagination(self, document_store, monkeypatch):
876888 assert "index" in doc .meta
877889 assert 0 <= doc .meta ["index" ] < 250
878890
879- def test_count_documents_by_filter (self , document_store ):
880- docs = [
881- Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
882- Document (content = "Doc 2" , meta = {"category" : "TypeB" }),
883- Document (content = "Doc 3" , meta = {"category" : "TypeA" }),
884- Document (content = "Doc 4" , meta = {"category" : "TypeA" }),
885- ]
886- document_store .write_documents (docs )
887- assert document_store .count_documents () == 4
888-
889- count = document_store .count_documents_by_filter (
890- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" }
891- )
892- assert count == 3
893-
894- count = document_store .count_documents_by_filter (
895- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeB" }
896- )
897- assert count == 1
898-
899- count = document_store .count_documents_by_filter (
900- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeC" }
901- )
902- assert count == 0
903-
904891 def test_get_metadata_fields_info (self , document_store ):
905892 fields_info = document_store .get_metadata_fields_info ()
906893
@@ -920,30 +907,6 @@ def test_get_metadata_fields_info(self, document_store):
920907 assert "status" in fields_info
921908 assert fields_info ["status" ]["type" ] == "text"
922909
923- def test_get_metadata_field_min_max (self , document_store ):
924- docs = [
925- Document (content = "Doc 1" , meta = {"number" : 10 }),
926- Document (content = "Doc 2" , meta = {"number" : 5 }),
927- Document (content = "Doc 3" , meta = {"number" : 20 }),
928- Document (content = "Doc 4" , meta = {"number" : 15 }),
929- ]
930- document_store .write_documents (docs )
931-
932- result = document_store .get_metadata_field_min_max ("number" )
933- assert result ["min" ] == 5
934- assert result ["max" ] == 20
935-
936- def test_get_metadata_field_min_max_with_meta_prefix (self , document_store ):
937- docs = [
938- Document (content = "Doc 1" , meta = {"number" : 100 }),
939- Document (content = "Doc 2" , meta = {"number" : 200 }),
940- ]
941- document_store .write_documents (docs )
942-
943- result = document_store .get_metadata_field_min_max ("meta.number" )
944- assert result ["min" ] == 100
945- assert result ["max" ] == 200
946-
def test_get_metadata_field_min_max_unsupported_type(self, document_store):
    """Check that min/max aggregation on the "category" field raises ValueError.

    The error message must state that the field doesn't support min/max
    aggregation.
    NOTE(review): presumably "category" is declared as a text property by the
    fixture -- confirm against the collection settings.
    """
    expected_message = "doesn't support min/max aggregation"
    with pytest.raises(ValueError, match=expected_message):
        document_store.get_metadata_field_min_max("category")
@@ -952,34 +915,6 @@ def test_get_metadata_field_min_max_field_not_found(self, document_store):
952915 with pytest .raises (ValueError , match = "not found in collection schema" ):
953916 document_store .get_metadata_field_min_max ("nonexistent_field" )
954917
955- def test_count_unique_metadata_by_filter (self , document_store ):
956- docs = [
957- Document (content = "Doc 1" , meta = {"category" : "TypeA" , "status" : "draft" }),
958- Document (content = "Doc 2" , meta = {"category" : "TypeB" , "status" : "published" }),
959- Document (content = "Doc 3" , meta = {"category" : "TypeA" , "status" : "draft" }),
960- Document (content = "Doc 4" , meta = {"category" : "TypeC" , "status" : "published" }),
961- Document (content = "Doc 5" , meta = {"category" : "TypeA" , "status" : "archived" }),
962- ]
963- document_store .write_documents (docs )
964-
965- result = document_store .count_unique_metadata_by_filter (
966- filters = {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" }, metadata_fields = ["status" ]
967- )
968- assert result ["status" ] == 2
969-
970- result = document_store .count_unique_metadata_by_filter (
971- filters = {
972- "operator" : "OR" ,
973- "conditions" : [
974- {"field" : "meta.category" , "operator" : "==" , "value" : "TypeA" },
975- {"field" : "meta.category" , "operator" : "==" , "value" : "TypeB" },
976- ],
977- },
978- metadata_fields = ["category" , "status" ],
979- )
980- assert result ["category" ] == 2
981- assert result ["status" ] == 3
982-
983918 def test_count_unique_metadata_by_filter_with_meta_prefix (self , document_store ):
984919 docs = [
985920 Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
@@ -1012,20 +947,6 @@ def test_count_unique_metadata_by_filter_field_not_found(self, document_store):
1012947 metadata_fields = ["nonexistent_field" ],
1013948 )
1014949
1015- def test_get_metadata_field_unique_values (self , document_store ):
1016- docs = [
1017- Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
1018- Document (content = "Doc 2" , meta = {"category" : "TypeB" }),
1019- Document (content = "Doc 3" , meta = {"category" : "TypeA" }),
1020- Document (content = "Doc 4" , meta = {"category" : "TypeC" }),
1021- Document (content = "Doc 5" , meta = {"category" : "TypeB" }),
1022- ]
1023- document_store .write_documents (docs )
1024-
1025- values , total_count = document_store .get_metadata_field_unique_values ("category" )
1026- assert total_count == 3
1027- assert set (values ) == {"TypeA" , "TypeB" , "TypeC" }
1028-
1029950 def test_get_metadata_field_unique_values_with_meta_prefix (self , document_store ):
1030951 docs = [
1031952 Document (content = "Doc 1" , meta = {"category" : "TypeA" }),
@@ -1078,3 +999,138 @@ def test_get_metadata_field_unique_values_empty_result(self, document_store):
1078999 values , total_count = document_store .get_metadata_field_unique_values ("category" )
10791000 assert total_count == 0
10801001 assert values == []
1002+
1003+ # --- Overrides of mixin tests to account for Weaviate-specific behaviour ---
1004+
def test_count_documents_by_filter_simple(self, document_store):
    """Count documents matching a single equality filter on ``meta.category``.

    Writes three "TypeA" documents and one "TypeB" document, then checks the
    filtered count for each category, including one that was never written.
    """
    categories = ["TypeA", "TypeB", "TypeA", "TypeA"]
    document_store.write_documents(
        [
            Document(content=f"Doc {position}", meta={"category": category})
            for position, category in enumerate(categories, start=1)
        ]
    )
    assert document_store.count_documents() == 4

    # "TypeC" is never written, so its filtered count must be zero.
    for category, expected in (("TypeA", 3), ("TypeB", 1), ("TypeC", 0)):
        matched = document_store.count_documents_by_filter(
            filters={"field": "meta.category", "operator": "==", "value": category}
        )
        assert matched == expected
1029+
def test_count_documents_by_filter_compound(self, document_store):
    """Count documents that satisfy an AND of two equality conditions.

    Only the documents that are both category "TypeA" and status "active"
    (two of the four written) may be counted.
    """
    rows = (
        ("TypeA", "active"),
        ("TypeB", "active"),
        ("TypeA", "inactive"),
        ("TypeA", "active"),
    )
    document_store.write_documents(
        [
            Document(content=f"Doc {position}", meta={"category": category, "status": status})
            for position, (category, status) in enumerate(rows, start=1)
        ]
    )

    active_type_a = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.category", "operator": "==", "value": "TypeA"},
            {"field": "meta.status", "operator": "==", "value": "active"},
        ],
    }
    assert document_store.count_documents_by_filter(filters=active_type_a) == 2
1050+
def test_count_documents_by_filter_empty_collection(self, document_store):
    """A filtered count on a store with no documents must be zero."""
    # Precondition: nothing has been written to this store.
    assert document_store.count_documents() == 0

    type_a_only = {"field": "meta.category", "operator": "==", "value": "TypeA"}
    assert document_store.count_documents_by_filter(filters=type_a_only) == 0
1059+
def test_count_unique_metadata_by_filter_with_filter(self, document_store):
    """Count distinct "status" values among documents whose category is "TypeA"."""
    rows = (
        ("TypeA", "draft"),
        ("TypeB", "published"),
        ("TypeA", "draft"),
        ("TypeC", "published"),
        ("TypeA", "archived"),
    )
    document_store.write_documents(
        [
            Document(content=f"Doc {position}", meta={"category": category, "status": status})
            for position, (category, status) in enumerate(rows, start=1)
        ]
    )

    type_a_only = {"field": "meta.category", "operator": "==", "value": "TypeA"}
    unique_counts = document_store.count_unique_metadata_by_filter(
        filters=type_a_only, metadata_fields=["status"]
    )
    # The TypeA documents carry "draft" (twice) and "archived": two distinct values.
    assert unique_counts["status"] == 2
1074+
def test_count_unique_metadata_by_filter_with_multiple_filters(self, document_store):
    """Test count_unique_metadata_by_filter() with an AND filter over two fields.

    Four documents cover every combination of category ("TypeA"/"TypeB") and
    year (2023/2024). The compound filter selects only the ("TypeB", 2023)
    document, so each requested metadata field must report exactly one
    distinct value.
    """
    docs = [
        Document(content="Doc 1", meta={"category": "TypeA", "year": 2023}),
        Document(content="Doc 2", meta={"category": "TypeA", "year": 2024}),
        Document(content="Doc 3", meta={"category": "TypeB", "year": 2023}),
        Document(content="Doc 4", meta={"category": "TypeB", "year": 2024}),
    ]
    document_store.write_documents(docs)

    filters = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.category", "operator": "==", "value": "TypeB"},
            {"field": "meta.year", "operator": "==", "value": 2023},
        ],
    }

    # Sanity check: exactly one document (Doc 3) satisfies both conditions.
    assert document_store.count_documents_by_filter(filters=filters) == 1

    # The behaviour this test is named after: unique values per metadata field
    # among the documents selected by the compound filter.
    counts = document_store.count_unique_metadata_by_filter(
        filters=filters,
        metadata_fields=["category", "year"],
    )
    assert counts["category"] == 1
    assert counts["year"] == 1
1094+
@staticmethod
def test_get_metadata_field_min_max_empty_collection(document_store):
    """Min and max of "number" on an empty collection are both reported as 0.

    Weaviate requires fields to be declared in the schema before they can be
    queried. The generic mixin queries "priority", which is not part of the
    pre-defined schema, so this override queries "number" instead, which is
    declared in the fixture's collection_settings.
    """
    # Precondition: nothing has been written to this store.
    assert document_store.count_documents() == 0

    bounds = document_store.get_metadata_field_min_max("number")
    assert bounds["min"] == 0
    assert bounds["max"] == 0
1104+
@staticmethod
def test_get_metadata_fields_info_empty_collection(document_store):
    """An empty collection still reports the fields declared by the fixture.

    Weaviate collections always carry a fixed schema, whether or not any
    documents have been written. The fixture pre-declares "number", "date",
    "category" and "status", so get_metadata_fields_info() returns those even
    on an empty collection, rather than the empty dict the generic mixin
    expects.
    """
    declared_fields = {"number", "date", "category", "status"}

    assert document_store.count_documents() == 0
    reported = document_store.get_metadata_fields_info()
    assert set(reported.keys()) == declared_fields
1114+
@staticmethod
def test_count_unique_metadata_by_filter_all_documents(document_store):
    """Count unique metadata values across every document in the store.

    The generic mixin passes filters={} (an empty dict) to mean "no filter".
    Weaviate's convert_filters() does not accept an empty dict, so this
    override uses a filter that explicitly matches all documents instead.
    """
    rows = (
        ("A", "active", 1),
        ("B", "active", 2),
        ("A", "inactive", 1),
        ("A", "active", 3),
        ("C", "active", 2),
    )
    document_store.write_documents(
        [
            Document(
                content=f"Doc {position}",
                meta={"category": category, "status": status, "priority": priority},
            )
            for position, (category, status, priority) in enumerate(rows, start=1)
        ]
    )
    assert document_store.count_documents() == 5

    # Every document has priority >= 1, so this filter selects the whole store.
    match_everything = {"field": "meta.priority", "operator": ">=", "value": 1}
    counts = document_store.count_unique_metadata_by_filter(
        filters=match_everything,
        metadata_fields=["category", "status", "priority"],
    )

    expected_unique = {"category": 3, "status": 2, "priority": 3}
    for field_name, expected in expected_unique.items():
        assert counts[field_name] == expected
0 commit comments