Skip to content

Commit c18e60d

Browse files
Test: WeaviateDocumentStore rely on Mixin tests (#3005)
* adding Mixin tests
* overriding some Mixin tests
* overriding some Mixin tests
* chore: update haystack-ai dependency to >=2.26.1

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* increasing lowest haystack dependency
* ruff fix
* formatting

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 2093503 commit c18e60d

2 files changed

Lines changed: 149 additions & 93 deletions

File tree

integrations/weaviate/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ classifiers = [
2424
"Programming Language :: Python :: Implementation :: PyPy",
2525
]
2626
dependencies = [
27-
"haystack-ai>=2.26.0",
27+
"haystack-ai>=2.26.1",
2828
"weaviate-client>=4.20",
2929
"python-dateutil",
3030
]

integrations/weaviate/tests/test_document_store.py

Lines changed: 148 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,12 @@
1414
from haystack.dataclasses.document import Document
1515
from haystack.document_stores.errors import DocumentStoreError
1616
from haystack.testing.document_store import (
17+
CountDocumentsByFilterTest,
18+
CountUniqueMetadataByFilterTest,
1719
DocumentStoreBaseExtendedTests,
20+
GetMetadataFieldMinMaxTest,
21+
GetMetadataFieldsInfoTest,
22+
GetMetadataFieldUniqueValuesTest,
1823
create_filterable_docs,
1924
)
2025
from haystack.utils.auth import Secret
@@ -45,7 +50,14 @@ def test_init_is_lazy(_mock_client):
4550

4651

4752
@pytest.mark.integration
48-
class TestWeaviateDocumentStore(DocumentStoreBaseExtendedTests):
53+
class TestWeaviateDocumentStore(
54+
DocumentStoreBaseExtendedTests,
55+
CountDocumentsByFilterTest,
56+
CountUniqueMetadataByFilterTest,
57+
GetMetadataFieldsInfoTest,
58+
GetMetadataFieldMinMaxTest,
59+
GetMetadataFieldUniqueValuesTest,
60+
):
4961
@pytest.fixture
5062
def document_store(self, request) -> Generator[WeaviateDocumentStore, None, None]:
5163
# Use a different index for each test so we can run them in parallel
@@ -876,31 +888,6 @@ def test_update_by_filter_with_pagination(self, document_store, monkeypatch):
876888
assert "index" in doc.meta
877889
assert 0 <= doc.meta["index"] < 250
878890

879-
def test_count_documents_by_filter(self, document_store):
880-
docs = [
881-
Document(content="Doc 1", meta={"category": "TypeA"}),
882-
Document(content="Doc 2", meta={"category": "TypeB"}),
883-
Document(content="Doc 3", meta={"category": "TypeA"}),
884-
Document(content="Doc 4", meta={"category": "TypeA"}),
885-
]
886-
document_store.write_documents(docs)
887-
assert document_store.count_documents() == 4
888-
889-
count = document_store.count_documents_by_filter(
890-
filters={"field": "meta.category", "operator": "==", "value": "TypeA"}
891-
)
892-
assert count == 3
893-
894-
count = document_store.count_documents_by_filter(
895-
filters={"field": "meta.category", "operator": "==", "value": "TypeB"}
896-
)
897-
assert count == 1
898-
899-
count = document_store.count_documents_by_filter(
900-
filters={"field": "meta.category", "operator": "==", "value": "TypeC"}
901-
)
902-
assert count == 0
903-
904891
def test_get_metadata_fields_info(self, document_store):
905892
fields_info = document_store.get_metadata_fields_info()
906893

@@ -920,30 +907,6 @@ def test_get_metadata_fields_info(self, document_store):
920907
assert "status" in fields_info
921908
assert fields_info["status"]["type"] == "text"
922909

923-
def test_get_metadata_field_min_max(self, document_store):
924-
docs = [
925-
Document(content="Doc 1", meta={"number": 10}),
926-
Document(content="Doc 2", meta={"number": 5}),
927-
Document(content="Doc 3", meta={"number": 20}),
928-
Document(content="Doc 4", meta={"number": 15}),
929-
]
930-
document_store.write_documents(docs)
931-
932-
result = document_store.get_metadata_field_min_max("number")
933-
assert result["min"] == 5
934-
assert result["max"] == 20
935-
936-
def test_get_metadata_field_min_max_with_meta_prefix(self, document_store):
937-
docs = [
938-
Document(content="Doc 1", meta={"number": 100}),
939-
Document(content="Doc 2", meta={"number": 200}),
940-
]
941-
document_store.write_documents(docs)
942-
943-
result = document_store.get_metadata_field_min_max("meta.number")
944-
assert result["min"] == 100
945-
assert result["max"] == 200
946-
947910
def test_get_metadata_field_min_max_unsupported_type(self, document_store):
948911
with pytest.raises(ValueError, match="doesn't support min/max aggregation"):
949912
document_store.get_metadata_field_min_max("category")
@@ -952,34 +915,6 @@ def test_get_metadata_field_min_max_field_not_found(self, document_store):
952915
with pytest.raises(ValueError, match="not found in collection schema"):
953916
document_store.get_metadata_field_min_max("nonexistent_field")
954917

955-
def test_count_unique_metadata_by_filter(self, document_store):
956-
docs = [
957-
Document(content="Doc 1", meta={"category": "TypeA", "status": "draft"}),
958-
Document(content="Doc 2", meta={"category": "TypeB", "status": "published"}),
959-
Document(content="Doc 3", meta={"category": "TypeA", "status": "draft"}),
960-
Document(content="Doc 4", meta={"category": "TypeC", "status": "published"}),
961-
Document(content="Doc 5", meta={"category": "TypeA", "status": "archived"}),
962-
]
963-
document_store.write_documents(docs)
964-
965-
result = document_store.count_unique_metadata_by_filter(
966-
filters={"field": "meta.category", "operator": "==", "value": "TypeA"}, metadata_fields=["status"]
967-
)
968-
assert result["status"] == 2
969-
970-
result = document_store.count_unique_metadata_by_filter(
971-
filters={
972-
"operator": "OR",
973-
"conditions": [
974-
{"field": "meta.category", "operator": "==", "value": "TypeA"},
975-
{"field": "meta.category", "operator": "==", "value": "TypeB"},
976-
],
977-
},
978-
metadata_fields=["category", "status"],
979-
)
980-
assert result["category"] == 2
981-
assert result["status"] == 3
982-
983918
def test_count_unique_metadata_by_filter_with_meta_prefix(self, document_store):
984919
docs = [
985920
Document(content="Doc 1", meta={"category": "TypeA"}),
@@ -1012,20 +947,6 @@ def test_count_unique_metadata_by_filter_field_not_found(self, document_store):
1012947
metadata_fields=["nonexistent_field"],
1013948
)
1014949

1015-
def test_get_metadata_field_unique_values(self, document_store):
1016-
docs = [
1017-
Document(content="Doc 1", meta={"category": "TypeA"}),
1018-
Document(content="Doc 2", meta={"category": "TypeB"}),
1019-
Document(content="Doc 3", meta={"category": "TypeA"}),
1020-
Document(content="Doc 4", meta={"category": "TypeC"}),
1021-
Document(content="Doc 5", meta={"category": "TypeB"}),
1022-
]
1023-
document_store.write_documents(docs)
1024-
1025-
values, total_count = document_store.get_metadata_field_unique_values("category")
1026-
assert total_count == 3
1027-
assert set(values) == {"TypeA", "TypeB", "TypeC"}
1028-
1029950
def test_get_metadata_field_unique_values_with_meta_prefix(self, document_store):
1030951
docs = [
1031952
Document(content="Doc 1", meta={"category": "TypeA"}),
@@ -1078,3 +999,138 @@ def test_get_metadata_field_unique_values_empty_result(self, document_store):
1078999
values, total_count = document_store.get_metadata_field_unique_values("category")
10791000
assert total_count == 0
10801001
assert values == []
1002+
1003+
# --- Overrides of mixin tests to account for Weaviate-specific behaviour ---
1004+
1005+
def test_count_documents_by_filter_simple(self, document_store):
    """Count documents matching a single equality filter on ``meta.category``."""
    rows = [
        ("Doc 1", "TypeA"),
        ("Doc 2", "TypeB"),
        ("Doc 3", "TypeA"),
        ("Doc 4", "TypeA"),
    ]
    document_store.write_documents([Document(content=text, meta={"category": category}) for text, category in rows])
    assert document_store.count_documents() == 4

    # Three documents are TypeA, one is TypeB, and TypeC was never written.
    expected_counts = (("TypeA", 3), ("TypeB", 1), ("TypeC", 0))
    for category, expected in expected_counts:
        matching = document_store.count_documents_by_filter(
            filters={"field": "meta.category", "operator": "==", "value": category}
        )
        assert matching == expected
1029+
1030+
def test_count_documents_by_filter_compound(self, document_store):
    """Test count_documents_by_filter() with AND filter."""
    rows = [
        ("Doc 1", "TypeA", "active"),
        ("Doc 2", "TypeB", "active"),
        ("Doc 3", "TypeA", "inactive"),
        ("Doc 4", "TypeA", "active"),
    ]
    document_store.write_documents(
        [Document(content=text, meta={"category": category, "status": status}) for text, category, status in rows]
    )

    # Both conditions must hold: only Doc 1 and Doc 4 are TypeA *and* active.
    type_a_and_active = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.category", "operator": "==", "value": "TypeA"},
            {"field": "meta.status", "operator": "==", "value": "active"},
        ],
    }
    matching = document_store.count_documents_by_filter(filters=type_a_and_active)  # type:ignore[attr-defined]
    assert matching == 2
1050+
1051+
def test_count_documents_by_filter_empty_collection(self, document_store):
    """Test count_documents_by_filter() on an empty store."""
    # Nothing has been written, so the total count and the filtered count are both zero.
    assert document_store.count_documents() == 0

    category_filter = {"field": "meta.category", "operator": "==", "value": "TypeA"}
    matching = document_store.count_documents_by_filter(filters=category_filter)  # type:ignore[attr-defined]
    assert matching == 0
1059+
1060+
def test_count_unique_metadata_by_filter_with_filter(self, document_store):
    """Count distinct ``status`` values among the documents whose category is TypeA."""
    rows = [
        ("Doc 1", "TypeA", "draft"),
        ("Doc 2", "TypeB", "published"),
        ("Doc 3", "TypeA", "draft"),
        ("Doc 4", "TypeC", "published"),
        ("Doc 5", "TypeA", "archived"),
    ]
    document_store.write_documents(
        [Document(content=text, meta={"category": category, "status": status}) for text, category, status in rows]
    )

    # The TypeA documents carry "draft" (twice) and "archived": two distinct statuses.
    only_type_a = {"field": "meta.category", "operator": "==", "value": "TypeA"}
    unique_counts = document_store.count_unique_metadata_by_filter(filters=only_type_a, metadata_fields=["status"])
    assert unique_counts["status"] == 2
1074+
1075+
def test_count_unique_metadata_by_filter_with_multiple_filters(self, document_store):
    """Test counting unique metadata values with multiple (AND-combined) filters.

    Writes four documents covering every combination of two categories and two
    years, then restricts to ``category == "TypeB" AND year == 2023``. The
    compound filter is checked twice: once through ``count_documents_by_filter``
    and once through ``count_unique_metadata_by_filter``, the method this test
    is named after.
    """
    docs = [
        Document(content="Doc 1", meta={"category": "TypeA", "year": 2023}),
        Document(content="Doc 2", meta={"category": "TypeA", "year": 2024}),
        Document(content="Doc 3", meta={"category": "TypeB", "year": 2023}),
        Document(content="Doc 4", meta={"category": "TypeB", "year": 2024}),
    ]
    document_store.write_documents(docs)

    type_b_in_2023 = {
        "operator": "AND",
        "conditions": [
            {"field": "meta.category", "operator": "==", "value": "TypeB"},
            {"field": "meta.year", "operator": "==", "value": 2023},
        ],
    }

    # Original assertion, kept as a sanity check: only Doc 3 satisfies both conditions.
    count = document_store.count_documents_by_filter(filters=type_b_in_2023)  # type:ignore[attr-defined]
    assert count == 1

    # Previously missing: exercise count_unique_metadata_by_filter itself. With a
    # single matching document, each requested field has exactly one distinct value.
    unique_counts = document_store.count_unique_metadata_by_filter(
        filters=type_b_in_2023,
        metadata_fields=["category", "year"],
    )
    assert unique_counts["category"] == 1
    assert unique_counts["year"] == 1
1094+
1095+
@staticmethod
def test_get_metadata_field_min_max_empty_collection(document_store):
    """Min/max aggregation over an empty collection is expected to report 0 for both bounds."""
    # Weaviate can only be queried on fields already declared in the schema. The mixin
    # version of this test asks for "priority", which the pre-defined schema lacks, so
    # this override asks for "number", which the fixture's collection_settings declare.
    assert document_store.count_documents() == 0
    bounds = document_store.get_metadata_field_min_max("number")
    for bound in ("min", "max"):
        assert bounds[bound] == 0
1104+
1105+
@staticmethod
def test_get_metadata_fields_info_empty_collection(document_store):
    """An empty collection still reports the metadata fields pre-declared by the fixture."""
    # A Weaviate collection carries a fixed schema whether or not any documents have
    # been written. The fixture pre-declares "number", "date", "category" and "status",
    # so get_metadata_fields_info() returns those on an empty collection, not the
    # empty dict the generic mixin expects.
    declared_fields = {"number", "date", "category", "status"}
    assert document_store.count_documents() == 0
    reported = document_store.get_metadata_fields_info()
    assert set(reported.keys()) == declared_fields
1114+
1115+
@staticmethod
def test_count_unique_metadata_by_filter_all_documents(document_store):
    """Count distinct metadata values across every document in the store."""
    # The generic mixin passes filters={} (an empty dict) to mean "no filter".
    # Weaviate's convert_filters() does not accept an empty dict, so this override
    # uses a filter that explicitly selects all documents instead.
    rows = [
        ("Doc 1", "A", "active", 1),
        ("Doc 2", "B", "active", 2),
        ("Doc 3", "A", "inactive", 1),
        ("Doc 4", "A", "active", 3),
        ("Doc 5", "C", "active", 2),
    ]
    document_store.write_documents(
        [
            Document(content=text, meta={"category": category, "status": status, "priority": priority})
            for text, category, status, priority in rows
        ]
    )
    assert document_store.count_documents() == 5

    # Every written priority is >= 1, so this filter matches all five documents.
    match_everything = {"field": "meta.priority", "operator": ">=", "value": 1}
    unique_counts = document_store.count_unique_metadata_by_filter(
        filters=match_everything,
        metadata_fields=["category", "status", "priority"],
    )
    for field_name, expected in (("category", 3), ("status", 2), ("priority", 3)):
        assert unique_counts[field_name] == expected

0 commit comments

Comments
 (0)