From 9bb6b31462a4fe7a94d8ad5a2668de6e3e76e694 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 18 Mar 2026 09:50:04 +0100 Subject: [PATCH 1/7] adding Mixin tests --- integrations/weaviate/tests/test_document_store.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index aee5549e0b..89bb093ec5 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -16,6 +16,11 @@ from haystack.testing.document_store import ( DocumentStoreBaseExtendedTests, create_filterable_docs, + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldUniqueValuesTest, ) from haystack.utils.auth import Secret from numpy import array as np_array @@ -45,7 +50,14 @@ def test_init_is_lazy(_mock_client): @pytest.mark.integration -class TestWeaviateDocumentStore(DocumentStoreBaseExtendedTests): +class TestWeaviateDocumentStore( + DocumentStoreBaseExtendedTests, + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldUniqueValuesTest, +): @pytest.fixture def document_store(self, request) -> Generator[WeaviateDocumentStore, None, None]: # Use a different index for each test so we can run them in parallel From 6b7c0c091c6ee6d48947ca4e6e767ba366371b39 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 18 Mar 2026 10:22:15 +0100 Subject: [PATCH 2/7] overriding some Mixin tests --- .../weaviate/tests/test_document_store.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index 89bb093ec5..e938c6e515 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -1090,3 +1090,49 @@ def test_get_metadata_field_unique_values_empty_result(self, document_store): values, total_count = document_store.get_metadata_field_unique_values("category") assert total_count == 0 assert values == [] + + # --- Overrides of mixin tests to account for Weaviate-specific behaviour --- + + @staticmethod + def test_get_metadata_field_min_max_empty_collection(document_store): + # Weaviate requires fields to be declared in the schema before querying them. + # The mixin uses "priority" which is not in the pre-defined schema, so we use + # "number" which IS declared in the fixture's collection_settings. + # An aggregate over an empty collection should return None for both min and max. + assert document_store.count_documents() == 0 + result = document_store.get_metadata_field_min_max("number") + assert result["min"] is None + assert result["max"] is None + + @staticmethod + def test_get_metadata_fields_info_empty_collection(document_store): + # Weaviate collections always carry a fixed schema regardless of whether any + # documents have been written. The fixture pre-declares "number", "date", + # "category" and "status", so get_metadata_fields_info() will return those + # even on an empty collection instead of the empty dict the generic mixin expects. + assert document_store.count_documents() == 0 + fields_info = document_store.get_metadata_fields_info() + assert set(fields_info.keys()) == {"number", "date", "category", "status"} + + @staticmethod + def test_count_unique_metadata_by_filter_all_documents(document_store): + # The generic mixin passes filters={} (empty dict) to mean "no filter". + # Weaviate's convert_filters() does not accept an empty dict; a filter that + # explicitly selects all documents must be used instead. + docs = [ + Document(content="Doc 1", meta={"category": "A", "status": "active", "priority": 1}), + Document(content="Doc 2", meta={"category": "B", "status": "active", "priority": 2}), + Document(content="Doc 3", meta={"category": "A", "status": "inactive", "priority": 1}), + Document(content="Doc 4", meta={"category": "A", "status": "active", "priority": 3}), + Document(content="Doc 5", meta={"category": "C", "status": "active", "priority": 2}), + ] + document_store.write_documents(docs) + assert document_store.count_documents() == 5 + + counts = document_store.count_unique_metadata_by_filter( + filters={"field": "meta.priority", "operator": ">=", "value": 1}, + metadata_fields=["category", "status", "priority"], + ) + assert counts["category"] == 3 + assert counts["status"] == 2 + assert counts["priority"] == 3 From 967799cb6fc71c44d359750b6427e1061d032f5a Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Wed, 18 Mar 2026 11:17:44 +0100 Subject: [PATCH 3/7] overriding some Mixin tests --- .../weaviate/tests/test_document_store.py | 173 ++++++++++-------- 1 file changed, 93 insertions(+), 80 deletions(-) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index e938c6e515..6c56b733ad 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -888,31 +888,6 @@ def test_update_by_filter_with_pagination(self, document_store, monkeypatch): assert "index" in doc.meta assert 0 <= doc.meta["index"] < 250 - def test_count_documents_by_filter(self, document_store): - docs = [ - Document(content="Doc 1", meta={"category": "TypeA"}), - Document(content="Doc 2", meta={"category": "TypeB"}), - Document(content="Doc 3", meta={"category": "TypeA"}), - Document(content="Doc 4", meta={"category": "TypeA"}), - ] - document_store.write_documents(docs) - assert document_store.count_documents() == 4 - - count = document_store.count_documents_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "TypeA"} - ) - assert count == 3 - - count = document_store.count_documents_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "TypeB"} - ) - assert count == 1 - - count = document_store.count_documents_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "TypeC"} - ) - assert count == 0 - def test_get_metadata_fields_info(self, document_store): fields_info = document_store.get_metadata_fields_info() @@ -932,30 +907,6 @@ def test_get_metadata_fields_info(self, document_store): assert "status" in fields_info assert fields_info["status"]["type"] == "text" - def test_get_metadata_field_min_max(self, document_store): - docs = [ - Document(content="Doc 1", meta={"number": 10}), - Document(content="Doc 2", meta={"number": 5}), - Document(content="Doc 3", meta={"number": 20}), - Document(content="Doc 4", meta={"number": 15}), - ] - document_store.write_documents(docs) - - result = document_store.get_metadata_field_min_max("number") - assert result["min"] == 5 - assert result["max"] == 20 - - def test_get_metadata_field_min_max_with_meta_prefix(self, document_store): - docs = [ - Document(content="Doc 1", meta={"number": 100}), - Document(content="Doc 2", meta={"number": 200}), - ] - document_store.write_documents(docs) - - result = document_store.get_metadata_field_min_max("meta.number") - assert result["min"] == 100 - assert result["max"] == 200 - def test_get_metadata_field_min_max_unsupported_type(self, document_store): with pytest.raises(ValueError, match="doesn't support min/max aggregation"): document_store.get_metadata_field_min_max("category") @@ -964,34 +915,6 @@ def test_get_metadata_field_min_max_field_not_found(self, document_store): with pytest.raises(ValueError, match="not found in collection schema"): document_store.get_metadata_field_min_max("nonexistent_field") - def test_count_unique_metadata_by_filter(self, document_store): - docs = [ - Document(content="Doc 1", meta={"category": "TypeA", "status": "draft"}), - Document(content="Doc 2", meta={"category": "TypeB", "status": "published"}), - Document(content="Doc 3", meta={"category": "TypeA", "status": "draft"}), - Document(content="Doc 4", meta={"category": "TypeC", "status": "published"}), - Document(content="Doc 5", meta={"category": "TypeA", "status": "archived"}), - ] - document_store.write_documents(docs) - - result = document_store.count_unique_metadata_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "TypeA"}, metadata_fields=["status"] - ) - assert result["status"] == 2 - - result = document_store.count_unique_metadata_by_filter( - filters={ - "operator": "OR", - "conditions": [ - {"field": "meta.category", "operator": "==", "value": "TypeA"}, - {"field": "meta.category", "operator": "==", "value": "TypeB"}, - ], - }, - metadata_fields=["category", "status"], - ) - assert result["category"] == 2 - assert result["status"] == 3 - def test_count_unique_metadata_by_filter_with_meta_prefix(self, document_store): docs = [ Document(content="Doc 1", meta={"category": "TypeA"}), @@ -1093,16 +1016,106 @@ def test_get_metadata_field_unique_values_empty_result(self, document_store): # --- Overrides of mixin tests to account for Weaviate-specific behaviour --- + def test_count_documents_by_filter_simple(self, document_store): + docs = [ + Document(content="Doc 1", meta={"category": "TypeA"}), + Document(content="Doc 2", meta={"category": "TypeB"}), + Document(content="Doc 3", meta={"category": "TypeA"}), + Document(content="Doc 4", meta={"category": "TypeA"}), + ] + document_store.write_documents(docs) + assert document_store.count_documents() == 4 + + count = document_store.count_documents_by_filter( + filters={"field": "meta.category", "operator": "==", "value": "TypeA"} + ) + assert count == 3 + + count = document_store.count_documents_by_filter( + filters={"field": "meta.category", "operator": "==", "value": "TypeB"} + ) + assert count == 1 + + count = document_store.count_documents_by_filter( + filters={"field": "meta.category", "operator": "==", "value": "TypeC"} + ) + assert count == 0 + + def test_count_documents_by_filter_compound(self, document_store): + """Test count_documents_by_filter() with AND filter.""" + docs = [ + Document(content="Doc 1", meta={"category": "TypeA", "status": "active"}), + Document(content="Doc 2", meta={"category": "TypeB", "status": "active"}), + Document(content="Doc 3", meta={"category": "TypeA", "status": "inactive"}), + Document(content="Doc 4", meta={"category": "TypeA", "status": "active"}), + ] + document_store.write_documents(docs) + + count = document_store.count_documents_by_filter( # type:ignore[attr-defined] + filters={ + "operator": "AND", + "conditions": [ + {"field": "meta.category", "operator": "==", "value": "TypeA"}, + {"field": "meta.status", "operator": "==", "value": "active"}, + ], + } + ) + assert count == 2 + + def test_count_documents_by_filter_empty_collection(self, document_store): + """Test count_documents_by_filter() on an empty store.""" + assert document_store.count_documents() == 0 + + count = document_store.count_documents_by_filter( # type:ignore[attr-defined] + filters={"field": "meta.category", "operator": "==", "value": "TypeA"} + ) + assert count == 0 + + + def test_count_unique_metadata_by_filter_with_filter(self, document_store): + docs = [ + Document(content="Doc 1", meta={"category": "TypeA", "status": "draft"}), + Document(content="Doc 2", meta={"category": "TypeB", "status": "published"}), + Document(content="Doc 3", meta={"category": "TypeA", "status": "draft"}), + Document(content="Doc 4", meta={"category": "TypeC", "status": "published"}), + Document(content="Doc 5", meta={"category": "TypeA", "status": "archived"}), + ] + document_store.write_documents(docs) + + result = document_store.count_unique_metadata_by_filter( + filters={"field": "meta.category", "operator": "==", "value": "TypeA"}, metadata_fields=["status"] + ) + assert result["status"] == 2 + + def test_count_unique_metadata_by_filter_with_multiple_filters(self, document_store): + """Test counting with multiple filters""" + docs = [ + Document(content="Doc 1", meta={"category": "TypeA", "year": 2023}), + Document(content="Doc 2", meta={"category": "TypeA", "year": 2024}), + Document(content="Doc 3", meta={"category": "TypeB", "year": 2023}), + Document(content="Doc 4", meta={"category": "TypeB", "year": 2024}), + ] + document_store.write_documents(docs) + count = document_store.count_documents_by_filter( # type:ignore[attr-defined] + filters={ + "operator": "AND", + "conditions": [ + {"field": "meta.category", "operator": "==", "value": "TypeB"}, + {"field": "meta.year", "operator": "==", "value": 2023}, + ], + } + ) + assert count == 1 + @staticmethod def test_get_metadata_field_min_max_empty_collection(document_store): # Weaviate requires fields to be declared in the schema before querying them. # The mixin uses "priority" which is not in the pre-defined schema, so we use # "number" which IS declared in the fixture's collection_settings. - # An aggregate over an empty collection should return None for both min and max. assert document_store.count_documents() == 0 result = document_store.get_metadata_field_min_max("number") - assert result["min"] is None - assert result["max"] is None + assert result["min"] is 0 + assert result["max"] is 0 @staticmethod def test_get_metadata_fields_info_empty_collection(document_store): From 842ecd6ce5f2eae101cc5d742af4bd833d785f17 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 19 Mar 2026 13:02:46 +0100 Subject: [PATCH 4/7] chore: update haystack-ai dependency to >=2.26.1 Co-Authored-By: Claude Sonnet 4.6 --- integrations/weaviate/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/weaviate/pyproject.toml b/integrations/weaviate/pyproject.toml index 735f5e5d3a..e83867a6ec 100644 --- a/integrations/weaviate/pyproject.toml +++ b/integrations/weaviate/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai>=2.24.0", + "haystack-ai>=2.26.1", "weaviate-client>=4.20", "python-dateutil", ] From f41588dc624681b9811fb8028518545f0e70e27a Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 19 Mar 2026 13:05:24 +0100 Subject: [PATCH 5/7] increasing lowest haystack dependency --- integrations/elasticsearch/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/elasticsearch/pyproject.toml b/integrations/elasticsearch/pyproject.toml index 72bd12548d..44c3715642 100644 --- a/integrations/elasticsearch/pyproject.toml +++ b/integrations/elasticsearch/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai>=2.24.0", + "haystack-ai>=2.26.1", "elasticsearch>=8,<9", "aiohttp>=3.9.0" # for async support https://elasticsearch-py.readthedocs.io/en/latest/async.html#valueerror-when-initializing-asyncelasticsearch ] From d067a74e0d4b1ca2fff5b5e4f5155ccdb4a03f12 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 20 Mar 2026 18:35:44 +0100 Subject: [PATCH 6/7] ruff fix --- .../weaviate/tests/test_document_store.py | 24 ++++--------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index 6c56b733ad..41a695757c 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -14,13 +14,13 @@ from haystack.dataclasses.document import Document from haystack.document_stores.errors import DocumentStoreError from haystack.testing.document_store import ( - DocumentStoreBaseExtendedTests, - create_filterable_docs, CountDocumentsByFilterTest, CountUniqueMetadataByFilterTest, - GetMetadataFieldsInfoTest, + DocumentStoreBaseExtendedTests, GetMetadataFieldMinMaxTest, + GetMetadataFieldsInfoTest, GetMetadataFieldUniqueValuesTest, + create_filterable_docs, ) from haystack.utils.auth import Secret from numpy import array as np_array @@ -947,20 +947,6 @@ def test_count_unique_metadata_by_filter_field_not_found(self, document_store): metadata_fields=["nonexistent_field"], ) - def test_get_metadata_field_unique_values(self, document_store): - docs = [ - Document(content="Doc 1", meta={"category": "TypeA"}), - Document(content="Doc 2", meta={"category": "TypeB"}), - Document(content="Doc 3", meta={"category": "TypeA"}), - Document(content="Doc 4", meta={"category": "TypeC"}), - Document(content="Doc 5", meta={"category": "TypeB"}), - ] - document_store.write_documents(docs) - - values, total_count = document_store.get_metadata_field_unique_values("category") - assert total_count == 3 - assert set(values) == {"TypeA", "TypeB", "TypeC"} - def test_get_metadata_field_unique_values_with_meta_prefix(self, document_store): docs = [ Document(content="Doc 1", meta={"category": "TypeA"}), @@ -1114,8 +1100,8 @@ def test_get_metadata_field_min_max_empty_collection(document_store): # "number" which IS declared in the fixture's collection_settings. assert document_store.count_documents() == 0 result = document_store.get_metadata_field_min_max("number") - assert result["min"] is 0 - assert result["max"] is 0 + assert result["min"] == 0 + assert result["max"] == 0 @staticmethod def test_get_metadata_fields_info_empty_collection(document_store): From 4bad12fe432e231628178985b98825ec17d4cd54 Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Fri, 20 Mar 2026 18:38:54 +0100 Subject: [PATCH 7/7] formatting --- integrations/weaviate/tests/test_document_store.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integrations/weaviate/tests/test_document_store.py b/integrations/weaviate/tests/test_document_store.py index 41a695757c..8d1be5f5bc 100644 --- a/integrations/weaviate/tests/test_document_store.py +++ b/integrations/weaviate/tests/test_document_store.py @@ -1057,7 +1057,6 @@ def test_count_documents_by_filter_empty_collection(self, document_store): ) assert count == 0 - def test_count_unique_metadata_by_filter_with_filter(self, document_store): docs = [ Document(content="Doc 1", meta={"category": "TypeA", "status": "draft"}),