diff --git a/integrations/faiss/pyproject.toml b/integrations/faiss/pyproject.toml index 6f99407e37..239663fc4a 100644 --- a/integrations/faiss/pyproject.toml +++ b/integrations/faiss/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: PyPy", ] dependencies = [ - "haystack-ai>=2.24.0", + "haystack-ai>=2.26.1", "faiss-cpu>=1.8.0", "numpy>=1.22,<2; python_version < '3.13'", ] diff --git a/integrations/faiss/src/haystack_integrations/document_stores/faiss/document_store.py b/integrations/faiss/src/haystack_integrations/document_stores/faiss/document_store.py index e0c82e2b74..9a08c6cd3b 100644 --- a/integrations/faiss/src/haystack_integrations/document_stores/faiss/document_store.py +++ b/integrations/faiss/src/haystack_integrations/document_stores/faiss/document_store.py @@ -489,18 +489,18 @@ def get_metadata_field_unique_values(self, field_name: str) -> list[Any]: values.add(val) return list(values) - def count_unique_metadata_by_filter(self, filters: dict[str, Any], fields: list[str]) -> dict[str, int]: + def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]: """ Returns a count of unique values for multiple metadata fields, optionally scoped by a filter. :param filters: A dictionary of filters to apply. - :param fields: A list of metadata field names to count unique values for. + :param metadata_fields: A list of metadata field names to count unique values for. :returns: A dictionary mapping each field name to the count of its unique values. """ filtered_docs = self.filter_documents(filters) counts = {} - for field in fields: + for field in metadata_fields: unique_vals = set() for doc in filtered_docs: val = FAISSDocumentStore._get_doc_value(doc, field) diff --git a/integrations/faiss/tests/test_document_store.py b/integrations/faiss/tests/test_document_store.py index a70ac4d472..13ea03eab9 100644 --- a/integrations/faiss/tests/test_document_store.py +++ b/integrations/faiss/tests/test_document_store.py @@ -6,11 +6,16 @@ from haystack.dataclasses import Document from haystack.errors import FilterError from haystack.testing.document_store import ( + CountDocumentsByFilterTest, CountDocumentsTest, + CountUniqueMetadataByFilterTest, DeleteAllTest, DeleteByFilterTest, DeleteDocumentsTest, FilterDocumentsTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldUniqueValuesTest, UpdateByFilterTest, ) @@ -24,20 +29,23 @@ class TestFAISSDocumentStore( UpdateByFilterTest, DeleteAllTest, DeleteByFilterTest, + CountDocumentsByFilterTest, + CountUniqueMetadataByFilterTest, + GetMetadataFieldsInfoTest, + GetMetadataFieldMinMaxTest, + GetMetadataFieldUniqueValuesTest, ): @pytest.fixture def document_store(self, tmp_path): return FAISSDocumentStore(index_path=str(tmp_path / "test_index")) def test_write_documents(self, document_store): - doc = Document(content="test") document_store.write_documents([doc]) assert document_store.count_documents() == 1 assert document_store.filter_documents()[0].id == doc.id def test_persistence(self, tmp_path): - path = tmp_path / "persistent_index" ds = FAISSDocumentStore(index_path=str(path), embedding_dim=3) @@ -73,7 +81,6 @@ def test_load_missing_files(self, tmp_path): ds.load(path) def test_search_with_and_without_filters(self, document_store): - # Setup documents with missing/varied embeddings to test edge cases doc1 = Document(content="test1", embedding=[0.1, 0.2, 0.3], meta={"category": "A"}) doc2 = Document(content="test2", embedding=[0.4, 0.5, 0.6], meta={"category": "B"}) @@ -97,7 +104,6 @@ def test_search_with_and_without_filters(self, document_store): def test_to_dict_from_dict(self): ds = FAISSDocumentStore(index_path="test_index", index_string="Flat", embedding_dim=128) - data = ds.to_dict() assert data["type"] == "haystack_integrations.document_stores.faiss.document_store.FAISSDocumentStore" assert data["init_parameters"]["index_path"] == "test_index" @@ -109,50 +115,7 @@ def test_to_dict_from_dict(self): assert ds_loaded.index_string == "Flat" assert ds_loaded.embedding_dim == 128 - def test_count_documents_by_filter(self, document_store): - - docs = [ - Document(content="test1", meta={"category": "A"}), - Document(content="test2", meta={"category": "B"}), - Document(content="test3", meta={"category": "A"}), - ] - document_store.write_documents(docs) - - count = document_store.count_documents_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "A"} - ) - assert count == 2 - - def test_get_metadata_fields_info(self, document_store): - - docs = [Document(content="test1", meta={"category": "A", "count": 1, "is_active": True})] - document_store.write_documents(docs) - - info = document_store.get_metadata_fields_info() - assert "category" in info - assert info["category"]["type"] == "keyword" - assert "count" in info - assert info["count"]["type"] == "long" - assert "is_active" in info - assert info["is_active"]["type"] == "boolean" - - def test_count_unique_metadata_by_filter(self, document_store): - - docs = [ - Document(content="test1", meta={"category": "A", "status": "active"}), - Document(content="test2", meta={"category": "B", "status": "inactive"}), - Document(content="test3", meta={"category": "A", "status": "active"}), - ] - document_store.write_documents(docs) - - counts = document_store.count_unique_metadata_by_filter( - filters={"field": "meta.category", "operator": "==", "value": "A"}, fields=["meta.status"] - ) - assert "meta.status" in counts - assert counts["meta.status"] == 1 # Only "active" status for category A - def test_not_filter_with_empty_conditions_raises_filter_error(self, document_store): document_store.write_documents([Document(content="test", meta={"category": "A"})]) - with pytest.raises(FilterError, match="NOT operator expects at least one condition"): document_store.filter_documents(filters={"operator": "NOT", "conditions": []})