Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/faiss/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"haystack-ai>=2.24.0",
"haystack-ai>=2.26.1",
"faiss-cpu>=1.8.0",
"numpy>=1.22,<2; python_version < '3.13'",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,18 +489,18 @@ def get_metadata_field_unique_values(self, field_name: str) -> list[Any]:
values.add(val)
return list(values)

def count_unique_metadata_by_filter(self, filters: dict[str, Any], fields: list[str]) -> dict[str, int]:
def count_unique_metadata_by_filter(self, filters: dict[str, Any], metadata_fields: list[str]) -> dict[str, int]:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

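This is a breaking change: callers that pass `fields=` by keyword will fail once the parameter is renamed to `metadata_fields`.
I'm OK with it (for consistency), but let's make sure to release a new major version (2.0.0).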

"""
Returns a count of unique values for multiple metadata fields, optionally scoped by a filter.

:param filters: A dictionary of filters to apply.
:param fields: A list of metadata field names to count unique values for.
:param metadata_fields: A list of metadata field names to count unique values for.
:returns: A dictionary mapping each field name to the count of its unique values.
"""
filtered_docs = self.filter_documents(filters)
counts = {}

for field in fields:
for field in metadata_fields:
unique_vals = set()
for doc in filtered_docs:
val = FAISSDocumentStore._get_doc_value(doc, field)
Expand Down
57 changes: 10 additions & 47 deletions integrations/faiss/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@
from haystack.dataclasses import Document
from haystack.errors import FilterError
from haystack.testing.document_store import (
CountDocumentsByFilterTest,
CountDocumentsTest,
CountUniqueMetadataByFilterTest,
DeleteAllTest,
DeleteByFilterTest,
DeleteDocumentsTest,
FilterDocumentsTest,
GetMetadataFieldMinMaxTest,
GetMetadataFieldsInfoTest,
GetMetadataFieldUniqueValuesTest,
UpdateByFilterTest,
)

Expand All @@ -24,20 +29,23 @@ class TestFAISSDocumentStore(
UpdateByFilterTest,
DeleteAllTest,
DeleteByFilterTest,
CountDocumentsByFilterTest,
CountUniqueMetadataByFilterTest,
GetMetadataFieldsInfoTest,
GetMetadataFieldMinMaxTest,
GetMetadataFieldUniqueValuesTest,
):
@pytest.fixture
def document_store(self, tmp_path):
return FAISSDocumentStore(index_path=str(tmp_path / "test_index"))

def test_write_documents(self, document_store):

doc = Document(content="test")
document_store.write_documents([doc])
assert document_store.count_documents() == 1
assert document_store.filter_documents()[0].id == doc.id

def test_persistence(self, tmp_path):

path = tmp_path / "persistent_index"
ds = FAISSDocumentStore(index_path=str(path), embedding_dim=3)

Expand Down Expand Up @@ -73,7 +81,6 @@ def test_load_missing_files(self, tmp_path):
ds.load(path)

def test_search_with_and_without_filters(self, document_store):

# Setup documents with missing/varied embeddings to test edge cases
doc1 = Document(content="test1", embedding=[0.1, 0.2, 0.3], meta={"category": "A"})
doc2 = Document(content="test2", embedding=[0.4, 0.5, 0.6], meta={"category": "B"})
Expand All @@ -97,7 +104,6 @@ def test_search_with_and_without_filters(self, document_store):

def test_to_dict_from_dict(self):
ds = FAISSDocumentStore(index_path="test_index", index_string="Flat", embedding_dim=128)

data = ds.to_dict()
assert data["type"] == "haystack_integrations.document_stores.faiss.document_store.FAISSDocumentStore"
assert data["init_parameters"]["index_path"] == "test_index"
Expand All @@ -109,50 +115,7 @@ def test_to_dict_from_dict(self):
assert ds_loaded.index_string == "Flat"
assert ds_loaded.embedding_dim == 128

def test_count_documents_by_filter(self, document_store):

docs = [
Document(content="test1", meta={"category": "A"}),
Document(content="test2", meta={"category": "B"}),
Document(content="test3", meta={"category": "A"}),
]
document_store.write_documents(docs)

count = document_store.count_documents_by_filter(
filters={"field": "meta.category", "operator": "==", "value": "A"}
)
assert count == 2

def test_get_metadata_fields_info(self, document_store):

docs = [Document(content="test1", meta={"category": "A", "count": 1, "is_active": True})]
document_store.write_documents(docs)

info = document_store.get_metadata_fields_info()
assert "category" in info
assert info["category"]["type"] == "keyword"
assert "count" in info
assert info["count"]["type"] == "long"
assert "is_active" in info
assert info["is_active"]["type"] == "boolean"

def test_count_unique_metadata_by_filter(self, document_store):

docs = [
Document(content="test1", meta={"category": "A", "status": "active"}),
Document(content="test2", meta={"category": "B", "status": "inactive"}),
Document(content="test3", meta={"category": "A", "status": "active"}),
]
document_store.write_documents(docs)

counts = document_store.count_unique_metadata_by_filter(
filters={"field": "meta.category", "operator": "==", "value": "A"}, fields=["meta.status"]
)
assert "meta.status" in counts
assert counts["meta.status"] == 1 # Only "active" status for category A

def test_not_filter_with_empty_conditions_raises_filter_error(self, document_store):
document_store.write_documents([Document(content="test", meta={"category": "A"})])

with pytest.raises(FilterError, match="NOT operator expects at least one condition"):
document_store.filter_documents(filters={"operator": "NOT", "conditions": []})
Loading