Skip to content

Commit b94031b

Browse files
feat: adding count with filtering operations to WeaviateDocumentStore (#2767)
* count_documents_by_filter and get_metadata_fields_info sync, async and tests
* get_metadata_field_min_max
* count_unique_metadata_by_filter
* formatter
* get_metadata_field_unique_values
* fix type annotation for mypy
* fix comments
* improving docstring regarding contains filter operation
* improving docstrings

---------

Co-authored-by: David S. Batista <dsbatista@gmail.com>
1 parent cfc5706 commit b94031b

5 files changed

Lines changed: 826 additions & 4 deletions

File tree

integrations/weaviate/src/haystack_integrations/components/retrievers/weaviate/bm25_retriever.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ class WeaviateBM25Retriever:
1818
1919
Example usage:
2020
```python
21-
from haystack_integrations.document_stores.weaviate.document_store import WeaviateDocumentStore
22-
from haystack_integrations.components.retrievers.weaviate.bm25_retriever import WeaviateBM25Retriever
21+
from haystack_integrations.document_stores.weaviate.document_store import (
22+
WeaviateDocumentStore,
23+
)
24+
from haystack_integrations.components.retrievers.weaviate.bm25_retriever import (
25+
WeaviateBM25Retriever,
26+
)
2327
2428
document_store = WeaviateDocumentStore(url="http://localhost:8080")
2529
retriever = WeaviateBM25Retriever(document_store=document_store)

integrations/weaviate/src/haystack_integrations/document_stores/weaviate/_filters.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,13 @@ def validate_filters(filters: dict[str, Any] | None) -> None:
2626
def convert_filters(filters: dict[str, Any]) -> FilterReturn:
2727
"""
2828
Convert filters from Haystack format to Weaviate format.
29+
30+
Supported comparison operators: ``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``,
31+
``in``, ``not in``, ``contains``.
32+
33+
Note: The ``contains`` operator performs substring matching and is
34+
**case-sensitive**. For case-insensitive matching, normalize the value
35+
(e.g., lowercase) before building the filter.
2936
"""
3037
if not isinstance(filters, dict):
3138
msg = "Filters must be a dictionary"
@@ -228,6 +235,19 @@ def _not_in(field: str, value: Any) -> FilterReturn:
228235
return Filter.all_of(operands)
229236

230237

238+
def _contains(field: str, value: Any) -> FilterReturn:
239+
"""
240+
Creates a filter for substring matching using Weaviate's 'like' operator.
241+
242+
The matching is case-sensitive. For case-insensitive matching, consider
243+
normalizing the value before passing it to this function.
244+
"""
245+
if not isinstance(value, str):
246+
msg = "Filter value must be a string when using 'contains' comparator"
247+
raise FilterError(msg)
248+
return weaviate.classes.query.Filter.by_property(field).like(f"*{value}*")
249+
250+
231251
COMPARISON_OPERATORS = {
232252
"==": _equal,
233253
"!=": _not_equal,
@@ -237,6 +257,7 @@ def _not_in(field: str, value: Any) -> FilterReturn:
237257
"<=": _less_than_equal,
238258
"in": _in,
239259
"not in": _not_in,
260+
"contains": _contains,
240261
}
241262

242263

0 commit comments

Comments
 (0)