Skip to content

Commit 6469748

Browse files
committed
refactoring to reduce duplicated code + fixing typos
1 parent dc0ec94 commit 6469748

2 files changed

Lines changed: 21 additions & 19 deletions

File tree

integrations/weaviate/src/haystack_integrations/document_stores/weaviate/_filters.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,18 @@
1111
from weaviate.collections.classes.filters import Filter, FilterReturn
1212

1313

14+
def validate_filters(filters: dict[str, Any] | None) -> None:
15+
"""
16+
Validates that filters have the correct structure.
17+
18+
:param filters: The filters to validate.
19+
:raises ValueError: If filters are provided but have invalid syntax.
20+
"""
21+
if filters and "operator" not in filters and "conditions" not in filters:
22+
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
23+
raise ValueError(msg)
24+
25+
1426
def convert_filters(filters: dict[str, Any]) -> FilterReturn:
1527
"""
1628
Convert filters from Haystack format to Weaviate format.

integrations/weaviate/src/haystack_integrations/document_stores/weaviate/document_store.py

Lines changed: 9 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
from weaviate.embedded import EmbeddedOptions
2121
from weaviate.util import generate_uuid5
2222

23-
from ._filters import convert_filters
23+
from ._filters import convert_filters, validate_filters
2424
from .auth import AuthCredentials
2525

2626
logger = logging.getLogger(__name__)
@@ -420,7 +420,7 @@ def _query_with_filters(self, filters: dict[str, Any]) -> list[DataObject[dict[s
420420
#
421421
# Nonetheless there's also another issue, paginating with limit and offset is not efficient
422422
# and it's still restricted by the QUERY_MAXIMUM_RESULTS environment variable.
423-
# If the sum of limit and offest is greater than QUERY_MAXIMUM_RESULTS an error is raised.
423+
# If the sum of limit and offset is greater than QUERY_MAXIMUM_RESULTS an error is raised.
424424
# See the official docs for more:
425425
# https://weaviate.io/developers/weaviate/api/graphql/additional-operators#performance-considerations
426426
offset = 0
@@ -453,9 +453,7 @@ def filter_documents(self, filters: Optional[dict[str, Any]] = None) -> list[Doc
453453
:param filters: The filters to apply to the document list.
454454
:returns: A list of Documents that match the given filters.
455455
"""
456-
if filters and "operator" not in filters and "conditions" not in filters:
457-
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
458-
raise ValueError(msg)
456+
validate_filters(filters)
459457

460458
result = []
461459
if filters:
@@ -484,7 +482,7 @@ def _batch_write(self, documents: list[Document]) -> int:
484482
vector=doc.embedding,
485483
)
486484
if failed_objects := self.client.batch.failed_objects:
487-
# We fallback to use the UUID if the _original_id is not present, this is just to be
485+
# We fall back to use the UUID if the _original_id is not present, this is just to be
488486
mapped_objects = {}
489487
for obj in failed_objects:
490488
properties = obj.object_.properties or {}
@@ -508,7 +506,7 @@ def _batch_write(self, documents: list[Document]) -> int:
508506
def _write(self, documents: list[Document], policy: DuplicatePolicy) -> int:
509507
"""
510508
Writes documents to Weaviate using the specified policy.
511-
This doesn't uses the batch API, so it's slower than _batch_write.
509+
This doesn't use the batch API, so it's slower than _batch_write.
512510
If policy is set to SKIP it will skip any document that already exists.
513511
If policy is set to FAIL it will raise an exception if any of the documents already exists.
514512
"""
@@ -619,9 +617,7 @@ def delete_by_filter(self, filters: dict[str, Any]) -> int:
619617
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
620618
:returns: The number of documents deleted.
621619
"""
622-
if filters and "operator" not in filters and "conditions" not in filters:
623-
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
624-
raise ValueError(msg)
620+
validate_filters(filters)
625621

626622
try:
627623
weaviate_filter = convert_filters(filters)
@@ -648,9 +644,7 @@ async def delete_by_filter_async(self, filters: dict[str, Any]) -> int:
648644
For filter syntax, see [Haystack metadata filtering](https://docs.haystack.deepset.ai/docs/metadata-filtering)
649645
:returns: The number of documents deleted.
650646
"""
651-
if filters and "operator" not in filters and "conditions" not in filters:
652-
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
653-
raise ValueError(msg)
647+
validate_filters(filters)
654648

655649
try:
656650
collection = await self.async_collection
@@ -679,9 +673,7 @@ def update_by_filter(self, filters: dict[str, Any], meta: dict[str, Any]) -> int
679673
:param meta: The metadata fields to update. These will be merged with existing metadata.
680674
:returns: The number of documents updated.
681675
"""
682-
if filters and "operator" not in filters and "conditions" not in filters:
683-
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
684-
raise ValueError(msg)
676+
validate_filters(filters)
685677

686678
if not isinstance(meta, dict):
687679
msg = "Meta must be a dictionary"
@@ -777,9 +769,7 @@ async def update_by_filter_async(self, filters: dict[str, Any], meta: dict[str,
777769
:param meta: The metadata fields to update. These will be merged with existing metadata.
778770
:returns: The number of documents updated.
779771
"""
780-
if filters and "operator" not in filters and "conditions" not in filters:
781-
msg = "Invalid filter syntax. See https://docs.haystack.deepset.ai/docs/metadata-filtering for details."
782-
raise ValueError(msg)
772+
validate_filters(filters)
783773

784774
if not isinstance(meta, dict):
785775
msg = "Meta must be a dictionary"

0 commit comments

Comments
 (0)