Skip to content

Commit 7fd51b7

Browse files
committed
Iterator with filter
1 parent 7c479c9 commit 7fd51b7

11 files changed

Lines changed: 171 additions & 100 deletions

File tree

integration/test_iterator.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
)
2121
from weaviate.collections.iterator import ITERATOR_CACHE_SIZE
2222
from weaviate.exceptions import WeaviateInvalidInputError
23+
import weaviate.classes as wvc
2324

2425

2526
@pytest.fixture(scope="module")
@@ -225,3 +226,31 @@ def test_iterator_with_after(collection_factory: CollectionFactory) -> None:
225226
next(iterator).properties["data"]
226227
== collection.query.fetch_object_by_id(uuids[6]).properties["data"]
227228
)
229+
230+
231+
def test_iterator_with_filter(collection_factory: CollectionFactory) -> None:
    """Iterate a collection with a property filter and check only matching objects come back.

    Inserts objects whose ``bool`` property alternates between True and False,
    then walks the filtered iterator one object per page and verifies that
    every yielded object matches the filter and that exactly half are seen.
    """
    schema = [
        Property(name="bool", data_type=DataType.BOOL),
        Property(name="count", data_type=DataType.INT),
    ]
    collection = collection_factory(
        properties=schema,
        vectorizer_config=Configure.Vectorizer.none(),
        data_model_properties=Dict[str, int],
    )

    # The server version is read from the collection's connection, so the
    # skip decision can only be made once the collection exists.
    if collection._connection._weaviate_version.is_lower_than(1, 33, 0):
        pytest.skip("Iterator with filters requires Weaviate version 1.33 or higher")

    num_objects = 1000
    payload = [
        DataObject(properties={"bool": i % 2 == 0, "count": i}) for i in range(num_objects)
    ]
    res = collection.data.insert_many(payload)
    assert not res.has_errors

    only_true = wvc.query.Filter.by_property("bool").equal(True)

    # cache_size=1 is the smallest page size the test can request.
    count = 0
    for obj in collection.iterator(filters=only_true, cache_size=1):
        assert obj.properties["bool"] is True
        count += 1
    assert count == num_objects / 2

weaviate/collections/collection/async_.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from weaviate.collections.backups import _CollectionBackupAsync
88
from weaviate.collections.classes.cluster import Shard
99
from weaviate.collections.classes.config import ConsistencyLevel
10+
from weaviate.collections.classes.filters import _Filters
1011
from weaviate.collections.classes.grpc import METADATA, PROPERTIES, REFERENCES
1112
from weaviate.collections.classes.internal import (
1213
CrossReferences,
@@ -24,6 +25,7 @@
2425
from weaviate.collections.query import _QueryCollectionAsync
2526
from weaviate.collections.tenants import _TenantsAsync
2627
from weaviate.connect.v4 import ConnectionAsync
28+
from weaviate.exceptions import WeaviateUnsupportedFeatureError
2729
from weaviate.types import UUID
2830

2931
from .base import _CollectionBase
@@ -197,6 +199,7 @@ def iterator(
197199
include_vector: bool = False,
198200
return_metadata: Optional[METADATA] = None,
199201
*,
202+
filters: Optional[_Filters] = None,
200203
return_properties: Optional[PROPERTIES] = None,
201204
return_references: Literal[None] = None,
202205
after: Optional[UUID] = None,
@@ -209,6 +212,7 @@ def iterator(
209212
include_vector: bool = False,
210213
return_metadata: Optional[METADATA] = None,
211214
*,
215+
filters: Optional[_Filters] = None,
212216
return_properties: Optional[PROPERTIES] = None,
213217
return_references: REFERENCES,
214218
after: Optional[UUID] = None,
@@ -221,6 +225,7 @@ def iterator(
221225
include_vector: bool = False,
222226
return_metadata: Optional[METADATA] = None,
223227
*,
228+
filters: Optional[_Filters] = None,
224229
return_properties: Optional[PROPERTIES] = None,
225230
return_references: Type[TReferences],
226231
after: Optional[UUID] = None,
@@ -233,6 +238,7 @@ def iterator(
233238
include_vector: bool = False,
234239
return_metadata: Optional[METADATA] = None,
235240
*,
241+
filters: Optional[_Filters] = None,
236242
return_properties: Type[TProperties],
237243
return_references: Literal[None] = None,
238244
after: Optional[UUID] = None,
@@ -245,6 +251,7 @@ def iterator(
245251
include_vector: bool = False,
246252
return_metadata: Optional[METADATA] = None,
247253
*,
254+
filters: Optional[_Filters] = None,
248255
return_properties: Type[TProperties],
249256
return_references: REFERENCES,
250257
after: Optional[UUID] = None,
@@ -257,6 +264,7 @@ def iterator(
257264
include_vector: bool = False,
258265
return_metadata: Optional[METADATA] = None,
259266
*,
267+
filters: Optional[_Filters] = None,
260268
return_properties: Type[TProperties],
261269
return_references: Type[TReferences],
262270
after: Optional[UUID] = None,
@@ -268,6 +276,7 @@ def iterator(
268276
include_vector: bool = False,
269277
return_metadata: Optional[METADATA] = None,
270278
*,
279+
filters: Optional[_Filters] = None,
271280
return_properties: Optional[ReturnProperties[TProperties]] = None,
272281
return_references: Optional[ReturnReferences[TReferences]] = None,
273282
after: Optional[UUID] = None,
@@ -301,6 +310,11 @@ def iterator(
301310
Raises:
302311
weaviate.exceptions.WeaviateGRPCQueryError: If the request to the Weaviate server fails.
303312
"""
313+
# Only the *filtered* iterator needs server-side support. Plain iteration
# (filters=None) must keep working against servers older than 1.33, so the
# version check is applied only when a filter was actually supplied.
if filters is not None and self.query._connection._weaviate_version.is_lower_than(1, 33, 0):
    raise WeaviateUnsupportedFeatureError(
        "Iterator with filters", self._connection.server_version, "1.33.0"
    )
317+
304318
return _ObjectAIterator(
305319
self.query,
306320
_IteratorInputs(
@@ -309,6 +323,7 @@ def iterator(
309323
return_properties=return_properties,
310324
return_references=return_references,
311325
after=after,
326+
filters=filters,
312327
),
313328
cache_size=cache_size,
314329
)

weaviate/collections/collection/sync.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from weaviate.collections.batch.collection import _BatchCollectionWrapper
99
from weaviate.collections.classes.cluster import Shard
1010
from weaviate.collections.classes.config import ConsistencyLevel
11+
from weaviate.collections.classes.filters import _Filters
1112
from weaviate.collections.classes.grpc import METADATA, PROPERTIES, REFERENCES
1213
from weaviate.collections.classes.internal import (
1314
CrossReferences,
@@ -25,6 +26,7 @@
2526
from weaviate.collections.query import _QueryCollection
2627
from weaviate.collections.tenants import _Tenants
2728
from weaviate.connect.v4 import ConnectionSync
29+
from weaviate.exceptions import WeaviateUnsupportedFeatureError
2830
from weaviate.types import UUID
2931

3032
from .base import _CollectionBase
@@ -221,6 +223,7 @@ def iterator(
221223
include_vector: bool = False,
222224
return_metadata: Optional[METADATA] = None,
223225
*,
226+
filters: Optional[_Filters] = None,
224227
return_properties: Optional[PROPERTIES] = None,
225228
return_references: Literal[None] = None,
226229
after: Optional[UUID] = None,
@@ -233,6 +236,7 @@ def iterator(
233236
include_vector: bool = False,
234237
return_metadata: Optional[METADATA] = None,
235238
*,
239+
filters: Optional[_Filters] = None,
236240
return_properties: Optional[PROPERTIES] = None,
237241
return_references: REFERENCES,
238242
after: Optional[UUID] = None,
@@ -245,6 +249,7 @@ def iterator(
245249
include_vector: bool = False,
246250
return_metadata: Optional[METADATA] = None,
247251
*,
252+
filters: Optional[_Filters] = None,
248253
return_properties: Optional[PROPERTIES] = None,
249254
return_references: Type[TReferences],
250255
after: Optional[UUID] = None,
@@ -257,6 +262,7 @@ def iterator(
257262
include_vector: bool = False,
258263
return_metadata: Optional[METADATA] = None,
259264
*,
265+
filters: Optional[_Filters] = None,
260266
return_properties: Type[TProperties],
261267
return_references: Literal[None] = None,
262268
after: Optional[UUID] = None,
@@ -269,6 +275,7 @@ def iterator(
269275
include_vector: bool = False,
270276
return_metadata: Optional[METADATA] = None,
271277
*,
278+
filters: Optional[_Filters] = None,
272279
return_properties: Type[TProperties],
273280
return_references: REFERENCES,
274281
after: Optional[UUID] = None,
@@ -281,6 +288,7 @@ def iterator(
281288
include_vector: bool = False,
282289
return_metadata: Optional[METADATA] = None,
283290
*,
291+
filters: Optional[_Filters] = None,
284292
return_properties: Type[TProperties],
285293
return_references: Type[TReferences],
286294
after: Optional[UUID] = None,
@@ -292,6 +300,7 @@ def iterator(
292300
include_vector: bool = False,
293301
return_metadata: Optional[METADATA] = None,
294302
*,
303+
filters: Optional[_Filters] = None,
295304
return_properties: Optional[ReturnProperties[TProperties]] = None,
296305
return_references: Optional[ReturnReferences[TReferences]] = None,
297306
after: Optional[UUID] = None,
@@ -315,6 +324,7 @@ def iterator(
315324
are returned. Use `wvc.QueryReference` to specify which references to return.
316325
317326
Args:
327+
filters: The filters to apply to the query.
318328
include_vector: Whether to include the vector in the metadata of the returned objects.
319329
return_metadata: The metadata to return with each object.
320330
return_properties: The properties to return with each object.
@@ -325,9 +335,15 @@ def iterator(
325335
Raises:
326336
weaviate.exceptions.WeaviateGRPCQueryError: If the request to the Weaviate server fails.
327337
"""
338+
# Only the *filtered* iterator needs server-side support. Plain iteration
# (filters=None) must keep working against servers older than 1.33, so the
# version check is applied only when a filter was actually supplied.
if filters is not None and self.query._connection._weaviate_version.is_lower_than(1, 33, 0):
    raise WeaviateUnsupportedFeatureError(
        "Iterator with filters", self._connection.server_version, "1.33.0"
    )
342+
328343
return _ObjectIterator(
329344
self.query,
330345
_IteratorInputs(
346+
filters=filters,
331347
include_vector=include_vector,
332348
return_metadata=return_metadata,
333349
return_properties=return_properties,

weaviate/collections/grpc/query.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ def __create_request(
478478
limit=limit,
479479
offset=offset,
480480
after=str(after) if after is not None else "",
481+
after_present=after is not None,
481482
autocut=autocut,
482483
properties=self._translate_properties_from_python_to_grpc(
483484
return_properties_parsed, return_references_parsed

weaviate/collections/iterator.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
)
1212
from uuid import UUID
1313

14+
from weaviate.collections.classes.filters import _Filters
1415
from weaviate.collections.classes.grpc import METADATA
1516
from weaviate.collections.classes.internal import (
1617
Object,
@@ -30,15 +31,16 @@
3031

3132
@dataclass
class _IteratorInputs(Generic[TProperties, TReferences]):
    # Bundle of caller-supplied options held by the object iterators in this
    # module as their `inputs`. Fields have no defaults, so every one must be
    # supplied when the bundle is built.

    # Optional filter; passed as `filters=` on each page fetch.
    filters: Optional[_Filters]
    # NOTE(review): presumably forwarded to the fetch call like the fields
    # below; the forwarding line is outside the visible hunk, confirm.
    include_vector: bool
    # Passed as `return_metadata=` on each page fetch.
    return_metadata: Optional[METADATA]
    # Passed as `return_properties=` on each page fetch.
    return_properties: Optional[ReturnProperties[TProperties]]
    # Passed as `return_references=` on each page fetch.
    return_references: Optional[ReturnReferences[TReferences]]
    # Starting cursor; normalised through `_parse_after` when an iterator is
    # constructed.
    after: Optional[UUIDorStr]
3840

3941

40-
def _parse_after(after: Optional[UUIDorStr]) -> Optional[UUID]:
41-
return after if after is None or isinstance(after, UUID) else UUID(after)
42+
def _parse_after(after: Optional[UUIDorStr]) -> UUIDorStr:
43+
return "" if after is None or isinstance(after, UUID) else UUID(after)
4244

4345

4446
class _ObjectIterator(
@@ -55,7 +57,7 @@ def __init__(
5557
self.__inputs = inputs
5658

5759
self.__iter_object_cache: List[Object[TProperties, TReferences]] = []
58-
self.__iter_object_last_uuid: Optional[UUID] = _parse_after(self.__inputs.after)
60+
self.__iter_object_last_uuid: Optional[UUIDorStr] = _parse_after(self.__inputs.after)
5961
self.__iter_cache_size = cache_size or ITERATOR_CACHE_SIZE
6062

6163
def __iter__(
@@ -74,6 +76,7 @@ def __next__(self) -> Object[TProperties, TReferences]:
7476
return_metadata=self.__inputs.return_metadata,
7577
return_properties=self.__inputs.return_properties,
7678
return_references=self.__inputs.return_references,
79+
filters=self.__inputs.filters,
7780
)
7881
self.__iter_object_cache = res.objects # type: ignore
7982
if len(self.__iter_object_cache) == 0:
@@ -101,7 +104,7 @@ def __init__(
101104
self.__inputs = inputs
102105

103106
self.__iter_object_cache: List[Object[TProperties, TReferences]] = []
104-
self.__iter_object_last_uuid: Optional[UUID] = _parse_after(self.__inputs.after)
107+
self.__iter_object_last_uuid: UUIDorStr = _parse_after(self.__inputs.after)
105108
self.__iter_cache_size = cache_size or ITERATOR_CACHE_SIZE
106109

107110
def __aiter__(
@@ -122,6 +125,7 @@ async def __anext__(
122125
return_metadata=self.__inputs.return_metadata,
123126
return_properties=self.__inputs.return_properties,
124127
return_references=self.__inputs.return_references,
128+
filters=self.__inputs.filters,
125129
)
126130
self.__iter_object_cache = res.objects # type: ignore
127131
if len(self.__iter_object_cache) == 0:

0 commit comments

Comments
 (0)