Skip to content

Commit 96e36c8

Browse files
committed
refactor!: remove deprecated APIs
Drop `DatasetClient.download_items()`, the `max_unprocessed_requests_retries` and `min_delay_between_unprocessed_requests_retries` arguments of `batch_add_requests`, and the `exclusive_start_id` argument of `list_requests`. They have been deprecated for a long time and v3 is the right moment to remove them.
1 parent 73bd98a commit 96e36c8

3 files changed

Lines changed: 3 additions & 177 deletions

File tree

src/apify_client/_resource_clients/dataset.py

Lines changed: 0 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import warnings
43
from contextlib import asynccontextmanager, contextmanager
54
from dataclasses import dataclass
65
from typing import TYPE_CHECKING, Any
@@ -290,104 +289,6 @@ def _callback(*, limit: int | None = None, offset: int | None = None) -> Dataset
290289

291290
return get_items_iterator(_callback, limit=limit, offset=offset, chunk_size=chunk_size or DEFAULT_CHUNK_SIZE)
292291

293-
def download_items(
294-
self,
295-
*,
296-
item_format: str = 'json',
297-
offset: int | None = None,
298-
limit: int | None = None,
299-
desc: bool | None = None,
300-
clean: bool | None = None,
301-
bom: bool | None = None,
302-
delimiter: str | None = None,
303-
fields: list[str] | None = None,
304-
omit: list[str] | None = None,
305-
unwind: list[str] | None = None,
306-
skip_empty: bool | None = None,
307-
skip_header_row: bool | None = None,
308-
skip_hidden: bool | None = None,
309-
xml_root: str | None = None,
310-
xml_row: str | None = None,
311-
flatten: list[str] | None = None,
312-
signature: str | None = None,
313-
timeout: Timeout = 'long',
314-
) -> bytes:
315-
"""Get the items in the dataset as raw bytes.
316-
317-
Deprecated: this function is a deprecated alias of `get_items_as_bytes`. It will be removed in
318-
a future version.
319-
320-
https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items
321-
322-
Args:
323-
item_format: Format of the results, possible values are: json, jsonl, csv, html, xlsx, xml and rss.
324-
The default value is json.
325-
offset: Number of items that should be skipped at the start. The default value is 0.
326-
limit: Maximum number of items to return. By default there is no limit.
327-
desc: By default, results are returned in the same order as they were stored. To reverse the order,
328-
set this parameter to True.
329-
clean: If True, returns only non-empty items and skips hidden fields (i.e. fields starting with
330-
the # character). The clean parameter is just a shortcut for skip_hidden=True and skip_empty=True
331-
parameters. Note that since some objects might be skipped from the output, the result might
332-
contain fewer items than the limit value.
333-
bom: All text responses are encoded in UTF-8 encoding. By default, csv files are prefixed with
334-
the UTF-8 Byte Order Mark (BOM), while json, jsonl, xml, html and rss files are not. If you want
335-
to override this default behavior, specify bom=True query parameter to include the BOM or bom=False
336-
to skip it.
337-
delimiter: A delimiter character for CSV files. The default delimiter is a simple comma (,).
338-
fields: A list of fields which should be picked from the items, only these fields will remain in
339-
the resulting record objects. Note that the fields in the outputted items are sorted the same way
340-
as they are specified in the fields parameter. You can use this feature to effectively fix the
341-
output format.
342-
omit: A list of fields which should be omitted from the items.
343-
unwind: A list of fields which should be unwound, in the order in which they should be processed. Each field
344-
should be either an array or an object. If the field is an array then every element of the array
345-
will become a separate record and be merged with the parent object. If the unwound field is an object then
346-
it is merged with the parent object. If the unwound field is missing or its value is neither an array
347-
nor an object and therefore cannot be merged with a parent object, then the item gets preserved
348-
as it is. Note that the unwound items ignore the desc parameter.
349-
skip_empty: If True, then empty items are skipped from the output. Note that if used, the results might
350-
contain fewer items than the limit value.
351-
skip_header_row: If True, then header row in the csv format is skipped.
352-
skip_hidden: If True, then hidden fields are skipped from the output, i.e. fields starting with
353-
the # character.
354-
xml_root: Overrides default root element name of xml output. By default the root element is items.
355-
xml_row: Overrides default element name that wraps each page or page function result object in xml output.
356-
By default the element name is item.
357-
flatten: A list of fields that should be flattened.
358-
signature: Signature used to access the items.
359-
timeout: Timeout for the API HTTP request.
360-
361-
Returns:
362-
The dataset items as raw bytes.
363-
"""
364-
warnings.warn(
365-
'`DatasetClient.download_items()` is deprecated, use `DatasetClient.get_items_as_bytes()` instead.',
366-
DeprecationWarning,
367-
stacklevel=2,
368-
)
369-
370-
return self.get_items_as_bytes(
371-
item_format=item_format,
372-
offset=offset,
373-
limit=limit,
374-
desc=desc,
375-
clean=clean,
376-
bom=bom,
377-
delimiter=delimiter,
378-
fields=fields,
379-
omit=omit,
380-
unwind=unwind,
381-
skip_empty=skip_empty,
382-
skip_header_row=skip_header_row,
383-
skip_hidden=skip_hidden,
384-
xml_root=xml_root,
385-
xml_row=xml_row,
386-
flatten=flatten,
387-
signature=signature,
388-
timeout=timeout,
389-
)
390-
391292
def get_items_as_bytes(
392293
self,
393294
*,

src/apify_client/_resource_clients/request_queue.py

Lines changed: 0 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import asyncio
44
import math
5-
import warnings
65
from collections.abc import Iterable
76
from queue import Queue
87
from typing import TYPE_CHECKING, Any, Literal
@@ -377,8 +376,6 @@ def batch_add_requests(
377376
*,
378377
forefront: bool = False,
379378
max_parallel: int = 1,
380-
max_unprocessed_requests_retries: int | None = None,
381-
min_delay_between_unprocessed_requests_retries: timedelta | None = None,
382379
timeout: Timeout = 'medium',
383380
) -> BatchAddResult:
384381
"""Add requests to the request queue in batches.
@@ -393,26 +390,11 @@ def batch_add_requests(
393390
max_parallel: Specifies the maximum number of parallel tasks for API calls. This is only applicable
394391
to the async client. For the sync client, this value must be set to 1, as parallel execution
395392
is not supported.
396-
max_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
397-
min_delay_between_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
398393
timeout: Timeout for the API HTTP request.
399394
400395
Returns:
401396
Result containing lists of processed and unprocessed requests.
402397
"""
403-
if max_unprocessed_requests_retries:
404-
warnings.warn(
405-
'`max_unprocessed_requests_retries` is deprecated and not used anymore.',
406-
DeprecationWarning,
407-
stacklevel=2,
408-
)
409-
if min_delay_between_unprocessed_requests_retries:
410-
warnings.warn(
411-
'`min_delay_between_unprocessed_requests_retries` is deprecated and not used anymore.',
412-
DeprecationWarning,
413-
stacklevel=2,
414-
)
415-
416398
if max_parallel != 1:
417399
raise NotImplementedError('max_parallel is only supported in async client')
418400

@@ -514,7 +496,6 @@ def list_requests(
514496
filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002
515497
timeout: Timeout = 'medium',
516498
cursor: str | None = None,
517-
exclusive_start_id: str | None = None,
518499
) -> ListOfRequests:
519500
"""List requests in the queue.
520501
@@ -525,23 +506,11 @@ def list_requests(
525506
filter: List of request states to use as a filter. Multiple values mean union of the given filters.
526507
timeout: Timeout for the API HTTP request.
527508
cursor: A token returned in the previous API response, used to continue listing the next page of requests.
528-
exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result.
529509
"""
530-
if exclusive_start_id and cursor:
531-
raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.')
532-
533-
if exclusive_start_id is not None:
534-
warnings.warn(
535-
'`exclusive_start_id` is deprecated for paginating requests. Use pagination using `cursor` instead.',
536-
DeprecationWarning,
537-
stacklevel=2,
538-
)
539-
540510
request_params = self._build_params(
541511
limit=limit,
542512
filter=','.join(filter) if filter else None,
543513
clientKey=self.client_key,
544-
exclusiveStartId=exclusive_start_id,
545514
cursor=cursor,
546515
)
547516

@@ -979,8 +948,6 @@ async def batch_add_requests(
979948
*,
980949
forefront: bool = False,
981950
max_parallel: int = 5,
982-
max_unprocessed_requests_retries: int | None = None,
983-
min_delay_between_unprocessed_requests_retries: timedelta | None = None,
984951
timeout: Timeout = 'medium',
985952
) -> BatchAddResult:
986953
"""Add requests to the request queue in batches.
@@ -995,26 +962,11 @@ async def batch_add_requests(
995962
max_parallel: Specifies the maximum number of parallel tasks for API calls. This is only applicable
996963
to the async client. For the sync client, this value must be set to 1, as parallel execution
997964
is not supported.
998-
max_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
999-
min_delay_between_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
1000965
timeout: Timeout for the API HTTP request.
1001966
1002967
Returns:
1003968
Result containing lists of processed and unprocessed requests.
1004969
"""
1005-
if max_unprocessed_requests_retries:
1006-
warnings.warn(
1007-
'`max_unprocessed_requests_retries` is deprecated and not used anymore.',
1008-
DeprecationWarning,
1009-
stacklevel=2,
1010-
)
1011-
if min_delay_between_unprocessed_requests_retries:
1012-
warnings.warn(
1013-
'`min_delay_between_unprocessed_requests_retries` is deprecated and not used anymore.',
1014-
DeprecationWarning,
1015-
stacklevel=2,
1016-
)
1017-
1018970
requests_as_dicts = [
1019971
(
1020972
request
@@ -1126,7 +1078,6 @@ async def list_requests(
11261078
filter: list[Literal['pending', 'locked']] | None = None, # noqa: A002
11271079
timeout: Timeout = 'medium',
11281080
cursor: str | None = None,
1129-
exclusive_start_id: str | None = None,
11301081
) -> ListOfRequests:
11311082
"""List requests in the queue.
11321083
@@ -1137,23 +1088,11 @@ async def list_requests(
11371088
filter: List of request states to use as a filter. Multiple values mean union of the given filters.
11381089
timeout: Timeout for the API HTTP request.
11391090
cursor: A token returned in the previous API response, used to continue listing the next page of requests.
1140-
exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result.
11411091
"""
1142-
if exclusive_start_id and cursor:
1143-
raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.')
1144-
1145-
if exclusive_start_id is not None:
1146-
warnings.warn(
1147-
'`exclusive_start_id` is deprecated for paginating requests. Use pagination using `cursor` instead.',
1148-
DeprecationWarning,
1149-
stacklevel=2,
1150-
)
1151-
11521092
request_params = self._build_params(
11531093
limit=limit,
11541094
filter=','.join(filter) if filter else None,
11551095
clientKey=self.client_key,
1156-
exclusiveStartId=exclusive_start_id,
11571096
cursor=cursor,
11581097
)
11591098

tests/unit/test_client_pagination.py

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -235,14 +235,14 @@ def _handle_cursor_pagination(request: Request) -> Response:
235235
"""Serve a cursor-paginated Apify API response for KVS keys and RQ requests.
236236
237237
Holds 2500 synthetic items whose integer `id` equals their position. Each page is capped at 1000 items. KVS uses
238-
`exclusiveStartKey`; RQ accepts either the deprecated `exclusiveStartId` on the initial call or the opaque `cursor`
239-
on subsequent calls. All three values encode the last-seen item id as a string — the next page starts at id + 1.
238+
`exclusiveStartKey`; RQ uses the opaque `cursor`. Both values encode the last-seen item id as a string — the
239+
next page starts at id + 1.
240240
"""
241241
params = request.args
242242
limit = _parse_int_param(params.get('limit'))
243243
assert limit >= 0, 'Invalid limit sent to API'
244244

245-
cursor_raw = params.get('exclusiveStartKey') or params.get('exclusiveStartId') or params.get('cursor')
245+
cursor_raw = params.get('exclusiveStartKey') or params.get('cursor')
246246

247247
total_items = NORMAL_ITEMS
248248
start = int(cursor_raw) + 1 if cursor_raw not in (None, '') else 0
@@ -617,17 +617,3 @@ async def test_rq_list_requests_iterable_async(
617617
client: RequestQueueClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server))
618618
returned_items = [dict(item) async for item in client.iterate_requests(**inputs)]
619619
assert returned_items == expected_items
620-
621-
622-
def test_rq_list_requests_rejects_cursor_and_exclusive_start_id() -> None:
623-
"""Passing both `cursor` and `exclusive_start_id` is mutually exclusive and must error."""
624-
client = ApifyClient(token='').request_queue(ID_PLACEHOLDER)
625-
with pytest.raises(ValueError, match='Cannot use both'):
626-
client.list_requests(cursor='a', exclusive_start_id='b')
627-
628-
629-
async def test_rq_list_requests_rejects_cursor_and_exclusive_start_id_async() -> None:
630-
"""Async variant of the mutual-exclusion check."""
631-
client = ApifyClientAsync(token='').request_queue(ID_PLACEHOLDER)
632-
with pytest.raises(ValueError, match='Cannot use both'):
633-
await client.list_requests(cursor='a', exclusive_start_id='b')

0 commit comments

Comments
 (0)