refactor!: remove deprecated APIs (#799)

vdusek · web-flow · commit 6e5df3595e5f · 2026-05-19T13:58:25.000+02:00
## Summary

Removes deprecated APIs as part of the v3 major release:

- `DatasetClient.download_items()` — deprecated alias of
`get_items_as_bytes()`.
- `max_unprocessed_requests_retries` and
`min_delay_between_unprocessed_requests_retries` arguments of
`batch_add_requests` (sync + async). The docstrings already said *"Will
be removed in next major release."*
- `exclusive_start_id` argument of `list_requests` (sync + async) —
replaced by `cursor`.

Also drops the now-unused `import warnings` from both resource client
modules, and removes the unit tests that covered the deleted
`cursor`/`exclusive_start_id` mutual-exclusion check.
diff --git a/docs/04_upgrading/upgrading_to_v3.mdx b/docs/04_upgrading/upgrading_to_v3.mdx
@@ -334,3 +334,61 @@ The difference matters only if your code inspects the function itself:
 - Type checkers see `def (...) -> AsyncIterator[T]` instead of `async def (...) -> AsyncIterator[T]`. Annotations on variables that hold the call's result may need to change from `AsyncGenerator[T, None]` to `AsyncIterator[T]`.
 
 A new <ApiLink to="class/RequestQueueClientAsync#iterate_requests">`RequestQueueClientAsync.iterate_requests()`</ApiLink> helper is also introduced and follows the same `def ... -> AsyncIterator[T]` shape.
+
+## Removal of deprecated APIs
+
+Methods and arguments that had been deprecated in v2 are removed in v3.
+
+### `DatasetClient.download_items()`
+
+The deprecated alias has been removed. Use <ApiLink to="class/DatasetClient#get_items_as_bytes">`DatasetClient.get_items_as_bytes()`</ApiLink> instead — the signature and behavior are identical.
+
+Before (v2):
+
+```python
+raw = client.dataset('my-dataset').download_items(item_format='csv')
+```
+
+After (v3):
+
+```python
+raw = client.dataset('my-dataset').get_items_as_bytes(item_format='csv')
+```
+
+### `batch_add_requests`: `max_unprocessed_requests_retries` and `min_delay_between_unprocessed_requests_retries`
+
+Both arguments have been removed from <ApiLink to="class/RequestQueueClient#batch_add_requests">`RequestQueueClient.batch_add_requests()`</ApiLink> and <ApiLink to="class/RequestQueueClientAsync#batch_add_requests">`RequestQueueClientAsync.batch_add_requests()`</ApiLink>. They already had no effect in v2 — passing them only emitted a `DeprecationWarning`. Drop them from your call sites.
+
+Before (v2):
+
+```python
+rq_client.batch_add_requests(
+    requests,
+    max_unprocessed_requests_retries=3,
+    min_delay_between_unprocessed_requests_retries=timedelta(seconds=1),
+)
+```
+
+After (v3):
+
+```python
+rq_client.batch_add_requests(requests)
+```
+
+### `list_requests`: `exclusive_start_id`
+
+The deprecated `exclusive_start_id` argument has been removed from <ApiLink to="class/RequestQueueClient#list_requests">`RequestQueueClient.list_requests()`</ApiLink> and <ApiLink to="class/RequestQueueClientAsync#list_requests">`RequestQueueClientAsync.list_requests()`</ApiLink>. Use the `cursor` argument instead, passing it the `next_cursor` value from the previous response (or use <ApiLink to="class/RequestQueueClient#iterate_requests">`iterate_requests()`</ApiLink>, which handles pagination for you).
+
+Before (v2):
+
+```python
+page = rq_client.list_requests(limit=100)
+next_page = rq_client.list_requests(limit=100, exclusive_start_id=page.items[-1].id)
+```
+
+After (v3):
+
+```python
+page = rq_client.list_requests(limit=100)
+next_page = rq_client.list_requests(limit=100, cursor=page.next_cursor)
+```
diff --git a/src/apify_client/_resource_clients/dataset.py b/src/apify_client/_resource_clients/dataset.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import warnings
 from contextlib import asynccontextmanager, contextmanager
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any
@@ -289,104 +288,6 @@ def _callback(*, limit: int | None = None, offset: int | None = None) -> Dataset
 
         return get_items_iterator(_callback, limit=limit, offset=offset, chunk_size=chunk_size or DEFAULT_CHUNK_SIZE)
 
-    def download_items(
-        self,
-        *,
-        item_format: str = 'json',
-        offset: int | None = None,
-        limit: int | None = None,
-        desc: bool | None = None,
-        clean: bool | None = None,
-        bom: bool | None = None,
-        delimiter: str | None = None,
-        fields: list[str] | None = None,
-        omit: list[str] | None = None,
-        unwind: list[str] | None = None,
-        skip_empty: bool | None = None,
-        skip_header_row: bool | None = None,
-        skip_hidden: bool | None = None,
-        xml_root: str | None = None,
-        xml_row: str | None = None,
-        flatten: list[str] | None = None,
-        signature: str | None = None,
-        timeout: Timeout = 'long',
-    ) -> bytes:
-        """Get the items in the dataset as raw bytes.
-
-        Deprecated: this function is a deprecated alias of `get_items_as_bytes`. It will be removed in
-        a future version.
-
-        https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items
-
-        Args:
-            item_format: Format of the results, possible values are: json, jsonl, csv, html, xlsx, xml and rss.
-                The default value is json.
-            offset: Number of items that should be skipped at the start. The default value is 0.
-            limit: Maximum number of items to return. By default there is no limit.
-            desc: By default, results are returned in the same order as they were stored. To reverse the order,
-                set this parameter to True.
-            clean: If True, returns only non-empty items and skips hidden fields (i.e. fields starting with
-                the # character). The clean parameter is just a shortcut for skip_hidden=True and skip_empty=True
-                parameters. Note that since some objects might be skipped from the output, that the result might
-                contain less items than the limit value.
-            bom: All text responses are encoded in UTF-8 encoding. By default, csv files are prefixed with
-                the UTF-8 Byte Order Mark (BOM), while json, jsonl, xml, html and rss files are not. If you want
-                to override this default behavior, specify bom=True query parameter to include the BOM or bom=False
-                to skip it.
-            delimiter: A delimiter character for CSV files. The default delimiter is a simple comma (,).
-            fields: A list of fields which should be picked from the items, only these fields will remain in
-                the resulting record objects. Note that the fields in the outputted items are sorted the same way
-                as they are specified in the fields parameter. You can use this feature to effectively fix the
-                output format.
-            omit: A list of fields which should be omitted from the items.
-            unwind: A list of fields which should be unwound, in order which they should be processed. Each field
-                should be either an array or an object. If the field is an array then every element of the array
-                will become a separate record and merged with parent object. If the unwound field is an object then
-                it is merged with the parent object. If the unwound field is missing or its value is neither an array
-                nor an object and therefore cannot be merged with a parent object, then the item gets preserved
-                as it is. Note that the unwound items ignore the desc parameter.
-            skip_empty: If True, then empty items are skipped from the output. Note that if used, the results might
-                contain less items than the limit value.
-            skip_header_row: If True, then header row in the csv format is skipped.
-            skip_hidden: If True, then hidden fields are skipped from the output, i.e. fields starting with
-                the # character.
-            xml_root: Overrides default root element name of xml output. By default the root element is items.
-            xml_row: Overrides default element name that wraps each page or page function result object in xml output.
-                By default the element name is item.
-            flatten: A list of fields that should be flattened.
-            signature: Signature used to access the items.
-            timeout: Timeout for the API HTTP request.
-
-        Returns:
-            The dataset items as raw bytes.
-        """
-        warnings.warn(
-            '`DatasetClient.download_items()` is deprecated, use `DatasetClient.get_items_as_bytes()` instead.',
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
-        return self.get_items_as_bytes(
-            item_format=item_format,
-            offset=offset,
-            limit=limit,
-            desc=desc,
-            clean=clean,
-            bom=bom,
-            delimiter=delimiter,
-            fields=fields,
-            omit=omit,
-            unwind=unwind,
-            skip_empty=skip_empty,
-            skip_header_row=skip_header_row,
-            skip_hidden=skip_hidden,
-            xml_root=xml_root,
-            xml_row=xml_row,
-            flatten=flatten,
-            signature=signature,
-            timeout=timeout,
-        )
-
     def get_items_as_bytes(
         self,
         *,
diff --git a/src/apify_client/_resource_clients/request_queue.py b/src/apify_client/_resource_clients/request_queue.py
@@ -2,7 +2,6 @@
 
 import asyncio
 import math
-import warnings
 from collections.abc import Iterable
 from queue import Queue
 from typing import TYPE_CHECKING, Any, Literal
@@ -377,8 +376,6 @@ def batch_add_requests(
         *,
         forefront: bool = False,
         max_parallel: int = 1,
-        max_unprocessed_requests_retries: int | None = None,
-        min_delay_between_unprocessed_requests_retries: timedelta | None = None,
         timeout: Timeout = 'medium',
     ) -> BatchAddResult:
         """Add requests to the request queue in batches.
@@ -393,26 +390,11 @@ def batch_add_requests(
             max_parallel: Specifies the maximum number of parallel tasks for API calls. This is only applicable
                 to the async client. For the sync client, this value must be set to 1, as parallel execution
                 is not supported.
-            max_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
-            min_delay_between_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
             timeout: Timeout for the API HTTP request.
 
         Returns:
             Result containing lists of processed and unprocessed requests.
         """
-        if max_unprocessed_requests_retries:
-            warnings.warn(
-                '`max_unprocessed_requests_retries` is deprecated and not used anymore.',
-                DeprecationWarning,
-                stacklevel=2,
-            )
-        if min_delay_between_unprocessed_requests_retries:
-            warnings.warn(
-                '`min_delay_between_unprocessed_requests_retries` is deprecated and not used anymore.',
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
         if max_parallel != 1:
             raise NotImplementedError('max_parallel is only supported in async client')
 
@@ -514,7 +496,6 @@ def list_requests(
         filter: list[Literal['pending', 'locked']] | None = None,  # noqa: A002
         timeout: Timeout = 'medium',
         cursor: str | None = None,
-        exclusive_start_id: str | None = None,
     ) -> ListOfRequests:
         """List requests in the queue.
 
@@ -525,23 +506,11 @@ def list_requests(
             filter: List of request states to use as a filter. Multiple values mean union of the given filters.
             timeout: Timeout for the API HTTP request.
             cursor: A token returned in previous API response, to continue listing next page of requests
-            exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result.
         """
-        if exclusive_start_id and cursor:
-            raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.')
-
-        if exclusive_start_id is not None:
-            warnings.warn(
-                '`exclusive_start_id` is deprecated for paginating requests. Use pagination using `cursor` instead.',
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
         request_params = self._build_params(
             limit=limit,
             filter=','.join(filter) if filter else None,
             clientKey=self.client_key,
-            exclusiveStartId=exclusive_start_id,
             cursor=cursor,
         )
 
@@ -979,8 +948,6 @@ async def batch_add_requests(
         *,
         forefront: bool = False,
         max_parallel: int = 5,
-        max_unprocessed_requests_retries: int | None = None,
-        min_delay_between_unprocessed_requests_retries: timedelta | None = None,
         timeout: Timeout = 'medium',
     ) -> BatchAddResult:
         """Add requests to the request queue in batches.
@@ -995,26 +962,11 @@ async def batch_add_requests(
             max_parallel: Specifies the maximum number of parallel tasks for API calls. This is only applicable
                 to the async client. For the sync client, this value must be set to 1, as parallel execution
                 is not supported.
-            max_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
-            min_delay_between_unprocessed_requests_retries: Deprecated argument. Will be removed in next major release.
             timeout: Timeout for the API HTTP request.
 
         Returns:
             Result containing lists of processed and unprocessed requests.
         """
-        if max_unprocessed_requests_retries:
-            warnings.warn(
-                '`max_unprocessed_requests_retries` is deprecated and not used anymore.',
-                DeprecationWarning,
-                stacklevel=2,
-            )
-        if min_delay_between_unprocessed_requests_retries:
-            warnings.warn(
-                '`min_delay_between_unprocessed_requests_retries` is deprecated and not used anymore.',
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
         requests_as_dicts = [
             (
                 request
@@ -1126,7 +1078,6 @@ async def list_requests(
         filter: list[Literal['pending', 'locked']] | None = None,  # noqa: A002
         timeout: Timeout = 'medium',
         cursor: str | None = None,
-        exclusive_start_id: str | None = None,
     ) -> ListOfRequests:
         """List requests in the queue.
 
@@ -1137,23 +1088,11 @@ async def list_requests(
             filter: List of request states to use as a filter. Multiple values mean union of the given filters.
             timeout: Timeout for the API HTTP request.
             cursor: A token returned in previous API response, to continue listing next page of requests
-            exclusive_start_id: (deprecated) All requests up to this one (including) are skipped from the result.
         """
-        if exclusive_start_id and cursor:
-            raise ValueError('Cannot use both `exclusive_start_id` and `cursor` for paginating requests.')
-
-        if exclusive_start_id is not None:
-            warnings.warn(
-                '`exclusive_start_id` is deprecated for paginating requests. Use pagination using `cursor` instead.',
-                DeprecationWarning,
-                stacklevel=2,
-            )
-
         request_params = self._build_params(
             limit=limit,
             filter=','.join(filter) if filter else None,
             clientKey=self.client_key,
-            exclusiveStartId=exclusive_start_id,
             cursor=cursor,
         )
 
diff --git a/tests/unit/test_client_pagination.py b/tests/unit/test_client_pagination.py
@@ -235,14 +235,14 @@ def _handle_cursor_pagination(request: Request) -> Response:
     """Serve a cursor-paginated Apify API response for KVS keys and RQ requests.
 
     Holds 2500 synthetic items whose integer `id` equals their position. Each page is capped at 1000 items. KVS uses
-    `exclusiveStartKey`; RQ accepts either the deprecated `exclusiveStartId` on the initial call or the opaque `cursor`
-    on subsequent calls. All three values encode the last-seen item id as a string — the next page starts at id + 1.
+    `exclusiveStartKey`; RQ uses the opaque `cursor`. Both values encode the last-seen item id as a string — the
+    next page starts at id + 1.
     """
     params = request.args
     limit = _parse_int_param(params.get('limit'))
     assert limit >= 0, 'Invalid limit sent to API'
 
-    cursor_raw = params.get('exclusiveStartKey') or params.get('exclusiveStartId') or params.get('cursor')
+    cursor_raw = params.get('exclusiveStartKey') or params.get('cursor')
 
     total_items = NORMAL_ITEMS
     start = int(cursor_raw) + 1 if cursor_raw not in (None, '') else 0
@@ -617,17 +617,3 @@ async def test_rq_list_requests_iterable_async(
     client: RequestQueueClientAsync = _CLIENT_FACTORIES[client_name](_make_async_client(pagination_server))
     returned_items = [dict(item) async for item in client.iterate_requests(**inputs)]
     assert returned_items == expected_items
-
-
-def test_rq_list_requests_rejects_cursor_and_exclusive_start_id() -> None:
-    """Passing both `cursor` and `exclusive_start_id` is mutually exclusive and must error."""
-    client = ApifyClient(token='').request_queue(ID_PLACEHOLDER)
-    with pytest.raises(ValueError, match='Cannot use both'):
-        client.list_requests(cursor='a', exclusive_start_id='b')
-
-
-async def test_rq_list_requests_rejects_cursor_and_exclusive_start_id_async() -> None:
-    """Async variant of the mutual-exclusion check."""
-    client = ApifyClientAsync(token='').request_queue(ID_PLACEHOLDER)
-    with pytest.raises(ValueError, match='Cannot use both'):
-        await client.list_requests(cursor='a', exclusive_start_id='b')