Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions src/apify/storage_clients/_apify/_request_queue_single_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,14 @@ async def _list_head(self) -> None:

# Update the cached data
for request_data in response.get('items', []):
# Due to https://github.com/apify/apify-core/blob/v0.1377.0/src/api/src/lib/request_queues/request_queue.ts#L53,
# the list_head endpoint may return truncated fields for long requests (e.g., long URLs or unique keys).
# If truncation is detected, fetch the full request data by its ID from the API.
# This is a temporary workaround - the caching will be refactored to use request IDs instead of unique keys.
# See https://github.com/apify/apify-sdk-python/issues/630 for details.
if '[truncated]' in request_data['uniqueKey'] or '[truncated]' in request_data['url']:
request_data = await self._api_client.get_request(request_id=request_data['id']) # noqa: PLW2901

request = Request.model_validate(request_data)

if request.unique_key in self._requests_in_progress:
Expand All @@ -248,15 +256,14 @@ async def _list_head(self) -> None:
# Only fetch the request if we do not know it yet.
if request.unique_key not in self._requests_cache:
request_id = unique_key_to_request_id(request.unique_key)
complete_request_data = await self._api_client.get_request(request_id)

if complete_request_data is not None:
request = Request.model_validate(complete_request_data)
self._requests_cache[request.unique_key] = request
else:
if request_data is not None and request_id != request_data['id']:
logger.warning(
f'Could not fetch request data for unique_key=`{request.unique_key}` (id=`{request_id}`)'
f'Request ID mismatch: {request_id} != {request_data["id"]}, '
'this may cause unexpected behavior.'
)
full_request_data = await self._api_client.get_request(request_id)
Comment thread
vdusek marked this conversation as resolved.
Outdated
request = Request.model_validate(full_request_data)
self._requests_cache[request.unique_key] = request

# Add new requests to the end of the head, unless already present in head
if request.unique_key not in self._head_requests:
Expand Down
38 changes: 38 additions & 0 deletions tests/integration/test_actor_request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,3 +489,41 @@ async def main() -> None:
run_result = await run_actor(actor)

assert run_result.status == 'SUCCEEDED'


async def test_rq_long_url(
    make_actor: MakeActorFunction,
    run_actor: RunActorFunction,
) -> None:
    """Check that a request with a long URL can be added, fetched and handled.

    The Actor body enqueues a single request built from a long URL with an
    extended unique key into a cloud request queue (`force_cloud=True`), then
    verifies that:

    * the ID reported by `add_request` equals the ID derived from the unique key,
    * the request can be fetched back from the queue,
    * after marking it as handled the queue reports itself as finished.

    The test passes when the Actor run ends with status `SUCCEEDED`.
    """

    async def main() -> None:
        # NOTE(review): imports are kept inside `main` - presumably because this
        # function is what gets shipped and executed as the Actor; confirm
        # against the `make_actor` fixture before moving them.
        from apify import Actor, Request
        from apify.storage_clients._apify._utils import unique_key_to_request_id

        url = 'https://portal.isoss.gov.cz/irj/portal/anonymous/mvrest?path=/eosm-public-offer&officeLabels=%7B%7D&page=1&pageSize=100000&sortColumn=zdatzvsm&sortOrder=-1'

        async with Actor:
            request = Request.from_url(
                url=url,
                use_extended_unique_key=True,
                always_enqueue=True,
            )

            rq = await Actor.open_request_queue(force_cloud=True)
            # The expected ID is computed locally from the unique key so it can be
            # compared with what the platform reports back.
            request_id = unique_key_to_request_id(request.unique_key)

            processed_request = await rq.add_request(request)
            assert processed_request.id == request_id

            request_obtained = await rq.fetch_next_request()
            assert request_obtained is not None

            await rq.mark_request_as_handled(request_obtained)

            is_finished = await rq.is_finished()
            assert is_finished

    actor = await make_actor(label='long-url', main_func=main)
    run_result = await run_actor(actor)

    assert run_result.status == 'SUCCEEDED'