Skip to content

Commit cf3d6a3

Browse files
vdusek and claude committed
fix: Eliminate race condition in _fetch_requests_from_url
The previous implementation used `add_done_callback` + `asyncio.create_task` to process HTTP responses, but `asyncio.gather` awaited only the HTTP request tasks — not the processing tasks spawned by the callbacks. As a result, `created_requests` could be returned before processing completed, yielding empty or incomplete results.

Refactored to fetch all HTTP responses first (still in parallel via `asyncio.gather`), then process each response sequentially, ensuring all extracted URLs are collected before returning.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 252eb4e commit cf3d6a3

File tree

1 file changed

+21
-37
lines changed

1 file changed

+21
-37
lines changed

src/apify/request_loaders/_apify_request_list.py

Lines changed: 21 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import asyncio
44
import re
5-
from asyncio import Task
65
from typing import Annotated, Any
76

87
from pydantic import BaseModel, Field, TypeAdapter
@@ -114,49 +113,34 @@ async def _fetch_requests_from_url(
114113
) -> list[Request]:
115114
"""Create list of requests from url.
116115
117-
Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting
118-
callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from
119-
collected links and additional inputs stored in other attributes of each remote_url_requests_inputs.
116+
Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Extract links from
117+
each response body using URL_NO_COMMAS_REGEX regex. Create list of Requests from collected links and additional
118+
inputs stored in other attributes of each remote_url_requests_inputs.
120119
"""
121120
created_requests: list[Request] = []
122121

123-
async def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None:
124-
"""Extract links from response body and use them to create `Request` objects.
122+
# Fetch all remote URLs in parallel.
123+
responses = await asyncio.gather(
124+
*[
125+
http_client.send_request(method='GET', url=remote_url_input.requests_from_url)
126+
for remote_url_input in remote_url_requests_inputs
127+
]
128+
)
125129

126-
Use the regular expression to find all matching links in the response body, then create `Request`
127-
objects from these links and the provided input attributes.
128-
"""
129-
response = await (task.result()).read()
130-
matches = re.finditer(URL_NO_COMMAS_REGEX, response.decode('utf-8'))
130+
# Process each response and extract links.
131+
for request_input, http_response in zip(remote_url_requests_inputs, responses, strict=True):
132+
response_body = await http_response.read()
133+
matches = re.finditer(URL_NO_COMMAS_REGEX, response_body.decode('utf-8'))
131134

132135
created_requests.extend(
133-
[
134-
Request.from_url(
135-
match.group(0),
136-
method=request_input.method,
137-
payload=request_input.payload.encode('utf-8'),
138-
headers=request_input.headers,
139-
user_data=request_input.user_data,
140-
)
141-
for match in matches
142-
]
143-
)
144-
145-
remote_url_requests = []
146-
for remote_url_requests_input in remote_url_requests_inputs:
147-
get_response_task = asyncio.create_task(
148-
http_client.send_request(
149-
method='GET',
150-
url=remote_url_requests_input.requests_from_url,
151-
)
152-
)
153-
154-
get_response_task.add_done_callback(
155-
lambda task, inp=remote_url_requests_input: asyncio.create_task(
156-
create_requests_from_response(inp, task)
136+
Request.from_url(
137+
match.group(0),
138+
method=request_input.method,
139+
payload=request_input.payload.encode('utf-8'),
140+
headers=request_input.headers,
141+
user_data=request_input.user_data,
157142
)
143+
for match in matches
158144
)
159-
remote_url_requests.append(get_response_task)
160145

161-
await asyncio.gather(*remote_url_requests)
162146
return created_requests

0 commit comments

Comments
 (0)