|
2 | 2 |
|
3 | 3 | import asyncio |
4 | 4 | import re |
5 | | -from asyncio import Task |
6 | 5 | from typing import Annotated, Any |
7 | 6 |
|
8 | 7 | from pydantic import BaseModel, Field, TypeAdapter |
@@ -114,49 +113,34 @@ async def _fetch_requests_from_url( |
114 | 113 | ) -> list[Request]: |
115 | 114 | """Create list of requests from url. |
116 | 115 |
|
117 | | - Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Run extracting |
118 | | - callback on each response body and use URL_NO_COMMAS_REGEX regex to find all links. Create list of Requests from |
119 | | - collected links and additional inputs stored in other attributes of each remote_url_requests_inputs. |
| 116 | + Send GET requests to urls defined in each requests_from_url of remote_url_requests_inputs. Extract links from |
| 117 | + each response body using URL_NO_COMMAS_REGEX regex. Create list of Requests from collected links and additional |
| 118 | + inputs stored in other attributes of each remote_url_requests_inputs. |
120 | 119 | """ |
121 | 120 | created_requests: list[Request] = [] |
122 | 121 |
|
123 | | - async def create_requests_from_response(request_input: _RequestsFromUrlInput, task: Task) -> None: |
124 | | - """Extract links from response body and use them to create `Request` objects. |
| 122 | + # Fetch all remote URLs in parallel. |
| 123 | + responses = await asyncio.gather( |
| 124 | + *[ |
| 125 | + http_client.send_request(method='GET', url=remote_url_input.requests_from_url) |
| 126 | + for remote_url_input in remote_url_requests_inputs |
| 127 | + ] |
| 128 | + ) |
125 | 129 |
|
126 | | - Use the regular expression to find all matching links in the response body, then create `Request` |
127 | | - objects from these links and the provided input attributes. |
128 | | - """ |
129 | | - response = await (task.result()).read() |
130 | | - matches = re.finditer(URL_NO_COMMAS_REGEX, response.decode('utf-8')) |
| 130 | + # Process each response and extract links. |
| 131 | + for request_input, http_response in zip(remote_url_requests_inputs, responses, strict=True): |
| 132 | + response_body = await http_response.read() |
| 133 | + matches = re.finditer(URL_NO_COMMAS_REGEX, response_body.decode('utf-8')) |
131 | 134 |
|
132 | 135 | created_requests.extend( |
133 | | - [ |
134 | | - Request.from_url( |
135 | | - match.group(0), |
136 | | - method=request_input.method, |
137 | | - payload=request_input.payload.encode('utf-8'), |
138 | | - headers=request_input.headers, |
139 | | - user_data=request_input.user_data, |
140 | | - ) |
141 | | - for match in matches |
142 | | - ] |
143 | | - ) |
144 | | - |
145 | | - remote_url_requests = [] |
146 | | - for remote_url_requests_input in remote_url_requests_inputs: |
147 | | - get_response_task = asyncio.create_task( |
148 | | - http_client.send_request( |
149 | | - method='GET', |
150 | | - url=remote_url_requests_input.requests_from_url, |
151 | | - ) |
152 | | - ) |
153 | | - |
154 | | - get_response_task.add_done_callback( |
155 | | - lambda task, inp=remote_url_requests_input: asyncio.create_task( |
156 | | - create_requests_from_response(inp, task) |
| 136 | + Request.from_url( |
| 137 | + match.group(0), |
| 138 | + method=request_input.method, |
| 139 | + payload=request_input.payload.encode('utf-8'), |
| 140 | + headers=request_input.headers, |
| 141 | + user_data=request_input.user_data, |
157 | 142 | ) |
| 143 | + for match in matches |
158 | 144 | ) |
159 | | - remote_url_requests.append(get_response_task) |
160 | 145 |
|
161 | | - await asyncio.gather(*remote_url_requests) |
162 | 146 | return created_requests |
0 commit comments