22
33import asyncio
44from contextlib import asynccontextmanager
5- from typing import TYPE_CHECKING , Any
5+ from http .cookiejar import Cookie
6+ from typing import TYPE_CHECKING , Any , cast
67
78from curl_cffi import CurlInfo
89from curl_cffi .const import CurlHttpVersion
1516from curl_cffi .requests .impersonate import DEFAULT_CHROME as CURL_DEFAULT_CHROME
1617from typing_extensions import override
1718
18- from crawlee ._types import HttpHeaders , HttpPayload
19+ from crawlee ._types import HttpHeaders , HttpMethod , HttpPayload
1920from crawlee ._utils .blocked import ROTATE_PROXY_ERRORS
2021from crawlee ._utils .docs import docs_group
2122from crawlee .errors import ProxyError
2425if TYPE_CHECKING :
2526 from collections .abc import AsyncGenerator
2627 from datetime import timedelta
27- from http .cookiejar import Cookie
2828
2929 from curl_cffi import Curl
3030 from curl_cffi .requests import Request as CurlRequest
3131 from curl_cffi .requests import Response
32+ from curl_cffi .requests .session import HttpMethod as CurlHttpMethod
3233
3334 from crawlee import Request
3435 from crawlee ._types import HttpMethod
@@ -90,13 +91,15 @@ def headers(self) -> HttpHeaders:
9091 async def read (self ) -> bytes :
9192 if self ._response .astream_task :
9293 raise RuntimeError ('Use `read_stream` to read the body of the Response received from the `stream` method' )
94+
9395 return self ._response .content
9496
9597 async def read_stream (self ) -> AsyncGenerator [bytes , None ]:
96- if not self ._response .astream_task or self ._response .astream_task .done (): # ty: ignore[possibly-missing-attribute]
97- raise RuntimeError (
98- 'Cannot read stream: either already consumed or Response not obtained from `stream` method'
99- )
98+ if not self ._response .astream_task :
99+ raise RuntimeError ('Cannot read stream, Response not obtained from `stream` method.' )
100+
101+ if isinstance (self ._response .astream_task , asyncio .Future ) and self ._response .astream_task .done ():
102+ raise RuntimeError ('Cannot read stream, it was already consumed.' )
100103
101104 async for chunk in self ._response .aiter_content ():
102105 yield chunk
@@ -156,7 +159,7 @@ async def crawl(
156159 try :
157160 response = await client .request (
158161 url = request .url ,
159- method = request .method . upper (), # ty: ignore[invalid-argument-type]
162+ method = self . _convert_method ( request .method ),
160163 headers = request .headers ,
161164 data = request .payload ,
162165 cookies = session .cookies .jar if session else None ,
@@ -203,7 +206,7 @@ async def send_request(
203206 try :
204207 response = await client .request (
205208 url = url ,
206- method = method . upper (), # ty: ignore[invalid-argument-type]
209+ method = self . _convert_method ( method ),
207210 headers = dict (headers ) if headers else None ,
208211 data = payload ,
209212 cookies = session .cookies .jar if session else None ,
@@ -244,7 +247,7 @@ async def stream(
244247 try :
245248 response = await client .request (
246249 url = url ,
247- method = method . upper (), # ty: ignore[invalid-argument-type]
250+ method = self . _convert_method ( method ),
248251 headers = dict (headers ) if headers else None ,
249252 data = payload ,
250253 cookies = session .cookies .jar if session else None ,
@@ -291,6 +294,40 @@ def _get_client(self, proxy_url: str | None) -> AsyncSession:
291294
292295 return self ._client_by_proxy_url [proxy_url ]
293296
297+ def _convert_method (self , method : HttpMethod ) -> CurlHttpMethod :
298+ """Convert from Crawlee HTTP method to curl-cffi HTTP method.
299+
300+ Args:
301+ method: Crawlee HTTP method.
302+
303+ Returns:
304+ Corresponding curl-cffi HTTP method.
305+
306+ Raises:
307+ ValueError: If the provided HTTP method is not supported.
308+ """
309+ method_upper = method .upper () # curl-cffi requires uppercase methods
310+
311+ match method_upper :
312+ case 'GET' :
313+ return 'GET'
314+ case 'POST' :
315+ return 'POST'
316+ case 'PUT' :
317+ return 'PUT'
318+ case 'DELETE' :
319+ return 'DELETE'
320+ case 'OPTIONS' :
321+ return 'OPTIONS'
322+ case 'HEAD' :
323+ return 'HEAD'
324+ case 'TRACE' :
325+ return 'TRACE'
326+ case 'PATCH' :
327+ return 'PATCH'
328+ case _:
329+ raise ValueError (f'HTTP method { method } is not supported in { self .__class__ .__name__ } .' )
330+
294331 @staticmethod
295332 def _is_proxy_error (error : CurlRequestError ) -> bool :
296333 """Determine whether the given error is related to a proxy issue.
@@ -308,11 +345,16 @@ def _is_proxy_error(error: CurlRequestError) -> bool:
308345
309346 @staticmethod
310347 def _get_cookies (curl : Curl ) -> list [Cookie ]:
311- cookies : list [Cookie ] = []
312- for curl_cookie in curl .getinfo (CurlInfo .COOKIELIST ): # ty: ignore[not-iterable]
313- curl_morsel = CurlMorsel .from_curl_format (curl_cookie ) # ty: ignore[invalid-argument-type]
348+ cookies = list [Cookie ]()
349+
350+ # Implementation of getinfo always returns list[bytes] for CurlInfo.COOKIELIST.
351+ cookie_list = cast ('list[bytes]' , curl .getinfo (CurlInfo .COOKIELIST ))
352+
353+ for curl_cookie in cookie_list :
354+ curl_morsel = CurlMorsel .from_curl_format (curl_cookie )
314355 cookie = curl_morsel .to_cookiejar_cookie ()
315356 cookies .append (cookie )
357+
316358 return cookies
317359
318360 async def cleanup (self ) -> None :
0 commit comments