Skip to content

Commit b906909

Browse files
committed
Revert the `asyncio.to_thread` lambda changes
1 parent f5682da commit b906909

File tree

6 files changed

+29
-25
lines changed

6 files changed

+29
-25
lines changed

docs/guides/code_examples/http_crawlers/selectolax_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ async def parse(self, response: HttpResponse) -> LexborHTMLParser:
2222
"""Parse HTTP response body into a document object."""
2323
response_body = await response.read()
2424
# Run parsing in a thread to avoid blocking the event loop.
25-
return await asyncio.to_thread(lambda: LexborHTMLParser(response_body))
25+
return await asyncio.to_thread(LexborHTMLParser, response_body)
2626

2727
@override
2828
async def parse_text(self, text: str) -> LexborHTMLParser:

src/crawlee/browsers/_playwright_browser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ async def new_context(self, **context_options: Any) -> BrowserContext:
7979
async def _delete_temp_dir(self, _: BrowserContext | None) -> None:
8080
if self._temp_dir and self._temp_dir.exists():
8181
temp_dir = self._temp_dir
82-
await asyncio.to_thread(lambda: shutil.rmtree(temp_dir, ignore_errors=True))
82+
await asyncio.to_thread(shutil.rmtree, temp_dir, ignore_errors=True)
8383

8484
@override
8585
async def close(self, **kwargs: Any) -> None:

src/crawlee/crawlers/_parsel/_parsel_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class ParselParser(AbstractHttpParser[Selector, Selector]):
2222
@override
2323
async def parse(self, response: HttpResponse) -> Selector:
2424
response_body = await response.read()
25-
return await asyncio.to_thread(lambda: Selector(body=response_body))
25+
return await asyncio.to_thread(Selector, body=response_body)
2626

2727
@override
2828
async def parse_text(self, text: str) -> Selector:

src/crawlee/storage_clients/_file_system/_dataset_client.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ async def open(
120120
dataset_base_path = Path(configuration.storage_dir) / cls._STORAGE_SUBDIR
121121

122122
if not dataset_base_path.exists():
123-
await asyncio.to_thread(lambda: dataset_base_path.mkdir(parents=True, exist_ok=True))
123+
await asyncio.to_thread(dataset_base_path.mkdir, parents=True, exist_ok=True)
124124

125125
# Get a new instance by ID.
126126
if id:
@@ -134,7 +134,7 @@ async def open(
134134
continue
135135

136136
try:
137-
file = await asyncio.to_thread(lambda p=path_to_metadata: p.open(mode='r', encoding='utf-8'))
137+
file = await asyncio.to_thread(path_to_metadata.open, mode='r', encoding='utf-8')
138138
try:
139139
file_content = json.load(file)
140140
metadata = DatasetMetadata(**file_content)
@@ -163,7 +163,7 @@ async def open(
163163

164164
# If the dataset directory exists, reconstruct the client from the metadata file.
165165
if path_to_dataset.exists() and path_to_metadata.exists():
166-
file = await asyncio.to_thread(lambda: path_to_metadata.open(mode='r', encoding='utf-8'))
166+
file = await asyncio.to_thread(path_to_metadata.open, mode='r', encoding='utf-8')
167167
try:
168168
file_content = json.load(file)
169169
finally:
@@ -211,7 +211,7 @@ async def drop(self) -> None:
211211
async def purge(self) -> None:
212212
async with self._lock:
213213
for file_path in await self._get_sorted_data_files():
214-
await asyncio.to_thread(lambda f=file_path: f.unlink(missing_ok=True))
214+
await asyncio.to_thread(file_path.unlink, missing_ok=True)
215215

216216
await self._update_metadata(
217217
update_accessed_at=True,
@@ -435,7 +435,7 @@ async def _update_metadata(
435435
self._metadata.item_count = new_item_count
436436

437437
# Ensure the parent directory for the metadata file exists.
438-
await asyncio.to_thread(lambda: self.path_to_metadata.parent.mkdir(parents=True, exist_ok=True))
438+
await asyncio.to_thread(self.path_to_metadata.parent.mkdir, parents=True, exist_ok=True)
439439

440440
# Dump the serialized metadata to the file.
441441
data = await json_dumps(self._metadata.model_dump())
@@ -456,7 +456,7 @@ async def _push_item(self, item: dict[str, Any], item_id: int) -> None:
456456
file_path = self.path_to_dataset / filename
457457

458458
# Ensure the dataset directory exists.
459-
await asyncio.to_thread(lambda: self.path_to_dataset.mkdir(parents=True, exist_ok=True))
459+
await asyncio.to_thread(self.path_to_dataset.mkdir, parents=True, exist_ok=True)
460460

461461
# Dump the serialized item to the file.
462462
data = await json_dumps(item)

src/crawlee/storage_clients/_file_system/_key_value_store_client.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import functools
45
import json
56
import shutil
67
import urllib.parse
@@ -119,7 +120,7 @@ async def open(
119120
kvs_base_path = Path(configuration.storage_dir) / cls._STORAGE_SUBDIR
120121

121122
if not kvs_base_path.exists():
122-
await asyncio.to_thread(lambda: kvs_base_path.mkdir(parents=True, exist_ok=True))
123+
await asyncio.to_thread(kvs_base_path.mkdir, parents=True, exist_ok=True)
123124

124125
# Get a new instance by ID.
125126
if id:
@@ -133,7 +134,7 @@ async def open(
133134
continue
134135

135136
try:
136-
file = await asyncio.to_thread(lambda p=path_to_metadata: p.open(mode='r', encoding='utf-8'))
137+
file = await asyncio.to_thread(path_to_metadata.open, mode='r', encoding='utf-8')
137138
try:
138139
file_content = json.load(file)
139140
metadata = KeyValueStoreMetadata(**file_content)
@@ -162,7 +163,7 @@ async def open(
162163

163164
# If the key-value store directory exists, reconstruct the client from the metadata file.
164165
if path_to_kvs.exists() and path_to_metadata.exists():
165-
file = await asyncio.to_thread(lambda: path_to_metadata.open(mode='r', encoding='utf-8'))
166+
file = await asyncio.to_thread(path_to_metadata.open, mode='r', encoding='utf-8')
166167
try:
167168
file_content = json.load(file)
168169
finally:
@@ -212,7 +213,7 @@ async def purge(self) -> None:
212213
for file_path in self.path_to_kvs.glob('*'):
213214
if file_path.name == METADATA_FILENAME:
214215
continue
215-
await asyncio.to_thread(lambda f=file_path: f.unlink(missing_ok=True))
216+
await asyncio.to_thread(file_path.unlink, missing_ok=True)
216217

217218
await self._update_metadata(
218219
update_accessed_at=True,
@@ -239,7 +240,9 @@ async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:
239240
# Read the metadata file
240241
async with self._lock:
241242
try:
242-
file = await asyncio.to_thread(lambda: record_metadata_filepath.open(mode='r', encoding='utf-8'))
243+
file = await asyncio.to_thread(
244+
functools.partial(record_metadata_filepath.open, mode='r', encoding='utf-8'),
245+
)
243246
except FileNotFoundError:
244247
logger.warning(f'Metadata file disappeared for key "{key}", aborting get_value')
245248
return None
@@ -346,11 +349,11 @@ async def delete_value(self, *, key: str) -> None:
346349
async with self._lock:
347350
# Delete the value file and its metadata if found
348351
if record_path.exists():
349-
await asyncio.to_thread(lambda: record_path.unlink(missing_ok=True))
352+
await asyncio.to_thread(record_path.unlink, missing_ok=True)
350353

351354
# Delete the metadata file if it exists
352355
if metadata_path.exists():
353-
await asyncio.to_thread(lambda: metadata_path.unlink(missing_ok=True))
356+
await asyncio.to_thread(metadata_path.unlink, missing_ok=True)
354357
else:
355358
logger.warning(f'Found value file for key "{key}" but no metadata file when trying to delete it.')
356359

@@ -395,7 +398,7 @@ async def iterate_keys(
395398

396399
# Try to read and parse the metadata file
397400
try:
398-
metadata_content = await asyncio.to_thread(lambda f=file_path: f.read_text(encoding='utf-8'))
401+
metadata_content = await asyncio.to_thread(file_path.read_text, encoding='utf-8')
399402
except FileNotFoundError:
400403
logger.warning(f'Metadata file disappeared for key "{key_name}", skipping it.')
401404
continue
@@ -475,7 +478,7 @@ async def _update_metadata(
475478
self._metadata.modified_at = now
476479

477480
# Ensure the parent directory for the metadata file exists.
478-
await asyncio.to_thread(lambda: self.path_to_metadata.parent.mkdir(parents=True, exist_ok=True))
481+
await asyncio.to_thread(self.path_to_metadata.parent.mkdir, parents=True, exist_ok=True)
479482

480483
# Dump the serialized metadata to the file.
481484
data = await json_dumps(self._metadata.model_dump())

src/crawlee/storage_clients/_file_system/_request_queue_client.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4+
import functools
45
import json
56
import shutil
67
from collections import deque
@@ -183,7 +184,7 @@ async def open(
183184
rq_base_path = Path(configuration.storage_dir) / cls._STORAGE_SUBDIR
184185

185186
if not rq_base_path.exists():
186-
await asyncio.to_thread(lambda: rq_base_path.mkdir(parents=True, exist_ok=True))
187+
await asyncio.to_thread(rq_base_path.mkdir, parents=True, exist_ok=True)
187188

188189
# Open an existing RQ by its ID, raise an error if not found.
189190
if id:
@@ -197,7 +198,7 @@ async def open(
197198
continue
198199

199200
try:
200-
file = await asyncio.to_thread(lambda p=path_to_metadata: p.open(mode='r', encoding='utf-8'))
201+
file = await asyncio.to_thread(path_to_metadata.open, mode='r', encoding='utf-8')
201202
try:
202203
file_content = json.load(file)
203204
metadata = RequestQueueMetadata(**file_content)
@@ -232,7 +233,7 @@ async def open(
232233

233234
# If the RQ directory exists, reconstruct the client from the metadata file.
234235
if path_to_rq.exists() and path_to_metadata.exists():
235-
file = await asyncio.to_thread(lambda: path_to_metadata.open(encoding='utf-8'))
236+
file = await asyncio.to_thread(path_to_metadata.open, encoding='utf-8')
236237
try:
237238
file_content = json.load(file)
238239
finally:
@@ -300,7 +301,7 @@ async def purge(self) -> None:
300301
request_files = await self._get_request_files(self.path_to_rq)
301302

302303
for file_path in request_files:
303-
await asyncio.to_thread(lambda f=file_path: f.unlink(missing_ok=True))
304+
await asyncio.to_thread(file_path.unlink, missing_ok=True)
304305

305306
# Clear recoverable state
306307
await self._state.reset()
@@ -675,7 +676,7 @@ async def _update_metadata(
675676
self._metadata.had_multiple_clients = True
676677

677678
# Ensure the parent directory for the metadata file exists.
678-
await asyncio.to_thread(lambda: self.path_to_metadata.parent.mkdir(parents=True, exist_ok=True))
679+
await asyncio.to_thread(self.path_to_metadata.parent.mkdir, parents=True, exist_ok=True)
679680

680681
# Dump the serialized metadata to the file.
681682
data = await json_dumps(self._metadata.model_dump())
@@ -753,7 +754,7 @@ async def _get_request_files(cls, path_to_rq: Path) -> list[Path]:
753754
A list of paths to all request files.
754755
"""
755756
# Create the requests directory if it doesn't exist.
756-
await asyncio.to_thread(lambda: path_to_rq.mkdir(parents=True, exist_ok=True))
757+
await asyncio.to_thread(path_to_rq.mkdir, parents=True, exist_ok=True)
757758

758759
# List all the json files.
759760
files = await asyncio.to_thread(lambda: list(path_to_rq.glob('*.json')))
@@ -775,7 +776,7 @@ async def _parse_request_file(cls, file_path: Path) -> Request | None:
775776
"""
776777
# Open the request file.
777778
try:
778-
file = await asyncio.to_thread(lambda f=file_path: f.open(mode='r', encoding='utf-8'))
779+
file = await asyncio.to_thread(functools.partial(file_path.open, mode='r', encoding='utf-8'))
779780
except FileNotFoundError:
780781
logger.warning(f'Request file "{file_path}" not found.')
781782
return None

0 commit comments

Comments (0)