Skip to content

Commit 87a846b

Browse files
vdusek and claude committed
perf: offload BeautifulSoup parsing to a thread via asyncio.to_thread()
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 02a18ea commit 87a846b

1 file changed

Lines changed: 4 additions & 2 deletions

File tree

src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_parser.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import asyncio
34
from typing import TYPE_CHECKING, Literal
45

56
from bs4 import BeautifulSoup, Tag
@@ -23,11 +24,12 @@ def __init__(self, parser: BeautifulSoupParserType = 'lxml') -> None:
2324

2425
@override
2526
async def parse(self, response: HttpResponse) -> BeautifulSoup:
26-
return BeautifulSoup(await response.read(), features=self._parser)
27+
body = await response.read()
28+
return await asyncio.to_thread(BeautifulSoup, body, features=self._parser)
2729

2830
@override
2931
async def parse_text(self, text: str) -> BeautifulSoup:
30-
return BeautifulSoup(text, features=self._parser)
32+
return await asyncio.to_thread(BeautifulSoup, text, features=self._parser)
3133

3234
@override
3335
def is_matching_selector(self, parsed_content: Tag, selector: str) -> bool:

0 commit comments

Comments (0)