Skip to content

Commit a4ccbde

Browse files
committed
add additional comments
1 parent e4a20b0 commit a4ccbde

2 files changed

Lines changed: 7 additions & 0 deletions

File tree

docs/guides/code_examples/http_crawlers/selectolax_context.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from crawlee.crawlers._abstract_http import ParsedHttpCrawlingContext
77

88

9+
# Custom context for the Selectolax parser. You can add your own methods here
10+
# to facilitate working with the parsed document.
911
@dataclass(frozen=True)
1012
class SelectolaxLexborContext(ParsedHttpCrawlingContext[LexborHTMLParser]):
1113
"""Crawling context providing access to the parsed page.

docs/guides/code_examples/http_crawlers/selectolax_crawler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
from crawlee.crawlers._abstract_http import ParsedHttpCrawlingContext
1818

1919

20+
# Custom crawler using the custom context. It is optional: you can use
21+
# AbstractHttpCrawler directly with SelectolaxLexborParser if you don't need
22+
# any custom context methods.
2023
class SelectolaxLexborCrawler(
2124
AbstractHttpCrawler[SelectolaxLexborContext, LexborHTMLParser, LexborNode]
2225
):
@@ -30,6 +33,8 @@ def __init__(
3033
async def final_step(
3134
context: ParsedHttpCrawlingContext[LexborHTMLParser],
3235
) -> AsyncGenerator[SelectolaxLexborContext, None]:
36+
# Yield the custom context, which wraps the base context with additional
37+
# functionality.
3338
yield SelectolaxLexborContext.from_parsed_http_crawling_context(context)
3439

3540
# Build context pipeline: HTTP request -> parsing -> custom context.

0 commit comments

Comments
 (0)