docs: use Request.crawl_depth for depth tracking in BeautifulSoup and Parsel examples

vdusek · vdusek · commit 96d0d8a9163a · 2026-06-05T12:01:21.000+02:00
diff --git a/docs/03_guides/code/01_beautifulsoup_httpx.py b/docs/03_guides/code/01_beautifulsoup_httpx.py
@@ -23,23 +23,20 @@ async def main() -> None:
         # Open the default request queue for handling URLs to be processed.
         request_queue = await Actor.open_request_queue()
 
-        # Enqueue the start URLs with an initial crawl depth of 0.
+        # Enqueue the start URLs. Their crawl depth defaults to 0.
         for start_url in start_urls:
             url = start_url.get('url')
             Actor.log.info(f'Enqueuing {url} ...')
-            new_request = Request.from_url(url, user_data={'depth': 0})
-            await request_queue.add_request(new_request)
+            await request_queue.add_request(Request.from_url(url))
 
         # Create an HTTPX client to fetch the HTML content of the URLs.
         async with httpx.AsyncClient() as client:
             # Process the URLs from the request queue.
             while request := await request_queue.fetch_next_request():
                 url = request.url
 
-                if not isinstance(request.user_data['depth'], (str, int)):
-                    raise TypeError('Request.depth is an unexpected type.')
-
-                depth = int(request.user_data['depth'])
+                # Read the crawl depth tracked by the request itself.
+                depth = request.crawl_depth
                 Actor.log.info(f'Scraping {url} (depth={depth}) ...')
 
                 try:
@@ -58,10 +55,8 @@ async def main() -> None:
 
                             if link_url.startswith(('http://', 'https://')):
                                 Actor.log.info(f'Enqueuing {link_url} ...')
-                                new_request = Request.from_url(
-                                    link_url,
-                                    user_data={'depth': depth + 1},
-                                )
+                                new_request = Request.from_url(link_url)
+                                new_request.crawl_depth = depth + 1
                                 await request_queue.add_request(new_request)
 
                     # Extract the desired data.
@@ -81,7 +76,7 @@ async def main() -> None:
 
                 finally:
                     # Mark the request as handled to ensure it is not processed again.
-                    await request_queue.mark_request_as_handled(new_request)
+                    await request_queue.mark_request_as_handled(request)
 
 
 if __name__ == '__main__':
diff --git a/docs/03_guides/code/02_parsel_impit.py b/docs/03_guides/code/02_parsel_impit.py
@@ -23,23 +23,20 @@ async def main() -> None:
         # Open the default request queue for handling URLs to be processed.
         request_queue = await Actor.open_request_queue()
 
-        # Enqueue the start URLs with an initial crawl depth of 0.
+        # Enqueue the start URLs. Their crawl depth defaults to 0.
         for start_url in start_urls:
             url = start_url.get('url')
             Actor.log.info(f'Enqueuing {url} ...')
-            new_request = Request.from_url(url, user_data={'depth': 0})
-            await request_queue.add_request(new_request)
+            await request_queue.add_request(Request.from_url(url))
 
         # Create an Impit client to fetch the HTML content of the URLs.
         async with impit.AsyncClient() as client:
             # Process the URLs from the request queue.
             while request := await request_queue.fetch_next_request():
                 url = request.url
 
-                if not isinstance(request.user_data['depth'], (str, int)):
-                    raise TypeError('Request.depth is an unexpected type.')
-
-                depth = int(request.user_data['depth'])
+                # Read the crawl depth tracked by the request itself.
+                depth = request.crawl_depth
                 Actor.log.info(f'Scraping {url} (depth={depth}) ...')
 
                 try:
@@ -59,10 +56,8 @@ async def main() -> None:
 
                             if link_url.startswith(('http://', 'https://')):
                                 Actor.log.info(f'Enqueuing {link_url} ...')
-                                new_request = Request.from_url(
-                                    link_url,
-                                    user_data={'depth': depth + 1},
-                                )
+                                new_request = Request.from_url(link_url)
+                                new_request.crawl_depth = depth + 1
                                 await request_queue.add_request(new_request)
 
                     # Extract the desired data using Parsel selectors.