88from crawlee .http_clients ._base import HttpClient
99from crawlee .request_loaders ._sitemap_request_loader import SitemapRequestLoader
1010from crawlee .storages import KeyValueStore
11- from tests .unit .utils import run_alone_on_mac
1211
1312BASIC_SITEMAP = """
1413<?xml version="1.0" encoding="UTF-8"?>
@@ -51,24 +50,22 @@ def encode_base64(data: bytes) -> str:
5150 return base64 .b64encode (data ).decode ('utf-8' )
5251
5352
async def test_sitemap_traversal(server_url: URL, http_client: HttpClient) -> None:
    """Drain a `SitemapRequestLoader` built from `BASIC_SITEMAP` and verify its final state.

    The sitemap URL points at `sitemap.xml` under `server_url` and carries `BASIC_SITEMAP`, base64-encoded,
    in its `base64` query parameter. After every fetched request has been marked as handled, the loader must
    report itself empty and finished, with both the total and the handled count equal to 5.
    """
    sitemap_url = (server_url / 'sitemap.xml').with_query(base64=encode_base64(BASIC_SITEMAP.encode()))
    sitemap_loader = SitemapRequestLoader([str(sitemap_url)], http_client=http_client)

    while not await sitemap_loader.is_finished():
        item = await sitemap_loader.fetch_next_request()

        # The loop deliberately tolerates a missing request instead of asserting on it.
        # NOTE(review): presumably `fetch_next_request` can return None while the loader is not yet
        # finished (e.g. the sitemap is still being processed) - confirm against the loader.
        # Compare against None explicitly so the guard does not depend on the request's truthiness.
        if item is not None:
            await sitemap_loader.mark_request_as_handled(item)

    assert await sitemap_loader.is_empty()
    assert await sitemap_loader.is_finished()
    assert await sitemap_loader.get_total_count() == 5
    assert await sitemap_loader.get_handled_count() == 5
6967
7068
71- @run_alone_on_mac
7269async def test_is_empty_does_not_depend_on_fetch_next_request (server_url : URL , http_client : HttpClient ) -> None :
7370 sitemap_url = (server_url / 'sitemap.xml' ).with_query (base64 = encode_base64 (BASIC_SITEMAP .encode ()))
7471 sitemap_loader = SitemapRequestLoader ([str (sitemap_url )], http_client = http_client )
@@ -88,6 +85,9 @@ async def test_is_empty_does_not_depend_on_fetch_next_request(server_url: URL, h
8885 await sitemap_loader .mark_request_as_handled (item )
8986
9087 assert await sitemap_loader .is_empty ()
88+
89+ await asyncio .sleep (0.1 )
90+
9191 assert await sitemap_loader .is_finished ()
9292
9393
@@ -195,12 +195,13 @@ def transform_request(request_options: RequestOptions) -> RequestOptions | Reque
195195
196196 while not await sitemap_loader .is_finished ():
197197 request = await sitemap_loader .fetch_next_request ()
198- assert request is not None
199- assert request .user_data .get ('transformed' ) is True
200198
201- extracted_urls .add (request .url )
199+ if request :
200+ assert request .user_data .get ('transformed' ) is True
201+
202+ extracted_urls .add (request .url )
202203
203- await sitemap_loader .mark_request_as_handled (request )
204+ await sitemap_loader .mark_request_as_handled (request )
204205
205206 assert len (extracted_urls ) == 5
206207 assert extracted_urls == {
0 commit comments