2020 SkippedReason ,
2121 service_locator ,
2222)
23+ from crawlee .browsers import BrowserPool , PlaywrightBrowserPlugin
2324from crawlee .configuration import Configuration
2425from crawlee .crawlers import PlaywrightCrawler
2526from crawlee .fingerprint_suite import (
@@ -336,47 +337,47 @@ async def test_isolation_cookies(*, use_incognito_pages: bool, server_url: URL)
336337 sessions_cookies : dict [str , dict [str , str ]] = {}
337338 response_cookies : dict [str , dict [str , str ]] = {}
338339
339- crawler = PlaywrightCrawler (
340- session_pool = SessionPool (max_pool_size = 1 ),
341- use_incognito_pages = use_incognito_pages ,
342- concurrency_settings = ConcurrencySettings (desired_concurrency = 1 , max_concurrency = 1 ),
343- )
344-
345- @crawler .router .default_handler
346- async def handler (context : PlaywrightCrawlingContext ) -> None :
347- if not context .session :
348- return
349-
350- sessions_ids .append (context .session .id )
351- sessions [context .session .id ] = context .session
352-
353- if context .request .unique_key == '1' :
354- # With the second request, we check the cookies in the session and set retire
355- await context .add_requests (
356- [
357- Request .from_url (
358- str (server_url .with_path ('/cookies' )), unique_key = '2' , user_data = {'retire_session' : True }
359- )
360- ]
361- )
362- return
363-
364- response_data = json .loads (await context .response .text ())
365- response_cookies [context .session .id ] = response_data .get ('cookies' )
366-
367- if context .request .user_data .get ('retire_session' ):
368- context .session .retire ()
369-
370- if context .request .unique_key == '2' :
371- # The third request is made with a new session to make sure it does not use another session's cookies
372- await context .add_requests ([Request .from_url (str (server_url .with_path ('/cookies' )), unique_key = '3' )])
340+ async with BrowserPool (plugins = [PlaywrightBrowserPlugin (use_incognito_pages = use_incognito_pages )]) as browser_pool :
341+ crawler = PlaywrightCrawler (
342+ session_pool = SessionPool (max_pool_size = 1 ),
343+ concurrency_settings = ConcurrencySettings (desired_concurrency = 1 , max_concurrency = 1 ),
344+ browser_pool = browser_pool ,
345+ )
373346
374- await crawler .run (
375- [
376- # The first request sets the cookie in the session
377- Request .from_url (str (server_url .with_path ('set_cookies' ).extend_query (a = 1 )), unique_key = '1' ),
378- ]
379- )
347+ @crawler .router .default_handler
348+ async def handler (context : PlaywrightCrawlingContext ) -> None :
349+ if not context .session :
350+ return
351+
352+ sessions_ids .append (context .session .id )
353+ sessions [context .session .id ] = context .session
354+ response_data = json .loads (await context .response .text ())
355+ response_cookies [context .session .id ] = response_data .get ('cookies' )
356+
357+ if context .request .unique_key == '1' :
358+ # With the second request, we check the cookies in the session and set retire
359+ await context .add_requests (
360+ [
361+ Request .from_url (
362+ str (server_url .with_path ('/cookies' )), unique_key = '2' , user_data = {'retire_session' : True }
363+ )
364+ ]
365+ )
366+ return
367+
368+ if context .request .user_data .get ('retire_session' ):
369+ context .session .retire ()
370+
371+ if context .request .unique_key == '2' :
372+ # The third request is made with a new session to make sure it does not use another session's cookies
373+ await context .add_requests ([Request .from_url (str (server_url .with_path ('/cookies' )), unique_key = '3' )])
374+
375+ await crawler .run (
376+ [
377+ # The first request sets the cookie in the session
378+ Request .from_url (str (server_url .with_path ('set_cookies' ).extend_query (a = 1 )), unique_key = '1' ),
379+ ]
380+ )
380381
381382 assert len (response_cookies ) == 2
382383 assert len (sessions ) == 2
@@ -402,7 +403,7 @@ async def handler(context: PlaywrightCrawlingContext) -> None:
402403 # The initiated cookies must match in both the response and the session store
403404 assert sessions_cookies [cookie_session_id ] == response_cookies [cookie_session_id ] == {'a' : '1' }
404405
405- # For a clean session, the cookie should not be in the sesstion store or in the response
406+ # For a clean session, the cookie should not be in the session store or in the response
406407 # This way we can be sure that no cookies are being leaked through the http client
407408 assert sessions_cookies [clean_session_id ] == response_cookies [clean_session_id ] == {}
408409 # Without `use_incognito_pages` we will have access to the session cookie,
0 commit comments