Skip to content

Commit ccdb23b

Browse files
committed
use BrowserPool for test
1 parent 8c0dae6 commit ccdb23b

File tree

1 file changed

+42
-41
lines changed

1 file changed

+42
-41
lines changed

tests/unit/crawlers/_playwright/test_playwright_crawler.py

Lines changed: 42 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
SkippedReason,
2121
service_locator,
2222
)
23+
from crawlee.browsers import BrowserPool, PlaywrightBrowserPlugin
2324
from crawlee.configuration import Configuration
2425
from crawlee.crawlers import PlaywrightCrawler
2526
from crawlee.fingerprint_suite import (
@@ -336,47 +337,47 @@ async def test_isolation_cookies(*, use_incognito_pages: bool, server_url: URL)
336337
sessions_cookies: dict[str, dict[str, str]] = {}
337338
response_cookies: dict[str, dict[str, str]] = {}
338339

339-
crawler = PlaywrightCrawler(
340-
session_pool=SessionPool(max_pool_size=1),
341-
use_incognito_pages=use_incognito_pages,
342-
concurrency_settings=ConcurrencySettings(desired_concurrency=1, max_concurrency=1),
343-
)
344-
345-
@crawler.router.default_handler
346-
async def handler(context: PlaywrightCrawlingContext) -> None:
347-
if not context.session:
348-
return
349-
350-
sessions_ids.append(context.session.id)
351-
sessions[context.session.id] = context.session
352-
353-
if context.request.unique_key == '1':
354-
# With the second request, we check the cookies in the session and set retire
355-
await context.add_requests(
356-
[
357-
Request.from_url(
358-
str(server_url.with_path('/cookies')), unique_key='2', user_data={'retire_session': True}
359-
)
360-
]
361-
)
362-
return
363-
364-
response_data = json.loads(await context.response.text())
365-
response_cookies[context.session.id] = response_data.get('cookies')
366-
367-
if context.request.user_data.get('retire_session'):
368-
context.session.retire()
369-
370-
if context.request.unique_key == '2':
371-
# The third request is made with a new session to make sure it does not use another session's cookies
372-
await context.add_requests([Request.from_url(str(server_url.with_path('/cookies')), unique_key='3')])
340+
async with BrowserPool(plugins=[PlaywrightBrowserPlugin(use_incognito_pages=use_incognito_pages)]) as browser_pool:
341+
crawler = PlaywrightCrawler(
342+
session_pool=SessionPool(max_pool_size=1),
343+
concurrency_settings=ConcurrencySettings(desired_concurrency=1, max_concurrency=1),
344+
browser_pool=browser_pool,
345+
)
373346

374-
await crawler.run(
375-
[
376-
# The first request sets the cookie in the session
377-
Request.from_url(str(server_url.with_path('set_cookies').extend_query(a=1)), unique_key='1'),
378-
]
379-
)
347+
@crawler.router.default_handler
348+
async def handler(context: PlaywrightCrawlingContext) -> None:
349+
if not context.session:
350+
return
351+
352+
sessions_ids.append(context.session.id)
353+
sessions[context.session.id] = context.session
354+
response_data = json.loads(await context.response.text())
355+
response_cookies[context.session.id] = response_data.get('cookies')
356+
357+
if context.request.unique_key == '1':
358+
# With the second request, we check the cookies in the session and set retire
359+
await context.add_requests(
360+
[
361+
Request.from_url(
362+
str(server_url.with_path('/cookies')), unique_key='2', user_data={'retire_session': True}
363+
)
364+
]
365+
)
366+
return
367+
368+
if context.request.user_data.get('retire_session'):
369+
context.session.retire()
370+
371+
if context.request.unique_key == '2':
372+
# The third request is made with a new session to make sure it does not use another session's cookies
373+
await context.add_requests([Request.from_url(str(server_url.with_path('/cookies')), unique_key='3')])
374+
375+
await crawler.run(
376+
[
377+
# The first request sets the cookie in the session
378+
Request.from_url(str(server_url.with_path('set_cookies').extend_query(a=1)), unique_key='1'),
379+
]
380+
)
380381

381382
assert len(response_cookies) == 2
382383
assert len(sessions) == 2
@@ -402,7 +403,7 @@ async def handler(context: PlaywrightCrawlingContext) -> None:
402403
# The initiated cookies must match in both the response and the session store
403404
assert sessions_cookies[cookie_session_id] == response_cookies[cookie_session_id] == {'a': '1'}
404405

405-
# For a clean session, the cookie should not be in the sesstion store or in the response
406+
# For a clean session, the cookie should not be in the session store or in the response
406407
# This way we can be sure that no cookies are being leaked through the http client
407408
assert sessions_cookies[clean_session_id] == response_cookies[clean_session_id] == {}
408409
# Without `use_incognito_pages` we will have access to the session cookie,

0 commit comments

Comments (0)