|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | from typing import TYPE_CHECKING |
| 4 | +from unittest.mock import AsyncMock |
4 | 5 |
|
5 | 6 | import pytest |
6 | 7 |
|
7 | 8 | from crawlee.browsers import BrowserPool, PlaywrightBrowserPlugin |
| 9 | +from crawlee.browsers._browser_controller import BrowserController |
| 10 | +from crawlee.browsers._types import CrawleePage |
8 | 11 | from tests.unit.utils import run_alone_on_mac |
9 | 12 |
|
10 | 13 | if TYPE_CHECKING: |
| 14 | + from collections.abc import Mapping |
| 15 | + from typing import Any |
| 16 | + |
11 | 17 | from yarl import URL |
12 | 18 |
|
| 19 | + from crawlee.proxy_configuration import ProxyInfo |
| 20 | + |
13 | 21 |
|
14 | 22 | async def test_default_plugin_new_page_creation(server_url: URL) -> None: |
15 | 23 | async with BrowserPool() as browser_pool: |
@@ -192,3 +200,157 @@ async def test_browser_pool_retire_browser_after_page_count( |
192 | 200 | assert first_browser is second_browser |
193 | 201 | else: |
194 | 202 | assert first_browser is not second_browser |
| 203 | + |
| 204 | + |
async def test_pre_page_create_hook_is_called() -> None:
    """Check that a pre-page-create hook runs once and that its option changes reach the page."""
    hook_spy = AsyncMock()

    async with BrowserPool() as browser_pool:

        @browser_pool.pre_page_create_hook
        async def hook(
            page_id: str,
            controller: BrowserController,
            browser_new_context_options: dict[str, Any],
            proxy_info: ProxyInfo | None,
        ) -> None:
            # Record the received arguments so they can be inspected after the page exists.
            await hook_spy(page_id, controller, browser_new_context_options, proxy_info)

            # Mutate the options in place; the value is read back from the page below.
            browser_new_context_options['user_agent'] = 'Modified User-Agent'

            # While this hook runs the controller is expected to hold no pages.
            assert len(controller.pages) == 0

        created_page = await browser_pool.new_page()
        reported_user_agent = await created_page.page.evaluate('navigator.userAgent')

        await created_page.page.close()

        # The user agent seen by the page must be the one written by the hook.
        assert reported_user_agent == 'Modified User-Agent'

        hook_spy.assert_awaited_once()
        recorded_args = hook_spy.call_args.args
        recorded_page_id, recorded_controller, _, recorded_proxy_info = recorded_args

        assert isinstance(recorded_page_id, str)
        assert created_page.id == recorded_page_id
        assert isinstance(recorded_controller, BrowserController)
        assert recorded_proxy_info is None
| 237 | + |
| 238 | + |
async def test_post_page_create_hook_is_called() -> None:
    """Check that a post-page-create hook runs once and can act on the freshly created page."""
    hook_spy = AsyncMock()

    async with BrowserPool() as browser_pool:

        @browser_pool.post_page_create_hook
        async def hook(crawlee_page: CrawleePage, controller: BrowserController) -> None:
            await hook_spy(crawlee_page, controller)

            # Leave a marker in the page so the test can prove the hook touched it.
            await crawlee_page.page.evaluate('window.__hook_applied = true')

            assert isinstance(crawlee_page, CrawleePage)

            # While this hook runs the controller is expected to hold exactly one page.
            assert len(controller.pages) == 1

        created_page = await browser_pool.new_page()

        marker_value = await created_page.page.evaluate('window.__hook_applied')

        await created_page.page.close()

        assert marker_value is True

        hook_spy.assert_awaited_once()
        recorded_page, recorded_controller = hook_spy.call_args.args

        # The hook must have received the very object returned by `new_page`.
        assert created_page is recorded_page
        assert isinstance(recorded_controller, BrowserController)
| 266 | + |
| 267 | + |
async def test_pre_page_close_hook() -> None:
    """Check that a pre-page-close hook runs once, while the page is still open."""
    hook_spy = AsyncMock()

    async with BrowserPool() as browser_pool:

        @browser_pool.pre_page_close_hook
        async def hook(crawlee_page: CrawleePage, controller: BrowserController) -> None:
            await hook_spy(crawlee_page, controller)

            # The page must still be open and still counted by the controller at this point.
            assert not crawlee_page.page.is_closed()
            assert len(controller.pages) == 1

        created_page = await browser_pool.new_page()
        await created_page.page.close()

        hook_spy.assert_awaited_once()
        assert created_page.page.is_closed()
| 285 | + |
| 286 | + |
async def test_post_page_close_hook() -> None:
    """Check that a post-page-close hook runs once with the closed page's id and a controller."""
    call_mock = AsyncMock()

    async with BrowserPool() as browser_pool:

        @browser_pool.post_page_close_hook
        async def hook(page_id: str, controller: BrowserController) -> None:
            await call_mock(page_id, controller)

            # While this hook runs the controller is expected to hold no pages any more.
            assert len(controller.pages) == 0

        test_page = await browser_pool.new_page()
        await test_page.page.close()

        # Assert the hook ran before reading `call_args`: on a mock that was never called
        # `call_args` is None, and unpacking it would raise a TypeError that hides the real failure.
        call_mock.assert_awaited_once()
        page_id, controller = call_mock.call_args[0]

        assert test_page.id == page_id
        assert isinstance(controller, BrowserController)
| 306 | + |
| 307 | + |
async def test_page_hooks_execution_order() -> None:
    """Check that the four page lifecycle hooks fire in create-then-close order."""
    fired_hooks: list[str] = []

    async with BrowserPool() as browser_pool:
        # Each hook only records its own name; the arguments are intentionally ignored.

        @browser_pool.pre_page_create_hook
        async def on_pre_create(
            _page_id: str,
            _controller: BrowserController,
            _browser_new_context_options: Mapping[str, Any],
            _proxy_info: ProxyInfo | None,
        ) -> None:
            fired_hooks.append('pre_create')

        @browser_pool.post_page_create_hook
        async def on_post_create(_crawlee_page: CrawleePage, _controller: BrowserController) -> None:
            fired_hooks.append('post_create')

        @browser_pool.pre_page_close_hook
        async def on_pre_close(_crawlee_page: CrawleePage, _controller: BrowserController) -> None:
            fired_hooks.append('pre_close')

        @browser_pool.post_page_close_hook
        async def on_post_close(_page_id: str, _controller: BrowserController) -> None:
            fired_hooks.append('post_close')

        # Opening and then closing a single page should trigger every hook exactly once.
        crawlee_page = await browser_pool.new_page()
        await crawlee_page.page.close()

        expected_sequence = ['pre_create', 'post_create', 'pre_close', 'post_close']
        assert fired_hooks == expected_sequence
| 338 | + |
| 339 | + |
async def test_multiple_hooks_all_called() -> None:
    """Check that two hooks registered for the same event both run, in registration order."""
    fired_hooks: list[str] = []

    async with BrowserPool() as browser_pool:

        @browser_pool.post_page_create_hook
        async def registered_first(_crawlee_page: CrawleePage, _controller: BrowserController) -> None:
            fired_hooks.append('first')

        @browser_pool.post_page_create_hook
        async def registered_second(_crawlee_page: CrawleePage, _controller: BrowserController) -> None:
            fired_hooks.append('second')

        # Creating one page is enough to trigger both post-create hooks.
        crawlee_page = await browser_pool.new_page()
        await crawlee_page.page.close()

        expected_sequence = ['first', 'second']
        assert fired_hooks == expected_sequence
0 commit comments