Skip to content

Commit 747c0cf

Browse files
authored
feat: Use specialized Playwright docker images in templates (#1757)
### Description
- Update templates to use a browser-specialized Playwright Docker image.
- Add specialized templates: `playwright-chrome`, `playwright-firefox`, `playwright-webkit`.
- Update e2e tests to include the new templates.
- Update `PlaywrightBrowserPlugin` so that it no longer raises an exception when `browser_type='chrome'` is used together with an explicit `executable_path`; it now only emits a debug log instead.

### Issues
- Closes: #1756

### Testing
- e2e tests

### Checklist
- [ ] CI passed
1 parent fe94362 commit 747c0cf

File tree

14 files changed

+77
-50
lines changed

14 files changed

+77
-50
lines changed

.github/workflows/on_schedule_tests.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ jobs:
2424
fail-fast: false
2525
max-parallel: 12
2626
matrix:
27-
crawler-type: ["playwright_camoufox", "playwright", "parsel", "beautifulsoup"]
28-
http-client: [ "httpx", "curl_impersonate"]
27+
crawler-type: ["playwright_camoufox", "playwright_chrome", "playwright_firefox", "playwright_webkit", "playwright", "parsel", "beautifulsoup"]
28+
http-client: ["httpx", "curl_impersonate"]
2929
package-manager: ["pip", "uv", "poetry"]
3030

3131
runs-on: "ubuntu-latest"

src/crawlee/browsers/_playwright_browser_plugin.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,23 @@ def __init__(
8282
'executable_path': config.default_browser_path,
8383
'chromium_sandbox': not config.disable_browser_sandbox,
8484
}
85-
86-
if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
87-
raise ValueError(
88-
'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
89-
)
85+
explicit_browser_launch_options = browser_launch_options or {}
9086

9187
# Map 'chrome' to 'chromium' with the 'chrome' channel.
9288
if browser_type == 'chrome':
9389
browser_type = 'chromium'
9490
# Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
9591
default_launch_browser_options['channel'] = 'chrome'
9692

93+
if executable_path := explicit_browser_launch_options.get(
94+
'executable_path', default_launch_browser_options.get('executable_path')
95+
):
96+
logger.debug(
97+
f"Using browser executable from {executable_path}, which takes precedence over 'chrome' channel."
98+
)
99+
97100
self._browser_type: BrowserType = browser_type
98-
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
101+
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | explicit_browser_launch_options
99102
self._browser_new_context_options = browser_new_context_options or {}
100103
self._max_open_pages_per_browser = max_open_pages_per_browser
101104
self._use_incognito_pages = use_incognito_pages

src/crawlee/project_template/cookiecutter.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"project_name": "crawlee-python-project",
33
"__package_name": "{{ cookiecutter.project_name|lower|replace('-', '_') }}",
4-
"crawler_type": ["beautifulsoup", "parsel", "playwright", "playwright-camoufox"],
4+
"crawler_type": ["beautifulsoup", "parsel", "playwright", "playwright-camoufox", "playwright-chrome", "playwright-firefox", "playwright-webkit"],
55
"__crawler_type": "{{ cookiecutter.crawler_type|lower|replace('-', '_') }}",
66
"http_client": ["impit", "httpx", "curl-impersonate"],
77
"package_manager": ["poetry", "pip", "uv"],
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# % extends 'main.py'
2+
3+
# % block import
4+
from crawlee.crawlers import PlaywrightCrawler
5+
# % endblock
6+
7+
# % block instantiation
8+
crawler = PlaywrightCrawler(
9+
request_handler=router,
10+
headless=True,
11+
max_requests_per_crawl=10,
12+
browser_type="chrome",
13+
{{ self.http_client_instantiation() }}
14+
)
15+
# % endblock
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# % extends 'main.py'
2+
3+
# % block import
4+
from crawlee.crawlers import PlaywrightCrawler
5+
# % endblock
6+
7+
# % block instantiation
8+
crawler = PlaywrightCrawler(
9+
request_handler=router,
10+
headless=True,
11+
max_requests_per_crawl=10,
12+
browser_type="firefox",
13+
{{ self.http_client_instantiation() }}
14+
)
15+
# % endblock
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# % extends 'main.py'
2+
3+
# % block import
4+
from crawlee.crawlers import PlaywrightCrawler
5+
# % endblock
6+
7+
# % block instantiation
8+
crawler = PlaywrightCrawler(
9+
request_handler=router,
10+
headless=True,
11+
max_requests_per_crawl=10,
12+
browser_type="webkit",
13+
{{ self.http_client_instantiation() }}
14+
)
15+
# % endblock

src/crawlee/project_template/templates/routes_camoufox.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/crawlee/project_template/templates/routes_playwright_camoufox.py

Lines changed: 0 additions & 19 deletions
This file was deleted.

src/crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,13 @@
44
# % if cookiecutter.crawler_type == 'playwright'
55
FROM apify/actor-python-playwright:3.13
66
# % elif cookiecutter.crawler_type == 'playwright-camoufox'
7-
# Currently camoufox has issues installing on Python 3.13
8-
FROM apify/actor-python-playwright:3.12
7+
FROM apify/actor-python-playwright-camoufox:3.13
8+
# % elif cookiecutter.crawler_type == 'playwright-chrome'
9+
FROM apify/actor-python-playwright-chrome:3.13
10+
# % elif cookiecutter.crawler_type == 'playwright-firefox'
11+
FROM apify/actor-python-playwright-firefox:3.13
12+
# % elif cookiecutter.crawler_type == 'playwright-webkit'
13+
FROM apify/actor-python-playwright-webkit:3.13
914
# % else
1015
FROM apify/actor-python:3.13
1116
# % endif

src/crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# % if cookiecutter.crawler_type == 'playwright-camoufox'
1+
# % if cookiecutter.crawler_type.startswith('playwright')
22
# % set extras = ['playwright']
33
# % else
44
# % set extras = [cookiecutter.crawler_type]

0 commit comments

Comments
 (0)