Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/on_schedule_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
fail-fast: false
max-parallel: 12
matrix:
crawler-type: ["playwright_camoufox", "playwright", "parsel", "beautifulsoup"]
http-client: [ "httpx", "curl_impersonate"]
crawler-type: ["playwright_camoufox", "playwright_chrome", "playwright_firefox", "playwright_webkit", "playwright", "parsel", "beautifulsoup"]
Comment thread
vdusek marked this conversation as resolved.
http-client: ["httpx", "curl_impersonate"]
Comment thread
Pijukatel marked this conversation as resolved.
package-manager: ["pip", "uv", "poetry"]

runs-on: "ubuntu-latest"
Expand Down
10 changes: 5 additions & 5 deletions src/crawlee/browsers/_playwright_browser_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,16 @@ def __init__(
'chromium_sandbox': not config.disable_browser_sandbox,
}

if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
raise ValueError(
'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
)

# Map 'chrome' to 'chromium' with the 'chrome' channel.
if browser_type == 'chrome':
browser_type = 'chromium'
# Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
default_launch_browser_options['channel'] = 'chrome'
if default_launch_browser_options['executable_path']:
Comment thread
Pijukatel marked this conversation as resolved.
Outdated
logger.debug(
f'Using browser executable from {default_launch_browser_options["executable_path"]},'
f" which takes precedence over 'chrome' channel."
)

self._browser_type: BrowserType = browser_type
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
Comment thread
Pijukatel marked this conversation as resolved.
Outdated
Expand Down
2 changes: 1 addition & 1 deletion src/crawlee/project_template/cookiecutter.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"project_name": "crawlee-python-project",
"__package_name": "{{ cookiecutter.project_name|lower|replace('-', '_') }}",
"crawler_type": ["beautifulsoup", "parsel", "playwright", "playwright-camoufox"],
"crawler_type": ["beautifulsoup", "parsel", "playwright", "playwright-camoufox", "playwright-chrome", "playwright-firefox", "playwright-webkit"],
Comment thread
vdusek marked this conversation as resolved.
"__crawler_type": "{{ cookiecutter.crawler_type|lower|replace('-', '_') }}",
"http_client": ["impit", "httpx", "curl-impersonate"],
"package_manager": ["poetry", "pip", "uv"],
Expand Down
14 changes: 14 additions & 0 deletions src/crawlee/project_template/templates/main_playwright_chrome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# % extends 'main.py'
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Closing parenthesis formatting

Files: templates/main_playwright_chrome.py,
templates/main_playwright_firefox.py, templates/main_playwright_webkit.py

The closing ) is on the same line as the last argument:

  {{ self.http_client_instantiation() }})

Consider:

  {{ self.http_client_instantiation() }}

)

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


# Project template entry point for a PlaywrightCrawler that uses the 'chrome' browser type.
# NOTE(review): this comment sits outside every block of a child template, so it is presumably
# dropped from the rendered project - confirm against the template environment settings.

# % block import
from crawlee.crawlers import PlaywrightCrawler
# % endblock

# % block instantiation
crawler = PlaywrightCrawler(
    request_handler=router,
    headless=True,
    max_requests_per_crawl=10,
    browser_type="chrome",
    {{ self.http_client_instantiation() }}
)
# % endblock
14 changes: 14 additions & 0 deletions src/crawlee/project_template/templates/main_playwright_firefox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# % extends 'main.py'

# Project template entry point for a PlaywrightCrawler that uses the 'firefox' browser type.
# NOTE(review): this comment sits outside every block of a child template, so it is presumably
# dropped from the rendered project - confirm against the template environment settings.

# % block import
from crawlee.crawlers import PlaywrightCrawler
# % endblock

# % block instantiation
crawler = PlaywrightCrawler(
    request_handler=router,
    headless=True,
    max_requests_per_crawl=10,
    browser_type="firefox",
    {{ self.http_client_instantiation() }}
)
# % endblock
14 changes: 14 additions & 0 deletions src/crawlee/project_template/templates/main_playwright_webkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# % extends 'main.py'

# Project template entry point for a PlaywrightCrawler that uses the 'webkit' browser type.
# NOTE(review): this comment sits outside every block of a child template, so it is presumably
# dropped from the rendered project - confirm against the template environment settings.

# % block import
from crawlee.crawlers import PlaywrightCrawler
# % endblock

# % block instantiation
crawler = PlaywrightCrawler(
    request_handler=router,
    headless=True,
    max_requests_per_crawl=10,
    browser_type="webkit",
    {{ self.http_client_instantiation() }}
)
# % endblock

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@
# % if cookiecutter.crawler_type == 'playwright'
FROM apify/actor-python-playwright:3.13
# % elif cookiecutter.crawler_type == 'playwright-camoufox'
# Currently camoufox has issues installing on Python 3.13
FROM apify/actor-python-playwright:3.12
FROM apify/actor-python-playwright-camoufox:3.13
# % elif cookiecutter.crawler_type == 'playwright-chrome'
FROM apify/actor-python-playwright-chrome:3.13
# % elif cookiecutter.crawler_type == 'playwright-firefox'
FROM apify/actor-python-playwright-firefox:3.13
# % elif cookiecutter.crawler_type == 'playwright-webkit'
FROM apify/actor-python-playwright-webkit:3.13
# % else
FROM apify/actor-python:3.13
# % endif
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# % if cookiecutter.crawler_type == 'playwright-camoufox'
# % if cookiecutter.crawler_type.startswith('playwright')
# % set extras = ['playwright']
# % else
# % set extras = [cookiecutter.crawler_type]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# % if cookiecutter.crawler_type == 'playwright-camoufox'
camoufox[geoip]~=0.4.5
# % endif
# % if cookiecutter.crawler_type.startswith('playwright')
Comment thread
vdusek marked this conversation as resolved.
# % set extras = ['playwright']
# % else
# % set extras = [cookiecutter.crawler_type]
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# % if cookiecutter.crawler_type.startswith('playwright')
# % include 'routes_playwright.py'
# % else
# % include 'routes_%s.py' % cookiecutter.__crawler_type
# % endif
Comment thread
Pijukatel marked this conversation as resolved.
3 changes: 3 additions & 0 deletions tests/e2e/project_template/test_static_crawlers_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
'crawler_type',
[
pytest.param('playwright-camoufox', marks=pytest.mark.playwright_camoufox),
pytest.param('playwright-chrome', marks=pytest.mark.playwright_chrome),
pytest.param('playwright-firefox', marks=pytest.mark.playwright_firefox),
pytest.param('playwright-webkit', marks=pytest.mark.playwright_webkit),
pytest.param('playwright', marks=pytest.mark.playwright),
pytest.param('parsel', marks=pytest.mark.parsel),
pytest.param('beautifulsoup', marks=pytest.mark.beautifulsoup),
Expand Down
Loading