Skip to content

Commit 4db3e31

Browse files
authored
Merge pull request #276 from scrapy-plugins/scrapy-2.15
Add support for Scrapy 2.15
2 parents 6ed9008 + e112acf commit 4db3e31

6 files changed

Lines changed: 72 additions & 13 deletions

File tree

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# This runs the tests for the unreleased Scrapy master branch.
2+
# It doesn't run by default; to run it, label your PR with the "test-scrapy-unreleased" label.
3+
name: scrapy-unreleased
4+
5+
permissions:
6+
contents: read
7+
8+
on:
9+
pull_request:
10+
types: [ opened, synchronize, reopened, labeled ]
11+
12+
jobs:
13+
test:
14+
if: contains(github.event.pull_request.labels.*.name, 'test-scrapy-unreleased')
15+
runs-on: ubuntu-latest
16+
17+
steps:
18+
- uses: actions/checkout@v6
19+
- name: Set up Python 3.13
20+
uses: actions/setup-python@v6
21+
with:
22+
python-version: "3.13"
23+
- name: Install dependencies
24+
run: |
25+
python -m pip install --upgrade pip
26+
python -m pip install tox
27+
- name: tox
28+
run: |
29+
tox -e scrapy-unreleased -- -n auto
30+
- name: coverage
31+
uses: codecov/codecov-action@v5
32+
with:
33+
token: ${{ secrets.CODECOV_TOKEN }}

.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44
name: tox
55

6+
permissions:
7+
contents: read
8+
69
on:
710
push:
811
branches: [ main ]

tests/__init__.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,12 @@ class DummySpider(Spider):
8686

8787

8888
async def get_crawler(
89-
settings=None, spider_cls=DummySpider, setup_engine=True, use_addon=False, poet=True
89+
settings=None,
90+
spider_cls=DummySpider,
91+
setup_engine=True,
92+
start_handler=False,
93+
use_addon=False,
94+
poet=True,
9095
):
9196
settings = settings or {}
9297
base_settings: SETTINGS_T = deepcopy(SETTINGS if not use_addon else SETTINGS_ADDON)
@@ -95,7 +100,7 @@ async def get_crawler(
95100
final_settings.setdefault("ADDONS", {})["scrapy_poet.Addon"] = 300
96101
crawler = _get_crawler(settings_dict=final_settings, spidercls=spider_cls)
97102
if setup_engine:
98-
await setup_crawler_engine(crawler)
103+
await setup_crawler_engine(crawler, start_handler=start_handler)
99104
return crawler
100105

101106

@@ -117,7 +122,9 @@ async def make_handler(
117122
):
118123
if api_url is not None:
119124
settings["ZYTE_API_URL"] = api_url
120-
crawler = await get_crawler(settings, use_addon=use_addon)
125+
crawler = await get_crawler(
126+
settings, setup_engine=True, start_handler=True, use_addon=use_addon
127+
)
121128
handler = get_download_handler(crawler, "https")
122129
if not isinstance(handler, _ScrapyZyteAPIBaseDownloadHandler):
123130
# i.e. ZYTE_API_ENABLED=False
@@ -164,7 +171,7 @@ def set_env(**env_vars):
164171
environ.update(old_environ)
165172

166173

167-
async def setup_crawler_engine(crawler: Crawler):
174+
async def setup_crawler_engine(crawler: Crawler, start_handler: bool = False) -> None:
168175
"""Run the crawl steps until engine setup, so that crawler.engine is not
169176
None.
170177
@@ -175,9 +182,10 @@ async def setup_crawler_engine(crawler: Crawler):
175182
crawler.spider = crawler._create_spider()
176183
crawler.engine = crawler._create_engine()
177184

178-
handler = get_download_handler(crawler, "https")
179-
if hasattr(handler, "engine_started"):
180-
await handler.engine_started()
185+
if start_handler:
186+
handler = get_download_handler(crawler, "https")
187+
if hasattr(handler, "engine_started"):
188+
await handler.engine_started()
181189

182190

183191
async def download_request(handler, request) -> Response:

tests/test_api_requests.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2945,7 +2945,7 @@ async def test_automap_all_cookies(meta):
29452945
"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True,
29462946
"ZYTE_API_TRANSPARENT_MODE": True,
29472947
}
2948-
crawler = await get_crawler(settings)
2948+
crawler = await get_crawler(settings, start_handler=True)
29492949
cookie_middleware = get_downloader_middleware(crawler, CookiesMiddleware)
29502950
handler = get_download_handler(crawler, "https")
29512951
param_parser = handler._param_parser
@@ -3029,6 +3029,7 @@ async def test_automap_all_cookies(meta):
30293029
# {"name": "c", "value": "d", "domain": "b.example"},
30303030
]
30313031
)
3032+
await handler._close()
30323033

30333034

30343035
@pytest.mark.parametrize(
@@ -3053,7 +3054,7 @@ async def test_automap_cookie_jar(meta):
30533054
"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True,
30543055
"ZYTE_API_TRANSPARENT_MODE": True,
30553056
}
3056-
crawler = await get_crawler(settings)
3057+
crawler = await get_crawler(settings, start_handler=True)
30573058
cookie_middleware = get_downloader_middleware(crawler, CookiesMiddleware)
30583059
handler = get_download_handler(crawler, "https")
30593060
param_parser = handler._param_parser
@@ -3090,6 +3091,7 @@ async def test_automap_cookie_jar(meta):
30903091
{"name": "z", "value": "y", "domain": "example.com"},
30913092
]
30923093
)
3094+
await handler._close()
30933095

30943096

30953097
@pytest.mark.parametrize(
@@ -3106,7 +3108,7 @@ async def test_automap_cookie_limit(meta, caplog):
31063108
"ZYTE_API_MAX_COOKIES": 1,
31073109
"ZYTE_API_TRANSPARENT_MODE": True,
31083110
}
3109-
crawler = await get_crawler(settings)
3111+
crawler = await get_crawler(settings, start_handler=True)
31103112
cookie_middleware = get_downloader_middleware(crawler, CookiesMiddleware)
31113113
handler = get_download_handler(crawler, "https")
31123114
param_parser = handler._param_parser
@@ -3196,6 +3198,7 @@ async def test_automap_cookie_limit(meta, caplog):
31963198
assert "would get 2 cookies" in caplog.text
31973199
assert "limited to 1 cookies" in caplog.text
31983200
caplog.clear()
3201+
await handler._close()
31993202

32003203

32013204
class CustomCookieJar(CookieJar):
@@ -3241,7 +3244,7 @@ async def test_automap_custom_cookie_middleware():
32413244
"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True,
32423245
"ZYTE_API_TRANSPARENT_MODE": True,
32433246
}
3244-
crawler = await get_crawler(settings)
3247+
crawler = await get_crawler(settings, start_handler=True)
32453248
cookie_middleware = get_downloader_middleware(crawler, mw_cls)
32463249
handler = get_download_handler(crawler, "https")
32473250
param_parser = handler._param_parser
@@ -3252,6 +3255,7 @@ async def test_automap_custom_cookie_middleware():
32523255
assert api_params["experimental"]["requestCookies"] == [
32533256
{"name": "z", "value": "y", "domain": "example.com"}
32543257
]
3258+
await handler._close()
32553259

32563260

32573261
@pytest.mark.parametrize(

tests/test_handler.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,11 @@ async def test_concurrency_configuration(concurrency):
6565
**SETTINGS,
6666
"CONCURRENT_REQUESTS": concurrency,
6767
}
68-
crawler = await get_crawler_zyte_api(settings=settings)
68+
crawler = await get_crawler_zyte_api(settings=settings, start_handler=True)
6969
handler = get_download_handler(crawler, "https")
7070
assert handler._client.n_conn == concurrency
7171
assert handler._session._session.connector.limit == concurrency
72+
await handler._close()
7273

7374

7475
ETH_KEY = "c85ef7d79691fe79573b1a7064c5232332f53bb1b44a08f1a737f57a68a4706e"
@@ -274,6 +275,7 @@ async def test_retry_policy(
274275
async with make_handler(settings) as handler:
275276
req = Request("https://example.com", meta=meta)
276277
unmocked_session = handler._session
278+
await unmocked_session.close()
277279
handler._session = mock.AsyncMock(unmocked_session)
278280
handler._session.get.return_value = {
279281
"browserHtml": "",
@@ -528,6 +530,7 @@ async def test_log_request_truncate(
528530
meta = {"zyte_api": input_params}
529531
request = Request("https://example.com", meta=meta)
530532
unmocked_session = handler._session
533+
await unmocked_session.close()
531534
handler._session = mock.AsyncMock(unmocked_session)
532535
handler._session.get.return_value = {
533536
"browserHtml": "",
@@ -575,9 +578,10 @@ async def test_trust_env(enabled):
575578
settings["ZYTE_API_USE_ENV_PROXY"] = enabled
576579
else:
577580
enabled = False
578-
crawler = await get_crawler_zyte_api(settings=settings)
581+
crawler = await get_crawler_zyte_api(settings=settings, start_handler=True)
579582
handler = get_download_handler(crawler, "https")
580583
assert handler._session._session._trust_env == enabled
584+
await handler._close()
581585

582586

583587
@pytest.mark.parametrize(
@@ -765,6 +769,7 @@ async def test_download_request_limits(
765769
):
766770
settings: SETTINGS_T = {"DOWNLOAD_WARNSIZE": warnsize, "DOWNLOAD_MAXSIZE": maxsize}
767771
async with make_handler(settings, mockserver.urljoin("/")) as handler:
772+
await handler._session.close()
768773
handler._session = mock.AsyncMock()
769774
handler._session.get.return_value = mock.Mock(body=b"x" * body_size)
770775

tox.ini

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,12 @@ extras = provider
173173
[testenv:x402]
174174
extras = x402
175175

176+
[testenv:scrapy-unreleased]
177+
deps =
178+
{[testenv]deps}
179+
scrapy @ git+https://github.com/scrapy/scrapy@master
180+
extras = provider
181+
176182
[testenv:twine]
177183
deps =
178184
twine==6.1.0

0 commit comments

Comments
 (0)