|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from urllib.parse import ParseResult, urlparse |
| 4 | + |
| 5 | +import pytest |
| 6 | +from scrapy import Request, Spider |
| 7 | +from scrapy.core.downloader.handlers.http11 import TunnelError |
| 8 | +from scrapy.crawler import Crawler |
| 9 | +from scrapy.exceptions import NotConfigured |
| 10 | + |
| 11 | +from apify import ProxyConfiguration |
| 12 | +from apify.scrapy.middlewares import ApifyHttpProxyMiddleware |
| 13 | + |
| 14 | + |
class DummySpider(Spider):
    """Minimal Scrapy spider used purely as a test double for the middleware tests."""

    name = 'dummy_spider'
| 17 | + |
| 18 | + |
@pytest.fixture()
def middleware() -> ApifyHttpProxyMiddleware:
    """Provide an ApifyHttpProxyMiddleware configured to use the Apify proxy."""
    return ApifyHttpProxyMiddleware({'useApifyProxy': True})
| 24 | + |
| 25 | + |
@pytest.fixture()
def crawler(monkeypatch: pytest.MonkeyPatch) -> Crawler:
    """Provide a Scrapy crawler whose settings are patched to an empty dict.

    Individual tests overwrite `crawler.settings` with their own dict via
    monkeypatch, so a plain dict stands in for Scrapy's Settings object here.
    """
    dummy_crawler = Crawler(DummySpider)
    monkeypatch.setattr(dummy_crawler, 'settings', {})
    return dummy_crawler
| 32 | + |
| 33 | + |
@pytest.fixture()
def spider() -> DummySpider:
    """Provide an instance of the dummy test spider."""
    return DummySpider()
| 38 | + |
| 39 | + |
@pytest.fixture()
def dummy_request() -> Request:
    """Fixture to create a "dummy" Scrapy request."""
    # NOTE: the original docstring said "spider" — a copy-paste slip; this
    # fixture produces a Request object, not a spider.
    return Request('https://example.com')
| 44 | + |
| 45 | + |
@pytest.fixture()
def proxy_configuration() -> ProxyConfiguration:
    """Provide a default Apify ProxyConfiguration instance."""
    return ProxyConfiguration()
| 50 | + |
| 51 | + |
@pytest.mark.parametrize(
    ('settings', 'expected_exception'),
    [
        ({'APIFY_PROXY_SETTINGS': {'useApifyProxy': True}}, None),
        ({'APIFY_PROXY_SETTINGS': {'useApifyProxy': True, 'apifyProxyGroups': []}}, None),
        ({}, NotConfigured),
        ({'a': 1}, NotConfigured),
        ({'APIFY_PROXY_SETTINGS': {}}, NotConfigured),
        ({'APIFY_PROXY_SETTINGS': {'useApifyProxy': None}}, NotConfigured),
        ({'APIFY_PROXY_SETTINGS': {'useApifyProxy': False}}, NotConfigured),
    ],
)
def test__from_crawler(
    crawler: Crawler,
    monkeypatch: pytest.MonkeyPatch,
    settings: dict,
    expected_exception: type[Exception] | None,
) -> None:
    """Middleware construction from crawler settings: valid proxy settings are
    stored; missing or disabled ones raise NotConfigured."""
    monkeypatch.setattr(crawler, 'settings', settings)

    # Error cases first: construction must fail when Apify proxy is not enabled.
    if expected_exception is not None:
        with pytest.raises(expected_exception):
            ApifyHttpProxyMiddleware.from_crawler(crawler)
        return

    mw = ApifyHttpProxyMiddleware.from_crawler(crawler)
    assert mw._proxy_settings == settings['APIFY_PROXY_SETTINGS']
| 79 | + |
| 80 | + |
@pytest.mark.parametrize(
    'expected_proxy_url',
    ['http://username:password@proxy.example.com:8080', 'http://hsdfgds:52354325@proxy.apify.com:5748'],
)
async def test__get_new_proxy_url(
    monkeypatch: pytest.MonkeyPatch,
    middleware: ApifyHttpProxyMiddleware,
    proxy_configuration: ProxyConfiguration,
    expected_proxy_url: str,
) -> None:
    """_get_new_proxy_url returns the parsed URL obtained from the proxy configuration."""

    async def fake_new_url() -> str:
        # Stand-in for ProxyConfiguration.new_url; always yields the expected URL.
        return expected_proxy_url

    monkeypatch.setattr(proxy_configuration, 'new_url', fake_new_url)
    middleware._proxy_cfg_internal = proxy_configuration

    result = await middleware._get_new_proxy_url()
    assert result == urlparse(expected_proxy_url)
| 98 | + |
| 99 | + |
@pytest.mark.parametrize(
    ('proxy_url', 'expected_exception', 'expected_request_header'),
    [
        ('http://username:password@proxy.example.com:8080', None, b'Basic dXNlcm5hbWU6cGFzc3dvcmQ='),
        ('http://user123:pass456@proxy.apify.com:5748', None, b'Basic dXNlcjEyMzpwYXNzNDU2'),
        ('http://@proxy.example.com:2943', ValueError, b''),
    ],
)
async def test__process_request(
    monkeypatch: pytest.MonkeyPatch,
    middleware: ApifyHttpProxyMiddleware,
    spider: DummySpider,
    dummy_request: Request,
    proxy_url: str,
    expected_exception: type[Exception] | None,
    expected_request_header: bytes,
) -> None:
    """process_request sets the proxy meta key and Proxy-Authorization header,
    or raises when the proxy URL carries no credentials."""

    async def fake_get_new_proxy_url() -> ParseResult:
        return urlparse(proxy_url)

    monkeypatch.setattr(middleware, '_get_new_proxy_url', fake_get_new_proxy_url)

    # Failure path: credential-less URL must raise before mutating the request.
    if expected_exception is not None:
        with pytest.raises(expected_exception):
            await middleware.process_request(dummy_request, spider)
        return

    await middleware.process_request(dummy_request, spider)
    assert dummy_request.meta['proxy'] == proxy_url
    assert dummy_request.headers[b'Proxy-Authorization'] == expected_request_header
| 129 | + |
| 130 | + |
@pytest.mark.parametrize(
    ('exception', 'none_returned_values_is_expected'),
    [
        (TunnelError(), False),
        (ValueError(), True),
    ],
)
def test__process_exception(
    middleware: ApifyHttpProxyMiddleware,
    spider: DummySpider,
    dummy_request: Request,
    exception: Exception,
    *,
    none_returned_values_is_expected: bool,
) -> None:
    """process_exception retries the request on TunnelError (returns it) and
    ignores other exceptions (returns None)."""
    result = middleware.process_exception(dummy_request, exception, spider)

    if none_returned_values_is_expected:
        assert result is None
    else:
        assert result == dummy_request
0 commit comments