Skip to content

Commit b2e4006

Browse files
feat: add subdivision code to proxy configuration (#878)
This PR adds a new variable, `subdivision_code`, to the Apify proxy configuration. This variable is currently needed for US state targeting. More context: https://apify.slack.com/archives/C010Q0FBYG3/p1776704073278519 Same PR in JS: apify/apify-sdk-js#592
1 parent 20249b6 commit b2e4006

6 files changed

Lines changed: 84 additions & 6 deletions

File tree

docs/02_concepts/05_proxy_management.mdx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,13 @@ When no `session_id` is provided, your custom proxy URLs are rotated round-robin
5656

5757
### Apify proxy configuration
5858

59-
With Apify Proxy, you can select specific proxy groups to use, or countries to connect from. This allows you to get better proxy performance after some initial research.
59+
With Apify Proxy, you can select specific proxy groups to use, or countries to connect from. For even finer control, you can also target a specific subdivision (e.g. a US state) using the `subdivision_code` parameter alongside `country_code`. This allows you to get better proxy performance after some initial research.
6060

6161
<RunnableCodeBlock className="language-python" language="python">
6262
{ApifyProxyConfig}
6363
</RunnableCodeBlock>
6464

65-
Now your connections using proxy_url will use only Residential proxies from the US. Note that you must first get access to a proxy group before you are able to use it. You can find your available proxy groups in the [proxy dashboard](https://console.apify.com/proxy).
65+
Now your connections using proxy_url will use only Residential proxies from California, US. The `subdivision_code` accepts the 1–3 character subdivision part of an ISO 3166-2 code (e.g. `CA` for California, from `US-CA`) and currently only works for the United States (`country_code='US'`). Note that you must first get access to a proxy group before you are able to use it. You can find your available proxy groups in the [proxy dashboard](https://console.apify.com/proxy).
6666

6767
If you don't specify any proxy groups, automatic proxy selection will be used.
6868

docs/02_concepts/code/05_apify_proxy_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ async def main() -> None:
88
proxy_cfg = await Actor.create_proxy_configuration(
99
groups=['RESIDENTIAL'],
1010
country_code='US',
11+
subdivision_code='CA',
1112
)
1213

1314
if not proxy_cfg:

src/apify/_actor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,7 @@ async def create_proxy_configuration(
13151315
password: str | None = None,
13161316
groups: list[str] | None = None,
13171317
country_code: str | None = None,
1318+
subdivision_code: str | None = None,
13181319
proxy_urls: list[str | None] | None = None,
13191320
new_url_function: _NewUrlFunction | None = None,
13201321
) -> ProxyConfiguration | None:
@@ -1332,6 +1333,8 @@ async def create_proxy_configuration(
13321333
if available.
13331334
groups: Proxy groups which the Apify Proxy should use, if provided.
13341335
country_code: Country which the Apify Proxy should use, if provided.
1336+
subdivision_code: Subdivision (e.g. US state) which the Apify Proxy should use, if provided.
1337+
Requires `country_code` to be set. 1-3 character ISO 3166-2 code of uppercase letters/digits (e.g. `CA` for California).
13351338
proxy_urls: Custom proxy server URLs which should be rotated through.
13361339
new_url_function: Function which returns a custom proxy URL to be used.
13371340
@@ -1342,6 +1345,7 @@ async def create_proxy_configuration(
13421345
if actor_proxy_input is not None:
13431346
if actor_proxy_input.get('useApifyProxy', False):
13441347
country_code = country_code or actor_proxy_input.get('apifyProxyCountry')
1348+
subdivision_code = subdivision_code or actor_proxy_input.get('apifyProxySubdivision')
13451349
groups = groups or actor_proxy_input.get('apifyProxyGroups')
13461350
else:
13471351
proxy_urls = actor_proxy_input.get('proxyUrls', [])
@@ -1352,6 +1356,7 @@ async def create_proxy_configuration(
13521356
password=password,
13531357
groups=groups,
13541358
country_code=country_code,
1359+
subdivision_code=subdivision_code,
13551360
proxy_urls=proxy_urls,
13561361
new_url_function=new_url_function,
13571362
_actor_config=self.configuration,

src/apify/_proxy_configuration.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
2929
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
30+
# ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9)
31+
SUBDIVISION_CODE_REGEX = re.compile(r'^[A-Z0-9]{1,3}$')
3032
SESSION_ID_MAX_LENGTH = 50
3133

3234

@@ -89,6 +91,13 @@ class ProxyInfo(CrawleeProxyInfo):
8991
This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
9092
"""
9193

94+
subdivision_code: str | None = None
95+
"""If set, the proxy will use IP addresses geolocated to the specified subdivision (e.g. US state).
96+
Requires `country_code` to be set. The subdivision code must be a 1-3 character ISO 3166-2 code
97+
consisting of uppercase letters and digits (e.g. `CA` for California). Currently only supported for
98+
the United States (`country_code='US'`).
99+
"""
100+
92101

93102
@docs_group('Configuration')
94103
class ProxyConfiguration(CrawleeProxyConfiguration):
@@ -111,6 +120,7 @@ def __init__(
111120
password: str | None = None,
112121
groups: list[str] | None = None,
113122
country_code: str | None = None,
123+
subdivision_code: str | None = None,
114124
proxy_urls: list[str | None] | None = None,
115125
new_url_function: _NewUrlFunction | None = None,
116126
tiered_proxy_urls: list[list[str | None]] | None = None,
@@ -126,6 +136,9 @@ def __init__(
126136
if available.
127137
groups: Proxy groups which the Apify Proxy should use, if provided.
128138
country_code: Country which the Apify Proxy should use, if provided.
139+
subdivision_code: Subdivision (e.g. US state) which the Apify Proxy should use, if provided.
140+
Requires `country_code` to be set. 1-3 character ISO 3166-2 code of uppercase letters/digits
141+
(e.g. `CA` for California).
129142
proxy_urls: Custom proxy server URLs which should be rotated through.
130143
new_url_function: Function which returns a custom proxy URL to be used.
131144
tiered_proxy_urls: Proxy URLs arranged into tiers
@@ -141,11 +154,17 @@ def __init__(
141154
country_code = str(country_code)
142155
_check(country_code, label='country_code', pattern=COUNTRY_CODE_REGEX)
143156

157+
if subdivision_code:
158+
if not country_code:
159+
raise ValueError('ProxyConfiguration: Cannot set "subdivision_code" without "country_code".')
160+
subdivision_code = str(subdivision_code)
161+
_check(subdivision_code, label='subdivision_code', pattern=SUBDIVISION_CODE_REGEX)
162+
144163
if (proxy_urls or new_url_function or tiered_proxy_urls) and (groups or country_code):
145164
raise ValueError(
146165
'Cannot combine custom proxies with Apify Proxy!'
147166
' It is not allowed to set "proxy_urls" or "new_url_function" combined with'
148-
' "groups" or "country_code".'
167+
' "groups", "country_code", or "subdivision_code".'
149168
)
150169

151170
if proxy_urls and any('apify.com' in (url or '') for url in proxy_urls):
@@ -176,6 +195,7 @@ def __init__(
176195

177196
self._groups = list(groups) if groups else []
178197
self._country_code = country_code
198+
self._subdivision_code = subdivision_code
179199

180200
async def initialize(self) -> None:
181201
"""Check if using proxy, if so, check the access.
@@ -247,6 +267,7 @@ async def new_proxy_info(
247267
proxy_tier=proxy_info.proxy_tier,
248268
groups=self._groups,
249269
country_code=self._country_code or None,
270+
subdivision_code=self._subdivision_code or None,
250271
)
251272

252273
return ProxyInfo(
@@ -309,7 +330,10 @@ def _get_username(self, session_id: int | str | None = None) -> str:
309330
if session_id is not None:
310331
parts.append(f'session-{session_id}')
311332
if self._country_code:
312-
parts.append(f'country-{self._country_code}')
333+
if self._subdivision_code:
334+
parts.append(f'country-{self._country_code}_{self._subdivision_code}')
335+
else:
336+
parts.append(f'country-{self._country_code}')
313337

314338
if not parts:
315339
return 'auto'

tests/unit/actor/test_actor_create_proxy_configuration.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,22 @@ def request_handler(request: Request, response: Response) -> Response:
146146
== f'http://groups-{"+".join(groups)},country-{country_code}:{DUMMY_PASSWORD}@proxy.apify.com:8000'
147147
)
148148

149-
assert len(patched_apify_client.calls['user']['get']) == 2 # ty: ignore[unresolved-attribute]
150-
assert call_mock.call_count == 2
149+
subdivision = 'CA'
150+
proxy_configuration = await Actor.create_proxy_configuration(
151+
actor_proxy_input={
152+
'useApifyProxy': True,
153+
'apifyProxyGroups': groups,
154+
'apifyProxyCountry': country_code,
155+
'apifyProxySubdivision': subdivision,
156+
}
157+
)
158+
assert proxy_configuration is not None
159+
assert (
160+
await proxy_configuration.new_url()
161+
== f'http://groups-{"+".join(groups)},country-{country_code}_{subdivision}:{DUMMY_PASSWORD}@proxy.apify.com:8000'
162+
)
163+
164+
assert len(patched_apify_client.calls['user']['get']) == 3 # ty: ignore[unresolved-attribute]
165+
assert call_mock.call_count == 3
151166

152167
await Actor.exit()

tests/unit/test_proxy_configuration.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@ def test_invalid_arguments() -> None:
8888
with pytest.raises(ValueError, match=match_pattern):
8989
ProxyConfiguration(country_code=invalid_country_code) # ty: ignore[invalid-argument-type]
9090

91+
for invalid_subdivision_code in ['California', 'ca', 'ABCD', 'A1b']:
92+
escaped = re.escape(str(invalid_subdivision_code))
93+
match_pattern = f'Value {escaped} of argument subdivision_code does not match pattern'
94+
with pytest.raises(ValueError, match=match_pattern):
95+
ProxyConfiguration(country_code='US', subdivision_code=invalid_subdivision_code)
96+
97+
with pytest.raises(ValueError, match=r'Cannot set "subdivision_code" without "country_code"'):
98+
ProxyConfiguration(subdivision_code='CA')
99+
91100
with pytest.raises(ValueError, match=r'Exactly one of .* must be specified'):
92101
ProxyConfiguration(
93102
proxy_urls=['http://proxy.com:1111'],
@@ -105,6 +114,9 @@ def test_invalid_arguments() -> None:
105114
new_url_function=lambda session_id=None, request=None: 'http://proxy.com:2222', groups=['GROUP1']
106115
)
107116

117+
with pytest.raises(ValueError, match=r'Cannot combine custom proxies with Apify Proxy'):
118+
ProxyConfiguration(proxy_urls=['http://proxy.com:1111'], country_code='US', subdivision_code='CA')
119+
108120

109121
async def test_new_url_basic() -> None:
110122
groups = ['GROUP1', 'GROUP2']
@@ -124,6 +136,26 @@ async def test_new_url_basic() -> None:
124136
assert proxy_url == f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}'
125137

126138

139+
async def test_new_url_with_subdivision() -> None:
140+
groups = ['RESIDENTIAL']
141+
password = 'abcd1234'
142+
country_code = 'US'
143+
subdivision = 'CA'
144+
proxy_configuration = ProxyConfiguration(
145+
groups=groups,
146+
password=password,
147+
country_code=country_code,
148+
subdivision_code=subdivision,
149+
)
150+
proxy_url = await proxy_configuration.new_url()
151+
152+
expected_username = f'groups-{"+".join(groups)},country-{country_code}_{subdivision}'
153+
expected_hostname = 'proxy.apify.com'
154+
expected_port = 8000
155+
156+
assert proxy_url == f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}'
157+
158+
127159
async def test_new_url_with_session_ids() -> None:
128160
groups = ['GROUP1', 'GROUP2']
129161
password = 'abcd1234'
@@ -287,6 +319,7 @@ async def test_new_proxy_info_basic_construction() -> None:
287319
'port': expected_port,
288320
'groups': groups,
289321
'country_code': country_code,
322+
'subdivision_code': None,
290323
'username': expected_username,
291324
'password': password,
292325
'proxy_tier': None,

0 commit comments

Comments
 (0)