Skip to content

Commit 8a7cbda

Browse files
committed
Performance optimization
1 parent 75a0389 commit 8a7cbda

File tree

2 files changed

+86
-31
lines changed

2 files changed

+86
-31
lines changed

src/apify_client/_apify_client.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
from functools import cached_property
4+
35
from apify_client._client_registry import ClientRegistry, ClientRegistryAsync
46
from apify_client._config import ClientConfig
57
from apify_client._http_clients import HttpClient, HttpClientAsync
@@ -96,10 +98,14 @@ def __init__(
9698
min_delay_between_retries_millis=min_delay_between_retries_millis,
9799
timeout_secs=timeout_secs,
98100
)
101+
"""Resolved client configuration."""
102+
99103
self._statistics = ClientStatistics()
104+
"""Collector for client request statistics."""
105+
100106
self._http_client = HttpClient(config=self._config, statistics=self._statistics)
107+
"""HTTP client used to communicate with the Apify API."""
101108

102-
# Create client classes config for dependency injection
103109
self._client_registry = ClientRegistry(
104110
actor_client=ActorClient,
105111
actor_version_client=ActorVersionClient,
@@ -118,9 +124,11 @@ def __init__(
118124
webhook_dispatch_collection_client=WebhookDispatchCollectionClient,
119125
log_client=LogClient,
120126
)
127+
"""Registry of resource client classes used for dependency injection."""
121128

122-
@property
129+
@cached_property
123130
def _base_kwargs(self) -> dict:
131+
"""Base keyword arguments for resource client construction."""
124132
return {
125133
'base_url': self._config.base_url,
126134
'public_base_url': self._config.public_base_url,
@@ -304,10 +312,14 @@ def __init__(
304312
min_delay_between_retries_millis=min_delay_between_retries_millis,
305313
timeout_secs=timeout_secs,
306314
)
315+
"""Resolved client configuration."""
316+
307317
self._statistics = ClientStatistics()
318+
"""Collector for client request statistics."""
319+
308320
self._http_client = HttpClientAsync(config=self._config, statistics=self._statistics)
321+
"""HTTP client used to communicate with the Apify API."""
309322

310-
# Create async client classes config for dependency injection
311323
self._client_registry = ClientRegistryAsync(
312324
actor_client=ActorClientAsync,
313325
actor_version_client=ActorVersionClientAsync,
@@ -326,9 +338,11 @@ def __init__(
326338
webhook_dispatch_collection_client=WebhookDispatchCollectionClientAsync,
327339
log_client=LogClientAsync,
328340
)
341+
"""Registry of resource client classes used for dependency injection."""
329342

330-
@property
343+
@cached_property
331344
def _base_kwargs(self) -> dict:
345+
"""Base keyword arguments for resource client construction."""
332346
return {
333347
'base_url': self._config.base_url,
334348
'public_base_url': self._config.public_base_url,

src/apify_client/_utils.py

Lines changed: 68 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33
from __future__ import annotations
44

5-
import base64
65
import hashlib
76
import hmac
87
import io
98
import json
109
import string
1110
import time
11+
from base64 import b64encode, urlsafe_b64encode
1212
from enum import Enum
1313
from http import HTTPStatus
1414
from typing import TYPE_CHECKING, Any, TypeVar
@@ -20,6 +20,9 @@
2020

2121
T = TypeVar('T')
2222

23+
_BASE62_CHARSET = string.digits + string.ascii_letters
24+
"""Module-level constant for base62 encoding."""
25+
2326

2427
def catch_not_found_or_throw(exc: ApifyApiError) -> None:
2528
"""Suppress 404 Not Found errors and re-raise all other API errors.
@@ -41,32 +44,62 @@ def filter_none_values(
4144
*,
4245
remove_empty_dicts: bool | None = None,
4346
) -> dict:
44-
"""Remove None values from a dictionary recursively.
47+
"""Recursively remove None values from a dictionary.
48+
49+
The Apify API ignores missing fields but may reject fields explicitly set to None. This helper prepares
50+
request payloads by stripping None values from nested dictionaries.
4551
46-
The Apify API ignores missing fields but may reject fields explicitly set to None. This function prepares request
47-
payloads by recursively removing None values from nested dictionaries.
52+
Uses an iterative, stack-based approach for better performance on deeply nested structures.
4853
4954
Args:
50-
data: The dictionary to clean.
51-
remove_empty_dicts: If True, also remove empty dictionaries after filtering None values.
55+
data: Dictionary to clean.
56+
remove_empty_dicts: Whether to remove empty dictionaries after filtering.
5257
5358
Returns:
54-
A new dictionary with None values removed at all nesting levels.
59+
A new dictionary with all None values removed.
5560
"""
61+
# Use an explicit stack to avoid recursion overhead
62+
result = {}
63+
64+
# Stack entries are (source_dict, target_dict)
65+
stack: list[tuple[dict, dict]] = [(data, result)]
66+
67+
while stack:
68+
source, target = stack.pop()
69+
70+
for key, val in source.items():
71+
if val is None:
72+
continue
5673

57-
def _internal(dictionary: dict, *, remove_empty: bool | None = None) -> dict | None:
58-
result = {}
59-
for key, val in dictionary.items():
6074
if isinstance(val, dict):
61-
val = _internal(val, remove_empty=remove_empty) # noqa: PLW2901
62-
if val is not None:
63-
result[key] = val
64-
if not result and remove_empty:
65-
return None
66-
return result
75+
nested = {}
76+
target[key] = nested
77+
stack.append((val, nested))
78+
else:
79+
target[key] = val
80+
81+
# Optionally remove empty dictionaries
82+
if remove_empty_dicts:
83+
_remove_empty_dicts_inplace(result)
6784

68-
result = _internal(data, remove_empty=remove_empty_dicts)
69-
return result if result is not None else {}
85+
return result
86+
87+
88+
def _remove_empty_dicts_inplace(data: dict[str, Any]) -> None:
89+
"""Recursively remove empty dictionaries from a dict in place.
90+
91+
This is a helper function for filter_none_values.
92+
"""
93+
keys_to_remove = list[str]()
94+
95+
for key, val in data.items():
96+
if isinstance(val, dict):
97+
_remove_empty_dicts_inplace(val)
98+
if not val:
99+
keys_to_remove.append(key)
100+
101+
for key in keys_to_remove:
102+
del data[key]
70103

71104

72105
def encode_webhook_list_to_base64(webhooks: list[dict]) -> str:
@@ -79,6 +112,7 @@ def encode_webhook_list_to_base64(webhooks: list[dict]) -> str:
79112
A base64-encoded JSON string.
80113
"""
81114
data = list[dict]()
115+
82116
for webhook in webhooks:
83117
webhook_representation = {
84118
'eventTypes': [enum_to_value(event_type) for event_type in webhook['event_types']],
@@ -90,7 +124,7 @@ def encode_webhook_list_to_base64(webhooks: list[dict]) -> str:
90124
webhook_representation['headersTemplate'] = webhook['headers_template']
91125
data.append(webhook_representation)
92126

93-
return base64.b64encode(json.dumps(data).encode('utf-8')).decode('ascii')
127+
return b64encode(json.dumps(data).encode('utf-8')).decode('ascii')
94128

95129

96130
def encode_key_value_store_record_value(value: Any, content_type: str | None = None) -> tuple[Any, str]:
@@ -116,7 +150,13 @@ def encode_key_value_store_record_value(value: Any, content_type: str | None = N
116150
and not isinstance(value, (bytes, bytearray, io.IOBase))
117151
and not isinstance(value, str)
118152
):
119-
value = json.dumps(value, ensure_ascii=False, indent=2, allow_nan=False, default=str).encode('utf-8')
153+
# Don't use indentation to reduce size.
154+
value = json.dumps(
155+
value,
156+
ensure_ascii=False,
157+
allow_nan=False,
158+
default=str,
159+
).encode('utf-8')
120160

121161
return (value, content_type)
122162

@@ -196,16 +236,17 @@ def encode_base62(num: int) -> str:
196236
Returns:
197237
The base62-encoded string.
198238
"""
199-
charset = string.digits + string.ascii_letters
200-
201239
if num == 0:
202-
return charset[0]
240+
return _BASE62_CHARSET[0]
203241

204-
res = ''
242+
# Use list to build result for O(n) complexity instead of O(n^2) string concatenation.
243+
parts = []
205244
while num > 0:
206245
num, remainder = divmod(num, 62)
207-
res = charset[remainder] + res
208-
return res
246+
parts.append(_BASE62_CHARSET[remainder])
247+
248+
# Reverse and join once at the end.
249+
return ''.join(reversed(parts))
209250

210251

211252
def create_hmac_signature(secret_key: str, message: str) -> str:
@@ -253,5 +294,5 @@ def create_storage_content_signature(
253294
message_to_sign = f'{version}.{expires_at}.{resource_id}'
254295
hmac_sig = create_hmac_signature(url_signing_secret_key, message_to_sign)
255296

256-
base64url_encoded_payload = base64.urlsafe_b64encode(f'{version}.{expires_at}.{hmac_sig}'.encode())
297+
base64url_encoded_payload = urlsafe_b64encode(f'{version}.{expires_at}.{hmac_sig}'.encode())
257298
return base64url_encoded_payload.decode('utf-8')

0 commit comments

Comments
 (0)