Skip to content

Commit fe678f2

Browse files
perf(client): optimize file structure copying in multipart requests
1 parent 3d0485c commit fe678f2

9 files changed

Lines changed: 195 additions & 106 deletions

File tree

src/steel/_files.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import io
44
import os
55
import pathlib
6-
from typing import overload
7-
from typing_extensions import TypeGuard
6+
from typing import Sequence, cast, overload
7+
from typing_extensions import TypeVar, TypeGuard
88

99
import anyio
1010

@@ -17,7 +17,9 @@
1717
HttpxFileContent,
1818
HttpxRequestFiles,
1919
)
20-
from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
20+
from ._utils import is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t
21+
22+
_T = TypeVar("_T")
2123

2224

2325
def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
@@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
121123
return await anyio.Path(file).read_bytes()
122124

123125
return file
126+
127+
128+
def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T:
    """Duplicate only those containers of ``item`` that sit on one of ``paths``.

    This protects the caller's data from being mutated by extract_files
    while avoiding a copy of the whole structure: a dict or list is copied
    only when a path runs through it, and every other value is handed back
    by reference.

    Example with paths=[["foo", "files", "file"]] and the structure:
        {
            "foo": {
                "bar": {"baz": {}},
                "files": {"file": <content>}
            }
        }
    The root dict, "foo", and "files" lie on the path, so each is copied.
    "bar" and "baz" are off the path and are returned by reference.
    """
    # Every path is matched starting from its first segment.
    start_index = 0
    return _deepcopy_with_paths(item, paths, start_index)
146+
147+
148+
def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T:
    """Recursive worker behind ``deepcopy_with_paths``.

    ``index`` is the position within each path that corresponds to ``item``.
    A mapping is copied only when at least one path still has a segment at
    ``index``; a list is copied only when at least one path has the
    ``"<array>"`` segment at ``index``. Anything else is returned as-is.
    """
    if not paths:
        return item

    if is_mapping(item):
        # Bucket the paths that are long enough by the key they name at this depth.
        paths_by_key: dict[str, list[Sequence[str]]] = {}
        for candidate in paths:
            if len(candidate) <= index:
                continue
            paths_by_key.setdefault(candidate[index], []).append(candidate)

        # if no path continues through this mapping, it won't be mutated and copying it is redundant
        if not paths_by_key:
            return item

        copied = dict(item)
        for name in paths_by_key:
            # Keys named by a path but absent from the mapping are simply skipped.
            if name not in copied:
                continue
            copied[name] = _deepcopy_with_paths(copied[name], paths_by_key[name], index + 1)
        return cast(_T, copied)

    if is_list(item):
        through_list = [
            candidate
            for candidate in paths
            if len(candidate) > index and candidate[index] == "<array>"
        ]

        # Copy the list only when some path expects a list here; otherwise
        # nothing inside it will be mutated, so return it by reference.
        if through_list:
            return cast(_T, [_deepcopy_with_paths(element, through_list, index + 1) for element in item])
        return cast(_T, item)

    return item

src/steel/_utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
coerce_integer as coerce_integer,
2525
file_from_path as file_from_path,
2626
strip_not_given as strip_not_given,
27-
deepcopy_minimal as deepcopy_minimal,
2827
get_async_library as get_async_library,
2928
maybe_coerce_float as maybe_coerce_float,
3029
get_required_header as get_required_header,

src/steel/_utils/_utils.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
177177
return isinstance(obj, Iterable)
178178

179179

180-
def deepcopy_minimal(item: _T) -> _T:
    """Recursively copy a value, duplicating only mappings and lists.

    A cut-down stand-in for copy.deepcopy() kept small for performance
    reasons. Only these container types are copied:

    - mappings, e.g. `dict` (rebuilt as a plain dict)
    - list

    Every other value is returned by reference.
    """
    if is_mapping(item):
        copied_mapping = {}
        for key, value in item.items():
            copied_mapping[key] = deepcopy_minimal(value)
        return cast(_T, copied_mapping)

    if is_list(item):
        copied_list = []
        for element in item:
            copied_list.append(deepcopy_minimal(element))
        return cast(_T, copied_list)

    return item
193-
194-
195180
# copied from https://github.com/Rapptz/RoboDanny
196181
def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
197182
size = len(seq)

src/steel/resources/extensions.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import httpx
88

99
from ..types import extension_update_params, extension_upload_params
10+
from .._files import deepcopy_with_paths
1011
from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
11-
from .._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
12+
from .._utils import extract_files, path_template, maybe_transform, async_maybe_transform
1213
from .._compat import cached_property
1314
from .._resource import SyncAPIResource, AsyncAPIResource
1415
from .._response import (
@@ -79,11 +80,12 @@ def update(
7980
"""
8081
if not extension_id:
8182
raise ValueError(f"Expected a non-empty value for `extension_id` but received {extension_id!r}")
82-
body = deepcopy_minimal(
83+
body = deepcopy_with_paths(
8384
{
8485
"file": file,
8586
"url": url,
86-
}
87+
},
88+
[["file"]],
8789
)
8890
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
8991
# It should be noted that the actual Content-Type header that will be
@@ -233,11 +235,12 @@ def upload(
233235
234236
timeout: Override the client-level default timeout for this request, in seconds
235237
"""
236-
body = deepcopy_minimal(
238+
body = deepcopy_with_paths(
237239
{
238240
"file": file,
239241
"url": url,
240-
}
242+
},
243+
[["file"]],
241244
)
242245
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
243246
# It should be noted that the actual Content-Type header that will be
@@ -307,11 +310,12 @@ async def update(
307310
"""
308311
if not extension_id:
309312
raise ValueError(f"Expected a non-empty value for `extension_id` but received {extension_id!r}")
310-
body = deepcopy_minimal(
313+
body = deepcopy_with_paths(
311314
{
312315
"file": file,
313316
"url": url,
314-
}
317+
},
318+
[["file"]],
315319
)
316320
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
317321
# It should be noted that the actual Content-Type header that will be
@@ -461,11 +465,12 @@ async def upload(
461465
462466
timeout: Override the client-level default timeout for this request, in seconds
463467
"""
464-
body = deepcopy_minimal(
468+
body = deepcopy_with_paths(
465469
{
466470
"file": file,
467471
"url": url,
468-
}
472+
},
473+
[["file"]],
469474
)
470475
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
471476
# It should be noted that the actual Content-Type header that will be

src/steel/resources/files.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import httpx
88

99
from ..types import file_upload_params
10+
from .._files import deepcopy_with_paths
1011
from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, FileTypes, omit, not_given
11-
from .._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
12+
from .._utils import extract_files, path_template, maybe_transform, async_maybe_transform
1213
from .._compat import cached_property
1314
from .._resource import SyncAPIResource, AsyncAPIResource
1415
from .._response import (
@@ -169,11 +170,12 @@ def upload(
169170
170171
timeout: Override the client-level default timeout for this request, in seconds
171172
"""
172-
body = deepcopy_minimal(
173+
body = deepcopy_with_paths(
173174
{
174175
"file": file,
175176
"path": path,
176-
}
177+
},
178+
[["file"]],
177179
)
178180
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
179181
# It should be noted that the actual Content-Type header that will be
@@ -328,11 +330,12 @@ async def upload(
328330
329331
timeout: Override the client-level default timeout for this request, in seconds
330332
"""
331-
body = deepcopy_minimal(
333+
body = deepcopy_with_paths(
332334
{
333335
"file": file,
334336
"path": path,
335-
}
337+
},
338+
[["file"]],
336339
)
337340
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
338341
# It should be noted that the actual Content-Type header that will be

src/steel/resources/profiles.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import httpx
88

99
from ..types import profile_create_params, profile_update_params
10+
from .._files import deepcopy_with_paths
1011
from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
11-
from .._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
12+
from .._utils import extract_files, path_template, maybe_transform, async_maybe_transform
1213
from .._compat import cached_property
1314
from .._resource import SyncAPIResource, AsyncAPIResource
1415
from .._response import (
@@ -80,13 +81,14 @@ def create(
8081
8182
timeout: Override the client-level default timeout for this request, in seconds
8283
"""
83-
body = deepcopy_minimal(
84+
body = deepcopy_with_paths(
8485
{
8586
"user_data_dir": user_data_dir,
8687
"dimensions": dimensions,
8788
"proxy_url": proxy_url,
8889
"user_agent": user_agent,
89-
}
90+
},
91+
[["userDataDir"]],
9092
)
9193
files = extract_files(cast(Mapping[str, object], body), paths=[["userDataDir"]])
9294
# It should be noted that the actual Content-Type header that will be
@@ -140,13 +142,14 @@ def update(
140142
"""
141143
if not id:
142144
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
143-
body = deepcopy_minimal(
145+
body = deepcopy_with_paths(
144146
{
145147
"user_data_dir": user_data_dir,
146148
"dimensions": dimensions,
147149
"proxy_url": proxy_url,
148150
"user_agent": user_agent,
149-
}
151+
},
152+
[["userDataDir"]],
150153
)
151154
files = extract_files(cast(Mapping[str, object], body), paths=[["userDataDir"]])
152155
# It should be noted that the actual Content-Type header that will be
@@ -270,13 +273,14 @@ async def create(
270273
271274
timeout: Override the client-level default timeout for this request, in seconds
272275
"""
273-
body = deepcopy_minimal(
276+
body = deepcopy_with_paths(
274277
{
275278
"user_data_dir": user_data_dir,
276279
"dimensions": dimensions,
277280
"proxy_url": proxy_url,
278281
"user_agent": user_agent,
279-
}
282+
},
283+
[["userDataDir"]],
280284
)
281285
files = extract_files(cast(Mapping[str, object], body), paths=[["userDataDir"]])
282286
# It should be noted that the actual Content-Type header that will be
@@ -330,13 +334,14 @@ async def update(
330334
"""
331335
if not id:
332336
raise ValueError(f"Expected a non-empty value for `id` but received {id!r}")
333-
body = deepcopy_minimal(
337+
body = deepcopy_with_paths(
334338
{
335339
"user_data_dir": user_data_dir,
336340
"dimensions": dimensions,
337341
"proxy_url": proxy_url,
338342
"user_agent": user_agent,
339-
}
343+
},
344+
[["userDataDir"]],
340345
)
341346
files = extract_files(cast(Mapping[str, object], body), paths=[["userDataDir"]])
342347
# It should be noted that the actual Content-Type header that will be

src/steel/resources/sessions/files.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66

77
import httpx
88

9+
from ..._files import deepcopy_with_paths
910
from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, FileTypes, omit, not_given
10-
from ..._utils import extract_files, path_template, maybe_transform, deepcopy_minimal, async_maybe_transform
11+
from ..._utils import extract_files, path_template, maybe_transform, async_maybe_transform
1112
from ..._compat import cached_property
1213
from ..._resource import SyncAPIResource, AsyncAPIResource
1314
from ..._response import (
@@ -260,11 +261,12 @@ def upload(
260261
"""
261262
if not session_id:
262263
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
263-
body = deepcopy_minimal(
264+
body = deepcopy_with_paths(
264265
{
265266
"file": file,
266267
"path": path,
267-
}
268+
},
269+
[["file"]],
268270
)
269271
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
270272
# It should be noted that the actual Content-Type header that will be
@@ -510,11 +512,12 @@ async def upload(
510512
"""
511513
if not session_id:
512514
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
513-
body = deepcopy_minimal(
515+
body = deepcopy_with_paths(
514516
{
515517
"file": file,
516518
"path": path,
517-
}
519+
},
520+
[["file"]],
518521
)
519522
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
520523
# It should be noted that the actual Content-Type header that will be

tests/test_deepcopy.py

Lines changed: 0 additions & 58 deletions
This file was deleted.

0 commit comments

Comments
 (0)