Skip to content

Commit ddfbedf

Browse files
chore(internal): codegen related update
1 parent 704c186 commit ddfbedf

7 files changed

Lines changed: 191 additions & 104 deletions

File tree

src/docstrange/_files.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import io
44
import os
55
import pathlib
6-
from typing import overload
7-
from typing_extensions import TypeGuard
6+
from typing import Sequence, cast, overload
7+
from typing_extensions import TypeVar, TypeGuard
88

99
import anyio
1010

@@ -17,7 +17,9 @@
1717
HttpxFileContent,
1818
HttpxRequestFiles,
1919
)
20-
from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
20+
from ._utils import is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t
21+
22+
_T = TypeVar("_T")
2123

2224

2325
def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
@@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
121123
return await anyio.Path(file).read_bytes()
122124

123125
return file
126+
127+
128+
def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T:
    """Return a copy of `item` in which only containers on `paths` are duplicated.

    This exists so that `extract_files` can mutate the result without the
    caller's original structure being affected, while avoiding the cost of
    copying the whole structure. A dict or list is copied only when it lies
    on one of the given paths; every other value is shared with the input
    by reference.

    Example with paths=[["foo", "files", "file"]] and the input:
        {
            "foo": {
                "bar": {"baz": {}},
                "files": {"file": <content>}
            }
        }
    Copied (on the path):        the root dict, "foo" and "files".
    Shared by reference (off it): "bar" and "baz".

    Args:
        item: The structure to copy selectively.
        paths: Key sequences describing where mutation may happen. The
            literal segment "<array>" stands for "every element of a list".

    Returns:
        `item` itself when nothing needs copying, otherwise a partially
        copied structure of the same shape.
    """
    # Start the walk at depth 0, i.e. the first segment of every path.
    start_depth = 0
    return _deepcopy_with_paths(item, paths, start_depth)
146+
147+
148+
def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T:
    """Recursive worker for `deepcopy_with_paths`.

    Args:
        item: The value found at the current depth.
        paths: The paths that lead through `item`.
        index: Depth of `item`, i.e. which segment of each path applies here.

    Returns:
        `item` unchanged when no path continues into it, otherwise a shallow
        copy of the container whose on-path children were processed recursively.
    """
    if not paths:
        return item

    if is_mapping(item):
        # Bucket the paths by the key they name at this depth; paths that
        # are already exhausted end here and are dropped.
        by_segment: dict[str, list[Sequence[str]]] = {}
        for candidate in paths:
            if len(candidate) <= index:
                continue
            by_segment.setdefault(candidate[index], []).append(candidate)

        # No path goes any deeper, so this mapping will not be mutated and
        # copying it would be wasted work.
        if not by_segment:
            return item

        duplicate = dict(item)
        for segment, continuing in by_segment.items():
            if segment in duplicate:
                duplicate[segment] = _deepcopy_with_paths(duplicate[segment], continuing, index + 1)
        return cast(_T, duplicate)

    if is_list(item):
        # Only paths that explicitly expect a list at this depth apply.
        through_array = [
            candidate for candidate in paths if index < len(candidate) and candidate[index] == "<array>"
        ]

        # Nothing inside the list will be mutated - hand it back by reference.
        if not through_array:
            return cast(_T, item)

        next_depth = index + 1
        return cast(_T, [_deepcopy_with_paths(element, through_array, next_depth) for element in item])

    # Any other value is never copied.
    return item

src/docstrange/_utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
coerce_integer as coerce_integer,
2525
file_from_path as file_from_path,
2626
strip_not_given as strip_not_given,
27-
deepcopy_minimal as deepcopy_minimal,
2827
get_async_library as get_async_library,
2928
maybe_coerce_float as maybe_coerce_float,
3029
get_required_header as get_required_header,

src/docstrange/_utils/_utils.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
177177
return isinstance(obj, Iterable)
178178

179179

180-
def deepcopy_minimal(item: _T) -> _T:
    """Recursively copy mappings and lists, sharing every other value.

    A cut-down stand-in for `copy.deepcopy()` that duplicates only:

    - mappings, e.g. `dict` (the copy is always a plain `dict`)
    - `list`

    Anything else is returned as the same object. This is done for
    performance reasons.
    """
    if is_mapping(item):
        duplicate = {}
        for key, value in item.items():
            duplicate[key] = deepcopy_minimal(value)
        return cast(_T, duplicate)
    if is_list(item):
        return cast(_T, list(map(deepcopy_minimal, item)))
    return item
193-
194-
195180
# copied from https://github.com/Rapptz/RoboDanny
196181
def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
197182
size = len(seq)

src/docstrange/resources/classify.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@
77
import httpx
88

99
from ..types import classify_sync_params, classify_batch_params
10+
from .._files import deepcopy_with_paths
1011
from .._types import Body, Query, Headers, NotGiven, FileTypes, SequenceNotStr, not_given
11-
from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
12+
from .._utils import extract_files, maybe_transform, async_maybe_transform
1213
from .._compat import cached_property
1314
from .._resource import SyncAPIResource, AsyncAPIResource
1415
from .._response import (
@@ -75,11 +76,12 @@ def batch(
7576
7677
timeout: Override the client-level default timeout for this request, in seconds
7778
"""
78-
body = deepcopy_minimal(
79+
body = deepcopy_with_paths(
7980
{
8081
"categories": categories,
8182
"files": files,
82-
}
83+
},
84+
[["files", "<array>"]],
8385
)
8486
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
8587
# It should be noted that the actual Content-Type header that will be
@@ -129,11 +131,12 @@ def sync(
129131
130132
timeout: Override the client-level default timeout for this request, in seconds
131133
"""
132-
body = deepcopy_minimal(
134+
body = deepcopy_with_paths(
133135
{
134136
"categories": categories,
135137
"file": file,
136-
}
138+
},
139+
[["file"]],
137140
)
138141
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
139142
# It should be noted that the actual Content-Type header that will be
@@ -202,11 +205,12 @@ async def batch(
202205
203206
timeout: Override the client-level default timeout for this request, in seconds
204207
"""
205-
body = deepcopy_minimal(
208+
body = deepcopy_with_paths(
206209
{
207210
"categories": categories,
208211
"files": files,
209-
}
212+
},
213+
[["files", "<array>"]],
210214
)
211215
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
212216
# It should be noted that the actual Content-Type header that will be
@@ -256,11 +260,12 @@ async def sync(
256260
257261
timeout: Override the client-level default timeout for this request, in seconds
258262
"""
259-
body = deepcopy_minimal(
263+
body = deepcopy_with_paths(
260264
{
261265
"categories": categories,
262266
"file": file,
263-
}
267+
},
268+
[["file"]],
264269
)
265270
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
266271
# It should be noted that the actual Content-Type header that will be

src/docstrange/resources/extract/extract.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
ResultsResourceWithStreamingResponse,
1717
AsyncResultsResourceWithStreamingResponse,
1818
)
19+
from ..._files import deepcopy_with_paths
1920
from ..._types import (
2021
Body,
2122
Omit,
@@ -27,7 +28,7 @@
2728
omit,
2829
not_given,
2930
)
30-
from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
31+
from ..._utils import extract_files, maybe_transform, async_maybe_transform
3132
from ..._compat import cached_property
3233
from ..._resource import SyncAPIResource, AsyncAPIResource
3334
from ..._response import (
@@ -124,7 +125,7 @@ def async_(
124125
125126
timeout: Override the client-level default timeout for this request, in seconds
126127
"""
127-
body = deepcopy_minimal(
128+
body = deepcopy_with_paths(
128129
{
129130
"output_format": output_format,
130131
"csv_options": csv_options,
@@ -135,7 +136,8 @@ def async_(
135136
"include_metadata": include_metadata,
136137
"json_options": json_options,
137138
"prompt_mode": prompt_mode,
138-
}
139+
},
140+
[["file"]],
139141
)
140142
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
141143
# It should be noted that the actual Content-Type header that will be
@@ -187,7 +189,7 @@ def batch(
187189
188190
timeout: Override the client-level default timeout for this request, in seconds
189191
"""
190-
body = deepcopy_minimal(
192+
body = deepcopy_with_paths(
191193
{
192194
"files": files,
193195
"output_format": output_format,
@@ -196,7 +198,8 @@ def batch(
196198
"include_metadata": include_metadata,
197199
"json_options": json_options,
198200
"prompt_mode": prompt_mode,
199-
}
201+
},
202+
[["files", "<array>"]],
200203
)
201204
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
202205
# It should be noted that the actual Content-Type header that will be
@@ -279,7 +282,7 @@ def stream(
279282
timeout: Override the client-level default timeout for this request, in seconds
280283
"""
281284
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
282-
body = deepcopy_minimal(
285+
body = deepcopy_with_paths(
283286
{
284287
"output_format": output_format,
285288
"csv_options": csv_options,
@@ -291,7 +294,8 @@ def stream(
291294
"include_metadata": include_metadata,
292295
"json_options": json_options,
293296
"prompt_mode": prompt_mode,
294-
}
297+
},
298+
[["file"]],
295299
)
296300
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
297301
# It should be noted that the actual Content-Type header that will be
@@ -365,7 +369,7 @@ def sync(
365369
366370
timeout: Override the client-level default timeout for this request, in seconds
367371
"""
368-
body = deepcopy_minimal(
372+
body = deepcopy_with_paths(
369373
{
370374
"output_format": output_format,
371375
"csv_options": csv_options,
@@ -376,7 +380,8 @@ def sync(
376380
"include_metadata": include_metadata,
377381
"json_options": json_options,
378382
"prompt_mode": prompt_mode,
379-
}
383+
},
384+
[["file"]],
380385
)
381386
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
382387
# It should be noted that the actual Content-Type header that will be
@@ -473,7 +478,7 @@ async def async_(
473478
474479
timeout: Override the client-level default timeout for this request, in seconds
475480
"""
476-
body = deepcopy_minimal(
481+
body = deepcopy_with_paths(
477482
{
478483
"output_format": output_format,
479484
"csv_options": csv_options,
@@ -484,7 +489,8 @@ async def async_(
484489
"include_metadata": include_metadata,
485490
"json_options": json_options,
486491
"prompt_mode": prompt_mode,
487-
}
492+
},
493+
[["file"]],
488494
)
489495
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
490496
# It should be noted that the actual Content-Type header that will be
@@ -536,7 +542,7 @@ async def batch(
536542
537543
timeout: Override the client-level default timeout for this request, in seconds
538544
"""
539-
body = deepcopy_minimal(
545+
body = deepcopy_with_paths(
540546
{
541547
"files": files,
542548
"output_format": output_format,
@@ -545,7 +551,8 @@ async def batch(
545551
"include_metadata": include_metadata,
546552
"json_options": json_options,
547553
"prompt_mode": prompt_mode,
548-
}
554+
},
555+
[["files", "<array>"]],
549556
)
550557
extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
551558
# It should be noted that the actual Content-Type header that will be
@@ -628,7 +635,7 @@ async def stream(
628635
timeout: Override the client-level default timeout for this request, in seconds
629636
"""
630637
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
631-
body = deepcopy_minimal(
638+
body = deepcopy_with_paths(
632639
{
633640
"output_format": output_format,
634641
"csv_options": csv_options,
@@ -640,7 +647,8 @@ async def stream(
640647
"include_metadata": include_metadata,
641648
"json_options": json_options,
642649
"prompt_mode": prompt_mode,
643-
}
650+
},
651+
[["file"]],
644652
)
645653
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
646654
# It should be noted that the actual Content-Type header that will be
@@ -714,7 +722,7 @@ async def sync(
714722
715723
timeout: Override the client-level default timeout for this request, in seconds
716724
"""
717-
body = deepcopy_minimal(
725+
body = deepcopy_with_paths(
718726
{
719727
"output_format": output_format,
720728
"csv_options": csv_options,
@@ -725,7 +733,8 @@ async def sync(
725733
"include_metadata": include_metadata,
726734
"json_options": json_options,
727735
"prompt_mode": prompt_mode,
728-
}
736+
},
737+
[["file"]],
729738
)
730739
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
731740
# It should be noted that the actual Content-Type header that will be

0 commit comments

Comments
 (0)