chore(internal): codegen related update

stainless-app[bot] · stainless-app[bot] · commit ddfbedf1d6de · 2026-04-18T08:25:00.000Z
diff --git a/src/docstrange/_files.py b/src/docstrange/_files.py
@@ -3,8 +3,8 @@
 import io
 import os
 import pathlib
-from typing import overload
-from typing_extensions import TypeGuard
+from typing import Sequence, cast, overload
+from typing_extensions import TypeVar, TypeGuard
 
 import anyio
 
@@ -17,7 +17,9 @@
     HttpxFileContent,
     HttpxRequestFiles,
 )
-from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
+from ._utils import is_list, is_mapping, is_tuple_t, is_mapping_t, is_sequence_t
+
+_T = TypeVar("_T")
 
 
 def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
@@ -121,3 +123,51 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
         return await anyio.Path(file).read_bytes()
 
     return file
+
+
+def deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]]) -> _T:
+    """Copy only the containers along the given paths.
+
+    Used to guard against mutation by extract_files without copying the entire structure.
+    Only dicts and lists that lie on a path are copied; everything else
+    is returned by reference.
+
+    For example, given paths=[["foo", "files", "file"]] and the structure:
+        {
+            "foo": {
+                "bar": {"baz": {}},
+                "files": {"file": <content>}
+            }
+        }
+    The root dict, "foo", and "files" are copied (they lie on the path).
+    "bar" and "baz" are returned by reference (off the path).
+    """
+    return _deepcopy_with_paths(item, paths, 0)
+
+
+def _deepcopy_with_paths(item: _T, paths: Sequence[Sequence[str]], index: int) -> _T:
+    if not paths:
+        return item
+    if is_mapping(item):
+        key_to_paths: dict[str, list[Sequence[str]]] = {}
+        for path in paths:
+            if index < len(path):
+                key_to_paths.setdefault(path[index], []).append(path)
+
+        # if no path continues through this mapping, it won't be mutated and copying it is redundant
+        if not key_to_paths:
+            return item
+
+        result = dict(item)
+        for key, subpaths in key_to_paths.items():
+            if key in result:
+                result[key] = _deepcopy_with_paths(result[key], subpaths, index + 1)
+        return cast(_T, result)
+    if is_list(item):
+        array_paths = [path for path in paths if index < len(path) and path[index] == "<array>"]
+
+        # if no path expects a list here, nothing will be mutated inside it - return by reference
+        if not array_paths:
+            return cast(_T, item)
+        return cast(_T, [_deepcopy_with_paths(entry, array_paths, index + 1) for entry in item])
+    return item
diff --git a/src/docstrange/_utils/__init__.py b/src/docstrange/_utils/__init__.py
@@ -24,7 +24,6 @@
     coerce_integer as coerce_integer,
     file_from_path as file_from_path,
     strip_not_given as strip_not_given,
-    deepcopy_minimal as deepcopy_minimal,
     get_async_library as get_async_library,
     maybe_coerce_float as maybe_coerce_float,
     get_required_header as get_required_header,
diff --git a/src/docstrange/_utils/_utils.py b/src/docstrange/_utils/_utils.py
@@ -177,21 +177,6 @@ def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
     return isinstance(obj, Iterable)
 
 
-def deepcopy_minimal(item: _T) -> _T:
-    """Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
-
-    - mappings, e.g. `dict`
-    - list
-
-    This is done for performance reasons.
-    """
-    if is_mapping(item):
-        return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
-    if is_list(item):
-        return cast(_T, [deepcopy_minimal(entry) for entry in item])
-    return item
-
-
 # copied from https://github.com/Rapptz/RoboDanny
 def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
     size = len(seq)
diff --git a/src/docstrange/resources/classify.py b/src/docstrange/resources/classify.py
@@ -7,8 +7,9 @@
 import httpx
 
 from ..types import classify_sync_params, classify_batch_params
+from .._files import deepcopy_with_paths
 from .._types import Body, Query, Headers, NotGiven, FileTypes, SequenceNotStr, not_given
-from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
+from .._utils import extract_files, maybe_transform, async_maybe_transform
 from .._compat import cached_property
 from .._resource import SyncAPIResource, AsyncAPIResource
 from .._response import (
@@ -75,11 +76,12 @@ def batch(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "categories": categories,
                 "files": files,
-            }
+            },
+            [["files", "<array>"]],
         )
         extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
         # It should be noted that the actual Content-Type header that will be
@@ -129,11 +131,12 @@ def sync(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "categories": categories,
                 "file": file,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
@@ -202,11 +205,12 @@ async def batch(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "categories": categories,
                 "files": files,
-            }
+            },
+            [["files", "<array>"]],
         )
         extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
         # It should be noted that the actual Content-Type header that will be
@@ -256,11 +260,12 @@ async def sync(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "categories": categories,
                 "file": file,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
diff --git a/src/docstrange/resources/extract/extract.py b/src/docstrange/resources/extract/extract.py
@@ -16,6 +16,7 @@
     ResultsResourceWithStreamingResponse,
     AsyncResultsResourceWithStreamingResponse,
 )
+from ..._files import deepcopy_with_paths
 from ..._types import (
     Body,
     Omit,
@@ -27,7 +28,7 @@
     omit,
     not_given,
 )
-from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
+from ..._utils import extract_files, maybe_transform, async_maybe_transform
 from ..._compat import cached_property
 from ..._resource import SyncAPIResource, AsyncAPIResource
 from ..._response import (
@@ -124,7 +125,7 @@ def async_(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "output_format": output_format,
                 "csv_options": csv_options,
@@ -135,7 +136,8 @@ def async_(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
@@ -187,7 +189,7 @@ def batch(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "files": files,
                 "output_format": output_format,
@@ -196,7 +198,8 @@ def batch(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["files", "<array>"]],
         )
         extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
         # It should be noted that the actual Content-Type header that will be
@@ -279,7 +282,7 @@ def stream(
           timeout: Override the client-level default timeout for this request, in seconds
         """
         extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "output_format": output_format,
                 "csv_options": csv_options,
@@ -291,7 +294,8 @@ def stream(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
@@ -365,7 +369,7 @@ def sync(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "output_format": output_format,
                 "csv_options": csv_options,
@@ -376,7 +380,8 @@ def sync(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
@@ -473,7 +478,7 @@ async def async_(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "output_format": output_format,
                 "csv_options": csv_options,
@@ -484,7 +489,8 @@ async def async_(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
@@ -536,7 +542,7 @@ async def batch(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "files": files,
                 "output_format": output_format,
@@ -545,7 +551,8 @@ async def batch(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["files", "<array>"]],
         )
         extracted_files = extract_files(cast(Mapping[str, object], body), paths=[["files", "<array>"]])
         # It should be noted that the actual Content-Type header that will be
@@ -628,7 +635,7 @@ async def stream(
           timeout: Override the client-level default timeout for this request, in seconds
         """
         extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "output_format": output_format,
                 "csv_options": csv_options,
@@ -640,7 +647,8 @@ async def stream(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
@@ -714,7 +722,7 @@ async def sync(
 
           timeout: Override the client-level default timeout for this request, in seconds
         """
-        body = deepcopy_minimal(
+        body = deepcopy_with_paths(
             {
                 "output_format": output_format,
                 "csv_options": csv_options,
@@ -725,7 +733,8 @@ async def sync(
                 "include_metadata": include_metadata,
                 "json_options": json_options,
                 "prompt_mode": prompt_mode,
-            }
+            },
+            [["file"]],
         )
         files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
         # It should be noted that the actual Content-Type header that will be
diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py
diff --git a/tests/test_files.py b/tests/test_files.py