DOI-USGS
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py
Lines changed: 87 additions & 54 deletions
@@ -33,11 +33,13 @@
 
 from __future__ import annotations
 
+import copy
 import functools
 import itertools
 import math
 import os
 from collections.abc import Callable, Iterator
+from contextvars import ContextVar
 from dataclasses import dataclass
 from typing import Any, ClassVar
 from urllib.parse import quote_plus
@@ -93,6 +95,19 @@
 # Response header USGS uses to advertise remaining hourly quota.
 _QUOTA_HEADER = "x-ratelimit-remaining"
 
+# Session shared across all sub-requests of a single chunked call.
+# Set by ``ChunkedCall.resume`` so paginated-loop helpers downstream
+# (``_walk_pages``) reuse the same connection pool across the entire
+# fan-out instead of opening a fresh ``requests.Session`` per
+# sub-request. ``None`` when not inside a chunked call — paginated
+# helpers fall back to their own short-lived session in that case.
+# Plumbed via ``ContextVar`` rather than a kwarg on ``_FetchOnce`` so
+# user-defined fetch functions (and test fixtures) keep the simple
+# ``fetch(args)`` signature.
+_chunked_session: ContextVar[requests.Session | None] = ContextVar(
+    "_chunked_session", default=None
+)
+
 # Separators the two axis kinds use to join their atoms back into
 # URL text. List axes comma-join values
 # (``site=USGS-A,USGS-B``); the filter axis OR-joins clauses
@@ -336,7 +351,7 @@ def _request_bytes(req: requests.PreparedRequest) -> int:
     return len(req.url) + body_len
 
 
-@dataclass(frozen=True)
+@dataclass(frozen=True, slots=True)
 class _Axis:
     """
     A single chunkable axis of one user-level request — a list of
@@ -481,7 +496,7 @@ def _worst_case_args(
 def _plan_axes(
     axes: list[_Axis],
     args: dict[str, Any],
-    build_request: Callable[..., Any],
+    build_request: Callable[..., requests.PreparedRequest],
     url_limit: int,
 ) -> dict[str, list[list[str]]]:
     """
@@ -498,7 +513,7 @@ def _plan_axes(
         The chunkable axes to partition.
     args : dict[str, Any]
         The user-level args (used to build the worst-case request).
-    build_request : Callable[..., Any]
+    build_request : Callable[..., requests.PreparedRequest]
         Factory that turns a kwargs dict into a sized prepared
         request, e.g. ``_construct_api_requests``.
     url_limit : int
@@ -545,11 +560,7 @@ def _plan_axes(
         axis_chunks = chunks[biggest_axis.arg_key]
         chunk = axis_chunks[biggest_idx]
         mid = len(chunk) // 2
-        chunks[biggest_axis.arg_key] = (
-            axis_chunks[:biggest_idx]
-            + [chunk[:mid], chunk[mid:]]
-            + axis_chunks[biggest_idx + 1 :]
-        )
+        axis_chunks[biggest_idx : biggest_idx + 1] = [chunk[:mid], chunk[mid:]]
 
 
 class ChunkPlan:
@@ -572,7 +583,7 @@ class ChunkPlan:
     ----------
     args : dict[str, Any]
         The user-level request kwargs.
-    build_request : Callable[..., Any]
+    build_request : Callable[..., requests.PreparedRequest]
         Factory that turns a kwargs dict into a sized prepared
         request, e.g. ``_construct_api_requests``.
     url_limit : int
@@ -608,28 +619,23 @@ class ChunkPlan:
     def __init__(
         self,
         args: dict[str, Any],
-        build_request: Callable[..., Any],
+        build_request: Callable[..., requests.PreparedRequest],
         url_limit: int,
     ) -> None:
         self.args = args
-        # Passthrough defaults; promoted at the bottom only if chunking
-        # is actually needed.
         self.axes: list[_Axis] = []
         self.chunks: dict[str, list[list[str]]] = {}
         self.canonical_url: str | None = None
 
         axes = _extract_axes(args)
-
-        # Trivial passthrough: no chunkable axes. Skip the
-        # ``build_request`` call entirely — the common Water Data call
-        # shape doesn't pay for an unused request prep.
+        # No chunkable axes → skip ``build_request`` entirely; the
+        # common Water Data call shape shouldn't pay for an unused
+        # request prep on the passthrough hot path.
         if not axes:
             return
 
         initial_request = build_request(**args)
         self.canonical_url = initial_request.url
-
-        # Already-fits passthrough: chunking is possible but unnecessary.
         if _request_bytes(initial_request) <= url_limit:
             return
 
@@ -663,8 +669,6 @@ def iter_sub_args(self) -> Iterator[dict[str, Any]]:
             A copy of ``self.args`` with each axis's current chunk
             substituted under its ``arg_key``.
         """
-        # Trivial-passthrough fast path: skip the cartesian product
-        # machinery and yield ``self.args`` directly.
         if not self.axes:
             yield self.args
             return
@@ -791,9 +795,9 @@ def _classify_chunk_error(
     cur: BaseException | None = exc
     while cur is not None:
         if isinstance(cur, RateLimited):
-            return QuotaExhausted, getattr(cur, "retry_after", None)
+            return QuotaExhausted, cur.retry_after
         if isinstance(cur, ServiceUnavailable):
-            return ServiceInterrupted, getattr(cur, "retry_after", None)
+            return ServiceInterrupted, cur.retry_after
         cur = cur.__cause__
     return None
 
@@ -839,11 +843,12 @@ def _combine_chunk_responses(
     """
     Fold per-sub-request responses into a single aggregated response.
 
-    The first response is mutated in place: ``.headers`` becomes the
-    last response's (so ``x-ratelimit-remaining`` reflects current
-    state), ``.elapsed`` accumulates total wall-clock, and ``.url`` is
-    set to the canonical original-query URL so ``BaseMetadata``
-    reflects the user's full request rather than the first chunk.
+    Returns a shallow copy of ``responses[0]`` with ``.headers`` set to
+    the last response's (so ``x-ratelimit-remaining`` reflects current
+    state), ``.elapsed`` set to total wall-clock across every response,
+    and ``.url`` set to the canonical original-query URL so
+    ``BaseMetadata`` reflects the user's full request rather than the
+    first chunk.
 
     Parameters
     ----------
@@ -858,12 +863,20 @@ def _combine_chunk_responses(
     Returns
     -------
     requests.Response
-        The first response, mutated as described above.
+        A copy of the first response with aggregated state. The input
+        responses are not mutated, so this function is idempotent —
+        safe to call repeatedly via :attr:`ChunkedCall.partial_response`
+        during error inspection or resume retries.
     """
-    head = responses[0]
+    # copy.copy preserves the requests.Response shape but breaks the
+    # alias to responses[0] so repeated calls accumulate fresh totals
+    # rather than re-adding tail elapsed onto a previously-mutated head.
+    head = copy.copy(responses[0])
     if len(responses) > 1:
         head.headers = responses[-1].headers
-        head.elapsed = sum((r.elapsed for r in responses[1:]), start=head.elapsed)
+        head.elapsed = sum(
+            (r.elapsed for r in responses[1:]), start=responses[0].elapsed
+        )
     if canonical_url is not None:
         head.url = canonical_url
     return head
@@ -919,12 +932,15 @@ class ChunkedCall:
     def __init__(self, plan: ChunkPlan, fetch_once: _FetchOnce) -> None:
         self.plan = plan
         self.fetch_once = fetch_once
-        self._frames: list[pd.DataFrame] = []
-        self._responses: list[requests.Response] = []
+        # One ``(frame, response)`` pair per completed sub-request, in
+        # execution order. Storing each pair as a single entry means a
+        # frame can never be recorded without its response, so
+        # ``len(self._chunks)`` is always a reliable resume cursor.
+        self._chunks: list[tuple[pd.DataFrame, requests.Response]] = []
 
     @property
     def completed_chunks(self) -> int:
-        return len(self._responses)
+        return len(self._chunks)
 
     @property
     def total_chunks(self) -> int:
@@ -945,9 +961,9 @@ def partial_frame(self) -> pd.DataFrame:
             Combined frame of completed sub-requests, or an empty
             ``DataFrame`` when nothing has completed.
         """
-        if not self._frames:
+        if not self._chunks:
             return pd.DataFrame()
-        return _combine_chunk_frames(self._frames)
+        return _combine_chunk_frames([frame for frame, _ in self._chunks])
 
     @property
     def partial_response(self) -> requests.Response | None:
@@ -963,14 +979,24 @@ def partial_response(self) -> requests.Response | None:
             Aggregated response when at least one sub-request has
             completed, ``None`` otherwise.
         """
-        if not self._responses:
+        if not self._chunks:
             return None
-        return _combine_chunk_responses(self._responses, self.plan.canonical_url)
+        return _combine_chunk_responses(
+            [resp for _, resp in self._chunks], self.plan.canonical_url
+        )
 
     def resume(self) -> tuple[pd.DataFrame, requests.Response]:
         """
         Drive the chunked call to completion.
 
+        Opens one ``requests.Session`` for the run and publishes it on
+        the ``_chunked_session`` ``ContextVar`` so paginated-loop
+        helpers downstream (``_walk_pages``) reuse the same connection
+        pool across every sub-request instead of handshaking fresh on
+        each. The session is closed when ``resume`` returns or raises;
+        a follow-up ``resume`` call (after a ``ChunkInterrupted``)
+        opens a new one.
+
         Idempotent: starts from chunk 0 on the first call, then from
         the cursor (``self.completed_chunks``) on every subsequent
         call. Re-issues only sub-requests that haven't already
@@ -996,39 +1022,46 @@ def resume(self) -> tuple[pd.DataFrame, requests.Response]:
             When the rate-limit window can't cover the remaining plan
             (checked after the first sub-request).
         """
-        completed = len(self._responses)
-        for i, sub_args in enumerate(self.plan.iter_sub_args()):
-            if i < completed:
-                continue
-            self._issue(sub_args)
-        return (
-            _combine_chunk_frames(self._frames),
-            _combine_chunk_responses(self._responses, self.plan.canonical_url),
-        )
+        with requests.Session() as session:
+            token = _chunked_session.set(session)
+            try:
+                completed = len(self._chunks)
+                for i, sub_args in enumerate(self.plan.iter_sub_args()):
+                    if i < completed:
+                        continue
+                    self._issue(sub_args)
+                frames = [frame for frame, _ in self._chunks]
+                responses = [resp for _, resp in self._chunks]
+                return (
+                    _combine_chunk_frames(frames),
+                    _combine_chunk_responses(responses, self.plan.canonical_url),
+                )
+            finally:
+                _chunked_session.reset(token)
 
     def _issue(self, sub_args: dict[str, Any]) -> None:
         try:
-            frame, response = self.fetch_once(sub_args)
+            chunk = self.fetch_once(sub_args)
         except RuntimeError as exc:
             classification = _classify_chunk_error(exc)
             if classification is None:
                 raise
             interrupted_class, retry_after = classification
             raise interrupted_class(
-                completed_chunks=len(self._responses),
+                completed_chunks=len(self._chunks),
                 total_chunks=self.plan.total,
                 call=self,
                 retry_after=retry_after,
             ) from exc
-        self._frames.append(frame)
-        self._responses.append(response)
-        if len(self._responses) == 1 and self.plan.total > 1:
+        self._chunks.append(chunk)
+        if len(self._chunks) == 1 and self.plan.total > 1:
             self._check_quota_after_first()
 
     def _check_quota_after_first(self) -> None:
         if _quota_check_disabled():
             return
-        remaining = _read_remaining(self._responses[0])
+        _, first_response = self._chunks[0]
+        remaining = _read_remaining(first_response)
         if remaining is None or remaining >= self.plan.total - 1:
             return
         raise RequestExceedsQuota(
@@ -1040,7 +1073,7 @@ def _check_quota_after_first(self) -> None:
 
 def multi_value_chunked(
     *,
-    build_request: Callable[..., Any],
+    build_request: Callable[..., requests.PreparedRequest],
     url_limit: int | None = None,
 ) -> Callable[[_FetchOnce], _FetchOnce]:
     """
@@ -1054,7 +1087,7 @@ def multi_value_chunked(
 
     Parameters
     ----------
-    build_request : Callable
+    build_request : Callable[..., requests.PreparedRequest]
         Factory that turns a kwargs dict into a sized prepared
         request, e.g. ``_construct_api_requests``. Called during
         planning to measure each candidate plan.