refactor(chunking): Flatten _fan_out_async into focused helpers

thodson-usgs · claude · thodson-usgs · commit 6028dd5f8037 · 2026-05-25T07:59:17.000-05:00
Cognitive-burden refactor of the async fan-out path; behavior
unchanged. Five targeted cleanups:

1. Drop the ``Semaphore | None`` Optional + ``_bounded`` wrapper.
   The semaphore is now unconditional, sized to ``max_concurrent or
   sys.maxsize`` ("unbounded" is just a very-large counter). One
   ``async with semaphore`` everywhere; no branching.

2. Extract ``_probe_first`` and ``_fan_out_rest`` so
   ``_fan_out_async``'s body reads as ``probe -> check quota ->
   fan out rest -> combine`` instead of inlining the two try/except
   blocks. Each helper has a focused docstring + ``_Track`` type
   alias for the shared callable.

3. Simplify the post-gather exception walker with the walrus:
   ``if (interrupted := call.wrap_failure(exc)) is not None``.
   One assignment per iteration, no double-call.

4. Use the existing ``ChunkedCall.completed_chunks`` property in
   the progress tick instead of poking ``len(call._chunks)``.

5. Extract ``_execute_in_parallel`` from the decorator wrapper. The
   wrapper now reads as ``serial-or-parallel?`` in five lines; the
   helper owns the ``fetch_async is None`` and running-event-loop
   fallbacks (with their UserWarnings).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py
@@ -53,6 +53,7 @@
 import itertools
 import math
 import os
+import sys
 import warnings
 from collections.abc import Awaitable, Callable, Iterator
 from contextlib import contextmanager, suppress
@@ -247,6 +248,12 @@ def get_active_async_session() -> httpx.AsyncClient | None:
 _FetchOnceAsync = Callable[
     [dict[str, Any]], Awaitable[tuple[pd.DataFrame, httpx.Response]]
 ]
+# A "tracked" sub-request issuer: takes ``(index, sub_args)``, issues
+# one sub-request, records the completion on the shared ``ChunkedCall``,
+# and ticks the progress reporter. The probe and fan-out helpers both
+# call into the same closure so bookkeeping happens exactly once per
+# success.
+_Track = Callable[[int, dict[str, Any]], Awaitable[tuple[pd.DataFrame, httpx.Response]]]
 
 
 class _RetryableTransportError(RuntimeError):
@@ -1429,6 +1436,51 @@ def _check_quota_remaining(self) -> None:
         )
 
 
+async def _probe_first(
+    call: ChunkedCall, sub_args: dict[str, Any], track: _Track
+) -> tuple[pd.DataFrame, httpx.Response]:
+    """
+    Issue sub-request 0 alone so its ``x-ratelimit-remaining`` header
+    can gate the rest of the plan before the burst goes out. A
+    transient failure here is routed through ``call.wrap_failure`` →
+    :class:`ChunkInterrupted`; non-transient failures re-raise so the
+    caller sees their original type.
+    """
+    try:
+        return await track(0, sub_args)
+    except (RuntimeError, httpx.HTTPError) as exc:
+        if (interrupted := call.wrap_failure(exc)) is not None:
+            raise interrupted from exc
+        raise
+
+
+async def _fan_out_rest(
+    call: ChunkedCall, sub_args_rest: list[dict[str, Any]], track: _Track
+) -> None:
+    """
+    Dispatch sub-requests 1..N-1 concurrently.
+
+    Completed pairs survive a sibling's transient failure via
+    ``return_exceptions=True``, so the partial result stays
+    recoverable through :meth:`ChunkedCall.resume`. On any failure,
+    prefer raising the first *recognized transient* — so the user
+    still gets a resumable :class:`ChunkInterrupted` even when a
+    non-transient bug landed earlier in submission order. Fall back
+    to the first failure (preserving its type) when nothing is
+    transient.
+    """
+    results = await asyncio.gather(
+        *(track(i, args) for i, args in enumerate(sub_args_rest, start=1)),
+        return_exceptions=True,
+    )
+    failures = [r for r in results if isinstance(r, BaseException)]
+    for exc in failures:
+        if (interrupted := call.wrap_failure(exc)) is not None:
+            raise interrupted from exc
+    if failures:
+        raise failures[0]
+
+
 async def _fan_out_async(
     plan: ChunkPlan,
     fetch_once: _FetchOnce,
@@ -1443,24 +1495,27 @@ async def _fan_out_async(
     The fan-out preserves the same safety contracts the serial
     :class:`ChunkedCall` path provides:
 
-    * **Quota check.** The first sub-request is issued alone; its
-      ``x-ratelimit-remaining`` header is read before any other
-      sub-request is dispatched. If the remaining plan can't fit the
-      window, :class:`RequestExceedsQuota` fires (matching
-      :meth:`ChunkedCall._check_quota_remaining`).
-    * **Resumable interruptions.** ``asyncio.gather`` runs with
-      ``return_exceptions=True`` so completed sub-requests survive a
-      sibling's transient failure. On a recognized transient
-      (:class:`RateLimited`, :class:`ServiceUnavailable`) a
-      :class:`ChunkInterrupted` subclass is raised with ``.call`` set
-      to a :class:`ChunkedCall` carrying the completed sub-args as a
-      sparse index map. Calling ``exc.call.resume()`` re-issues only
-      the unfinished sub-requests, via the sync ``fetch_once`` path.
-
-    Bounded by an :class:`asyncio.Semaphore` when ``max_concurrent``
-    is set; unbounded otherwise. The shared client is published on
+    * **Quota check.** The first sub-request is issued alone via
+      :func:`_probe_first`; its ``x-ratelimit-remaining`` header is
+      read before any other sub-request is dispatched. If the
+      remaining plan can't fit the window,
+      :class:`RequestExceedsQuota` fires.
+    * **Resumable interruptions.** :func:`_fan_out_rest` runs
+      ``asyncio.gather`` with ``return_exceptions=True`` so completed
+      sub-requests survive a sibling's transient failure. On a
+      recognized transient (:class:`RateLimited`,
+      :class:`ServiceUnavailable`) a :class:`ChunkInterrupted`
+      subclass is raised with ``.call`` set to a
+      :class:`ChunkedCall` carrying the sparse completed sub-args;
+      ``exc.call.resume()`` re-issues only the unfinished ones via
+      the sync ``fetch_once`` path.
+
+    In-flight sub-requests are capped by an
+    :class:`asyncio.Semaphore`; ``max_concurrent=None`` ("unbounded")
+    uses ``sys.maxsize`` so every call site can take the same
+    ``async with semaphore`` path. The shared client is published on
     :data:`_chunked_async_session` so async paginated-loop helpers
-    downstream reuse its connection pool.
+    reuse its connection pool.
 
     Parameters
     ----------
@@ -1497,57 +1552,37 @@ async def _fan_out_async(
 
     # ``httpx.Limits()`` defaults to ``max_connections=100`` — at
     # higher concurrency the pool would silently bottleneck the
-    # fan-out behind the connection cap. Pass an explicit cap that
-    # matches the semaphore, or ``None`` for truly unbounded.
+    # fan-out behind the connection cap. Match it to the semaphore,
+    # or ``None`` for truly unbounded.
     limits = httpx.Limits(
         max_connections=max_concurrent, max_keepalive_connections=max_concurrent
     )
-    semaphore: asyncio.Semaphore | None = (
-        asyncio.Semaphore(max_concurrent) if max_concurrent is not None else None
-    )
-
-    async def _bounded(args: dict[str, Any]) -> tuple[pd.DataFrame, httpx.Response]:
-        if semaphore is None:
-            return await fetch_async(args)
-        async with semaphore:
-            return await fetch_async(args)
-
+    # ``sys.maxsize`` stands in for "unbounded": ``asyncio.Semaphore``
+    # only decrements a counter, never preallocates slots.
+    semaphore = asyncio.Semaphore(max_concurrent or sys.maxsize)
     call = ChunkedCall(plan, fetch_once)
 
     async with httpx.AsyncClient(limits=limits, **HTTPX_DEFAULTS) as client:
         with _publish_async_session(client):
-            # Record the total so the progress line can show
-            # ``chunk K/N``; ``_track`` bumps K as each sub-request
-            # completes so the parallel display advances chunk by
-            # chunk just like the serial path.
             reporter = _progress.current()
             if reporter is not None:
                 reporter.set_chunks(plan.total)
 
-            async def _track(offset: int, args: dict[str, Any]):
-                """Issue one sub-request, record its result, and
-                report completion. Used for both the probe-first call
-                and the gathered fan-out so the record+report happen
-                exactly once per success. asyncio is single-threaded
-                within one event loop, so the record + len read
-                sequence is atomic at the scheduler level."""
-                result = await _bounded(args)
+            async def track(
+                offset: int, args: dict[str, Any]
+            ) -> tuple[pd.DataFrame, httpx.Response]:
+                """One sub-request + record + progress tick. asyncio
+                is single-threaded within an event loop, so the
+                record-then-read sequence is atomic at the scheduler
+                level."""
+                async with semaphore:
+                    result = await fetch_async(args)
                 call.record(offset, result)
                 if reporter is not None:
-                    reporter.start_chunk(len(call._chunks))
+                    reporter.start_chunk(call.completed_chunks)
                 return result
 
-            # Probe-first: issue index 0 alone, check quota, then
-            # fan out the rest. A transient failure here surfaces as a
-            # ChunkInterrupted whose .call has no completed sub-requests
-            # yet, so .call.resume() re-issues the entire plan.
-            try:
-                first_pair = await _track(0, sub_args_list[0])
-            except (RuntimeError, httpx.HTTPError) as exc:
-                interrupted = call.wrap_failure(exc)
-                if interrupted is not None:
-                    raise interrupted from exc
-                raise
+            first_pair = await _probe_first(call, sub_args_list[0], track)
 
             if len(sub_args_list) > 1 and not _quota_check_disabled():
                 remaining = _read_remaining(first_pair[1])
@@ -1559,26 +1594,7 @@ async def _track(offset: int, args: dict[str, Any]):
                         call=call,
                     )
 
-            # Fan out the remaining sub-requests. Completed pairs
-            # survive a sibling's transient failure (``return_exceptions``),
-            # so the partial result stays recoverable through
-            # ``ChunkedCall.resume()``.
-            results = await asyncio.gather(
-                *(_track(off, a) for off, a in enumerate(sub_args_list[1:], start=1)),
-                return_exceptions=True,
-            )
-            exceptions = [r for r in results if isinstance(r, BaseException)]
-            # Prefer wrapping the first *recognized transient* failure
-            # so the user still gets a resumable ``ChunkInterrupted``
-            # even if a non-transient error happened to land first by
-            # submission order. Only if none of the failures is
-            # transient do we fall back to raising the first one.
-            for exc in exceptions:
-                interrupted = call.wrap_failure(exc)
-                if interrupted is not None:
-                    raise interrupted from exc
-            if exceptions:
-                raise exceptions[0]
+            await _fan_out_rest(call, sub_args_list[1:], track)
 
     ordered = call._ordered_chunks()
     return (
@@ -1660,48 +1676,63 @@ def wrapper(
             plan = ChunkPlan(args, build_request, limit)
             concurrency = _read_concurrency_env()
 
-            # Stay on the sync path for trivial plans and explicit
-            # opt-outs. The remaining branches all need a wired
-            # ``fetch_async`` and a usable event loop.
+            # Trivial plans and explicit opt-outs stay on the sync
+            # path; ``_execute_in_parallel`` owns the rest of the
+            # serial/parallel decision (async wiring, running loop).
             if plan.total <= 1 or concurrency == 1:
                 return plan.execute(fetch_once)
-            if fetch_async is None:
-                warnings.warn(
-                    f"{_CONCURRENCY_ENV} is set to {concurrency} but this "
-                    f"call site has no async fetch sibling wired; falling "
-                    f"back to the serial path. Either set "
-                    f"{_CONCURRENCY_ENV}=1 to silence this warning or pass "
-                    f"fetch_async= to @multi_value_chunked.",
-                    UserWarning,
-                    stacklevel=2,
-                )
-                return plan.execute(fetch_once)
-            # ``asyncio.run`` raises ``RuntimeError`` if an event loop
-            # is already running (e.g. Jupyter / IPython kernels,
-            # async apps). Detect that case and fall back to the
-            # serial path with a one-time warning so notebook users
-            # don't see a confusing ``RuntimeError``.
-            if _running_event_loop() is not None:
-                warnings.warn(
-                    "Detected a running asyncio event loop; the parallel "
-                    f"chunker path cannot run inside one. Falling back to "
-                    f"the serial path. Set {_CONCURRENCY_ENV}=1 to silence "
-                    f"this warning.",
-                    UserWarning,
-                    stacklevel=2,
-                )
-                return plan.execute(fetch_once)
-            return asyncio.run(
-                _fan_out_async(
-                    plan, fetch_once, fetch_async, max_concurrent=concurrency
-                )
-            )
+            return _execute_in_parallel(plan, fetch_once, fetch_async, concurrency)
 
         return wrapper
 
     return decorator
 
 
+def _execute_in_parallel(
+    plan: ChunkPlan,
+    fetch_once: _FetchOnce,
+    fetch_async: _FetchOnceAsync | None,
+    concurrency: int | None,
+) -> tuple[pd.DataFrame, httpx.Response]:
+    """
+    Run ``plan`` on the parallel async path, falling back to the
+    serial sync path when the runtime can't host an event loop.
+
+    Falls back (with a one-time :class:`UserWarning`) when:
+
+    * ``fetch_async`` wasn't wired into the decorator, or
+    * an asyncio event loop is already running (Jupyter / IPython
+      kernels, async apps — ``asyncio.run`` would raise).
+
+    Otherwise opens a fresh event loop via :func:`asyncio.run` and
+    drives :func:`_fan_out_async`.
+    """
+    if fetch_async is None:
+        warnings.warn(
+            f"{_CONCURRENCY_ENV} is set to {concurrency} but this "
+            f"call site has no async fetch sibling wired; falling "
+            f"back to the serial path. Either set "
+            f"{_CONCURRENCY_ENV}=1 to silence this warning or pass "
+            f"fetch_async= to @multi_value_chunked.",
+            UserWarning,
+            stacklevel=3,
+        )
+        return plan.execute(fetch_once)
+    if _running_event_loop() is not None:
+        warnings.warn(
+            "Detected a running asyncio event loop; the parallel "
+            f"chunker path cannot run inside one. Falling back to "
+            f"the serial path. Set {_CONCURRENCY_ENV}=1 to silence "
+            f"this warning.",
+            UserWarning,
+            stacklevel=3,
+        )
+        return plan.execute(fetch_once)
+    return asyncio.run(
+        _fan_out_async(plan, fetch_once, fetch_async, max_concurrent=concurrency)
+    )
+
+
 def _running_event_loop() -> asyncio.AbstractEventLoop | None:
     """Return the active asyncio event loop, or ``None`` when none."""
     try: