Skip to content

Commit d958005

Browse files
thodson-usgsclaude
andcommitted
docs+tests(waterdata): apply /simplify sweep — close stale refs and consolidate sleep helpers
Doc-vs-code drift left by the recent inlining + the 01c734b transport-error fix: - multi_value_chunked Raises (chunking.py:1716): now lists transport errors alongside 429/5xx — the surface the 01c734b sweep missed. - _combine_raw docstring (chunking.py:1444): "record" → "the track closure in _run" since record() was inlined in c63da1b. - track closure docstring (chunking.py:1627): "+ record +" → "+ result-store +" for the same reason. - _aiozero test helper docstring: tightened to "asyncio.sleep (via the chunking module's binding)" — chunking.asyncio IS the asyncio module. - Test section banner: "drivers" (plural) → "driver" (only one remaining after the async-only collapse). Simplifications: - Drop the redundant 3-line comment above the inlined `return self.finalize(*self._combine_raw())` — partial_frame's docstring and the class Attributes already say the same thing twice. - Test sleep-faker variants consolidated: replaced 3 inline `async def _noslept(_d): return None` blocks with the existing module-level `_aiozero`; replaced 2 inline `_record` closures with a new module-level `_recording_sleep(slept)` factory. Net −8 lines. 296 offline tests pass; ruff clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c63da1b commit d958005

2 files changed

Lines changed: 28 additions & 36 deletions

File tree

dataretrieval/waterdata/chunking.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,10 +1441,11 @@ def _combine_raw(self) -> tuple[pd.DataFrame, httpx.Response]:
14411441
Frames concatenate in sub-args *index* order (``sorted`` keys —
14421442
deterministic, independent of parallel completion order). The
14431443
aggregated response takes its headers from the most-recently-
1444-
*completed* sub-request: ``record`` is the only writer of
1445-
``self._chunks`` and ``dict`` preserves insertion order, so the
1446-
chunks' natural order is completion order and the last one carries
1447-
the freshest ``x-ratelimit-remaining``.
1444+
*completed* sub-request: the ``track`` closure in :meth:`_run`
1445+
is the only writer of ``self._chunks`` and ``dict`` preserves
1446+
insertion order, so the chunks' natural order is completion
1447+
order and the last one carries the freshest
1448+
``x-ratelimit-remaining``.
14481449
14491450
Returns
14501451
-------
@@ -1624,7 +1625,7 @@ async def _run(self, max_concurrent: int | None) -> tuple[pd.DataFrame, Any]:
16241625
async def track(
16251626
index: int, args: dict[str, Any]
16261627
) -> tuple[pd.DataFrame, httpx.Response]:
1627-
"""One sub-request (with retry) + record + progress tick."""
1628+
"""One sub-request (with retry) + result-store + progress tick."""
16281629
result = await _retry(lambda: self.fetch(args), self.retry_policy)
16291630
self._chunks[index] = result
16301631
if reporter is not None:
@@ -1667,9 +1668,6 @@ async def track(
16671668
interrupted, exc = first_transient
16681669
raise interrupted from exc
16691670

1670-
# Apply the injected ``finalize`` to the raw combined result.
1671-
# ``partial_frame`` / ``partial_response`` deliberately bypass
1672-
# ``finalize`` to stay cheap and side-effect-free.
16731671
return self.finalize(*self._combine_raw())
16741672

16751673

@@ -1713,9 +1711,9 @@ def multi_value_chunked(
17131711
RequestTooLarge
17141712
If no plan can fit ``url_limit``.
17151713
ChunkInterrupted
1716-
On a mid-execution 429 (:class:`QuotaExhausted`) or 5xx
1717-
(:class:`ServiceInterrupted`). See :class:`ChunkedCall` for
1718-
the resume semantics.
1714+
On a mid-execution transient — 429, 5xx, or a bare transport
1715+
error: :class:`QuotaExhausted` for 429, :class:`ServiceInterrupted`
1716+
for the rest. See :class:`ChunkedCall` for the resume semantics.
17191717
17201718
See Also
17211719
--------

tests/waterdata_chunking_test.py

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,26 @@
6060

6161

6262
def _aiozero(_d):
63-
"""An async no-op sleep — monkeypatched over the ``chunking`` module's
64-
``asyncio.sleep`` so retry backoff doesn't actually wait in tests."""
63+
"""An async no-op sleep — monkeypatched over ``asyncio.sleep`` (via
64+
the chunking module's binding) so retry backoff doesn't wait in tests."""
6565

6666
async def _noop():
6767
return None
6868

6969
return _noop()
7070

7171

72+
def _recording_sleep(slept):
73+
"""An ``_aiozero`` variant that appends each requested delay to ``slept``
74+
before resolving — for tests that need to assert what would have been waited."""
75+
76+
def _record(delay):
77+
slept.append(delay)
78+
return _aiozero(delay)
79+
80+
return _record
81+
82+
7283
class _FakeReq:
7384
"""Stand-in for ``httpx.Request`` whose ``_request_bytes`` shape
7485
is ``len(str(url)) + len(content)``."""
@@ -1487,7 +1498,7 @@ def test_combine_chunk_responses_does_not_mutate_input_urls():
14871498

14881499

14891500
# ---------------------------------------------------------------------------
1490-
# Retry-with-backoff: RetryPolicy + _retryable + drivers + decorator wiring.
1501+
# Retry-with-backoff: RetryPolicy + _retryable + driver + decorator wiring.
14911502
# Conftest pins API_USGS_RETRIES=0, so these tests opt in explicitly and
14921503
# patch the chunking module's ``asyncio.sleep`` to a no-op (no real backoff).
14931504
# ---------------------------------------------------------------------------
@@ -1631,11 +1642,7 @@ async def afn():
16311642
def test_retry_non_retryable_not_retried(monkeypatch):
16321643
slept: list[float] = []
16331644

1634-
def _record(delay):
1635-
slept.append(delay)
1636-
return _aiozero(delay)
1637-
1638-
monkeypatch.setattr(_chunking.asyncio, "sleep", _record)
1645+
monkeypatch.setattr(_chunking.asyncio, "sleep", _recording_sleep(slept))
16391646
calls = {"n": 0}
16401647

16411648
async def afn():
@@ -1650,11 +1657,7 @@ async def afn():
16501657
def test_retry_long_retry_after_escalates(monkeypatch):
16511658
slept: list[float] = []
16521659

1653-
def _record(delay):
1654-
slept.append(delay)
1655-
return _aiozero(delay)
1656-
1657-
monkeypatch.setattr(_chunking.asyncio, "sleep", _record)
1660+
monkeypatch.setattr(_chunking.asyncio, "sleep", _recording_sleep(slept))
16581661
calls = {"n": 0}
16591662

16601663
async def afn():
@@ -1670,10 +1673,7 @@ async def afn():
16701673

16711674

16721675
def test_retry_transient_then_success(monkeypatch):
1673-
async def _noslept(_d):
1674-
return None
1675-
1676-
monkeypatch.setattr(_chunking.asyncio, "sleep", _noslept)
1676+
monkeypatch.setattr(_chunking.asyncio, "sleep", _aiozero)
16771677
calls = {"n": 0}
16781678

16791679
async def afn():
@@ -1734,10 +1734,7 @@ def test_async_fan_out_retries_transient_then_completes(monkeypatch):
17341734
"""The parallel path retries a transient sub-request and completes."""
17351735
monkeypatch.setenv("API_USGS_RETRIES", "3")
17361736

1737-
async def _noslept(_d):
1738-
return None
1739-
1740-
monkeypatch.setattr(_chunking.asyncio, "sleep", _noslept)
1737+
monkeypatch.setattr(_chunking.asyncio, "sleep", _aiozero)
17411738
state = {"failed": False}
17421739

17431740
async def fetch_async(args):
@@ -1756,10 +1753,7 @@ def test_async_fan_out_surfaces_fatal_over_transient(monkeypatch):
17561753
being masked behind a resumable interruption from a transient sibling."""
17571754
monkeypatch.setenv("API_USGS_RETRIES", "2")
17581755

1759-
async def _noslept(_d):
1760-
return None
1761-
1762-
monkeypatch.setattr(_chunking.asyncio, "sleep", _noslept)
1756+
monkeypatch.setattr(_chunking.asyncio, "sleep", _aiozero)
17631757

17641758
async def fetch_async(args):
17651759
# One chunk carries a deterministic programmer error; the rest are

0 commit comments

Comments
 (0)