Skip to content

Commit 0768245

Browse files
thodson-usgs and claude committed
Tidy chunking.py: extract _chunk_bytes, name quota sentinel, use math.prod
Three small simplifications, no behavior change:

- Extract _chunk_bytes(chunk) helper for len(",".join(map(str, chunk))). Used in both _worst_case_args and _plan_chunks; the helper documents the cost model the planner compares chunks under.
- Name the magic sentinel 10**9 as _QUOTA_UNKNOWN. _read_remaining returns it on missing/malformed x-ratelimit-remaining headers; having one definition prevents the value from drifting between branches.
- Use math.prod for the cartesian-product cardinality calculation in _plan_chunks (max_chunks check) and the wrapper (quota-floor loop bound). Replaces an open-coded multiply-loop in two places.

All 25 chunker tests and 88 filter tests still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ed216dd commit 0768245

1 file changed

Lines changed: 24 additions & 11 deletions

File tree

dataretrieval/waterdata/chunking.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@
4848

4949
import functools
5050
import itertools
51+
import math
5152
from collections.abc import Callable
5253
from typing import Any, TypeVar
5354

@@ -100,6 +101,12 @@
100101
# known offset instead of retrying the whole chunked call from scratch.
101102
_DEFAULT_QUOTA_SAFETY_FLOOR = 50
102103

104+
# Sentinel returned by ``_read_remaining`` when the response has no
105+
# parseable ``x-ratelimit-remaining`` header. Large enough to beat any
106+
# plausible safety floor so a missing/malformed header doesn't trigger
107+
# spurious ``QuotaExhausted`` aborts.
108+
_QUOTA_UNKNOWN = 10**9
109+
103110

104111
class RequestTooLarge(ValueError):
105112
"""Raised when a chunked request cannot be issued. Two cases:
@@ -194,6 +201,16 @@ def _filter_aware_probe_args(args: dict[str, Any]) -> dict[str, Any]:
194201
return {**args, "filter": max(parts, key=len)}
195202

196203

204+
def _chunk_bytes(chunk: list) -> int:
205+
"""Byte length of ``chunk`` when comma-joined into a URL param value.
206+
207+
This is the cost the planner uses to compare chunks across dims; the
208+
real URL builder also URL-encodes the comma, but the byte counts come
209+
out the same modulo a constant per-chunk overhead.
210+
"""
211+
return len(",".join(map(str, chunk)))
212+
213+
197214
def _worst_case_args(
198215
probe_args: dict[str, Any], plan: dict[str, list[list]]
199216
) -> dict[str, Any]:
@@ -202,7 +219,7 @@ def _worst_case_args(
202219
already reduced to its filter-chunker floor."""
203220
out = dict(probe_args)
204221
for k, chunks in plan.items():
205-
out[k] = max(chunks, key=lambda c: len(",".join(map(str, c))))
222+
out[k] = max(chunks, key=_chunk_bytes)
206223
return out
207224

208225

@@ -242,7 +259,7 @@ def _plan_chunks(
242259
for idx, chunk in enumerate(dim_chunks):
243260
if len(chunk) <= 1:
244261
continue
245-
size = len(",".join(map(str, chunk)))
262+
size = _chunk_bytes(chunk)
246263
if best is None or size > best[2]:
247264
best = (dim, idx, size)
248265

@@ -258,9 +275,7 @@ def _plan_chunks(
258275
mid = len(big) // 2
259276
plan[dim] = plan[dim][:idx] + [big[:mid], big[mid:]] + plan[dim][idx + 1 :]
260277

261-
total = 1
262-
for chunks in plan.values():
263-
total *= len(chunks)
278+
total = math.prod(len(chunks) for chunks in plan.values())
264279
if total > max_chunks:
265280
raise RequestTooLarge(
266281
f"Chunked plan would issue {total} sub-requests, exceeding "
@@ -279,15 +294,15 @@ def _plan_chunks(
279294

280295
def _read_remaining(response: requests.Response) -> int:
281296
"""Parse ``x-ratelimit-remaining`` from a response. Missing or
282-
malformed header → return a large sentinel so the safety check
297+
malformed header → return ``_QUOTA_UNKNOWN`` so the safety check
283298
treats it as 'plenty of quota' (don't abort on header glitches)."""
284299
raw = response.headers.get("x-ratelimit-remaining")
285300
if raw is None:
286-
return 10**9
301+
return _QUOTA_UNKNOWN
287302
try:
288303
return int(raw)
289304
except (TypeError, ValueError):
290-
return 10**9
305+
return _QUOTA_UNKNOWN
291306

292307

293308
def multi_value_chunked(
@@ -341,9 +356,7 @@ def wrapper(
341356
return fetch_once(args)
342357

343358
keys = list(plan)
344-
total = 1
345-
for k in keys:
346-
total *= len(plan[k])
359+
total = math.prod(len(plan[k]) for k in keys)
347360
frames: list[pd.DataFrame] = []
348361
responses: list[requests.Response] = []
349362
for i, combo in enumerate(itertools.product(*(plan[k] for k in keys))):

0 commit comments

Comments
 (0)