Skip to content

Commit adb6629

Browse files
thodson-usgs and claude authored
fix(waterdata): make get_ratings resilient and stop truncating results (#304)
Two reliability issues in the ratings getter:

1. The per-feature download loop caught only (httpx.HTTPError, ValueError, OSError), but _download_and_parse -> _raise_for_non_200 raises the module's typed errors (RateLimited / ServiceUnavailable / RuntimeError, all RuntimeError subclasses), and a feature missing its data asset raises LookupError. So one rate-limited / failed / malformed feature aborted the entire multi-site call instead of being logged and skipped. Broadened the except to cover RuntimeError and LookupError.

2. _search sent `limit` verbatim and returned only the first page, silently truncating large result sets despite the docstring. It now clamps the page size to the service max (10,000) and follows the STAC `next` link until exhausted, returning all matching features. (Behavior change for the narrow case it fixes: >1-page queries, or small explicit `limit`s, now return all matches; common default queries are unchanged.)

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 0cb38d2 commit adb6629

1 file changed

Lines changed: 41 additions & 12 deletions

File tree

dataretrieval/waterdata/ratings.py

Lines changed: 41 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,18 @@ def get_ratings(
184184
fid = feature["id"]
185185
try:
186186
out[fid] = _download_and_parse(feature, file_path, ssl_check)
187-
except (httpx.HTTPError, ValueError, OSError) as e:
187+
# _download_and_parse can raise the module's typed errors via
188+
# _raise_for_non_200 (RateLimited / ServiceUnavailable / RuntimeError —
189+
# all RuntimeError subclasses), and a feature missing its data asset
190+
# raises LookupError. Catch those too so one bad feature is logged and
191+
# skipped rather than aborting the whole multi-site batch.
192+
except (
193+
httpx.HTTPError,
194+
RuntimeError,
195+
ValueError,
196+
OSError,
197+
LookupError,
198+
) as e:
188199
logger.warning("Failed to download / parse %s: %s", fid, e)
189200

190201
return out
@@ -229,24 +240,42 @@ def _search(
229240
limit: int,
230241
ssl_check: bool,
231242
) -> list[dict[str, Any]]:
232-
"""Run a single STAC ``/search`` request and return its features."""
233-
params: dict[str, Any] = {"limit": limit}
243+
"""Run STAC ``/search`` and return ALL matching features.
244+
245+
``limit`` is the page size (clamped to the service maximum of 10,000); the
246+
STAC ``next`` link is followed until exhausted so a result set larger than
247+
one page isn't silently truncated.
248+
"""
249+
params: dict[str, Any] | None = {"limit": min(limit, 10000)}
234250
if filter_str is not None:
235251
params["filter"] = filter_str
236252
if time_str is not None:
237253
params["datetime"] = time_str
238254
if bbox is not None:
239255
params["bbox"] = ",".join(map(str, bbox))
240256

241-
response = httpx.get(
242-
f"{STAC_URL}/search",
243-
params=params,
244-
headers=_default_headers(),
245-
verify=ssl_check,
246-
**HTTPX_DEFAULTS,
247-
)
248-
_raise_for_non_200(response)
249-
return response.json().get("features", [])
257+
url: str | None = f"{STAC_URL}/search"
258+
features: list[dict[str, Any]] = []
259+
while url is not None:
260+
response = httpx.get(
261+
url,
262+
params=params,
263+
headers=_default_headers(),
264+
verify=ssl_check,
265+
**HTTPX_DEFAULTS,
266+
)
267+
_raise_for_non_200(response)
268+
body = response.json()
269+
features.extend(body.get("features", []))
270+
# The STAC ``next`` link is a fully-formed GET href carrying the
271+
# limit/filter/bbox and a continuation token, so follow it verbatim
272+
# (dropping our own params) until the server stops emitting one.
273+
url = next(
274+
(lnk["href"] for lnk in body.get("links", []) if lnk.get("rel") == "next"),
275+
None,
276+
)
277+
params = None
278+
return features
250279

251280

252281
def _download_and_parse(

0 commit comments

Comments
 (0)