Skip to content

Commit ecf2833

Browse files
thodson-usgsclaude
andauthored
refactor(errors)!: unify request failures under a DataRetrievalError taxonomy (#313)
Before, an HTTP failure surfaced as a different exception type depending on which module made the request -- a ValueError (or bare Exception) on the legacy query() path, RuntimeError-based types on the waterdata path, a bare httpx.HTTPStatusError elsewhere -- so there was no single `except` for "any dataretrieval request failure". Introduce dataretrieval/exceptions.py (dependency-free, re-exported at top level as dataretrieval.<Name>), rooted at DataRetrievalError, with two intermediate bases that name the axes a caller reasons about: DataRetrievalError(Exception) |- BadRequestError(.., ValueError) # 400 |- NotFoundError(.., ValueError) # 404 |- RequestTooLarge(.., ValueError) # base: request too large to satisfy | |- URLTooLong # 414 / client-side URL reject | '- Unchunkable # chunker planner floor |- NoSitesError # empty result '- TransientError(.., RuntimeError) # base: retryable; carries retry_after |- RateLimited # 429 '- ServiceUnavailable # 5xx (both paths) - One type per condition, raised by both the legacy query() path and the Water Data chunker. Callers can catch a whole family (`except RequestTooLarge` / `except TransientError`); the chunker's retry check is a single isinstance(exc, TransientError). - query()'s inline status ladder is extracted into a reusable _raise_for_status(). - NoSitesError now subclasses DataRetrievalError (was Exception). - Built-in compatibility by kind: fatal client errors are also ValueError, transient transport errors also RuntimeError, so existing `except ValueError` / `except RuntimeError` handlers keep working. BREAKING CHANGES - The legacy query() path raises typed errors instead of ad-hoc ValueErrors (400 -> BadRequestError, 404 -> NotFoundError, 414/over-long URL -> URLTooLong). - A 5xx on the legacy query() path now raises ServiceUnavailable, a RuntimeError (was a ValueError): a transient server failure is a runtime condition, not a bad value. - The Water Data chunker's planner-floor error is Unchunkable (a RequestTooLarge subclass). - Import the transport types/bases from dataretrieval / dataretrieval.exceptions, not from dataretrieval.waterdata.chunking. Verified: 477 passed / 2 skipped, ruff clean; live API spot checks (404/400/ over-long URL raise the typed errors, 200 unaffected); all 21 example notebooks execute end-to-end against the live API (227/227 cells, 0 errors). Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent b06fbdd commit ecf2833

8 files changed

Lines changed: 338 additions & 125 deletions

File tree

dataretrieval/__init__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
1717
``nldi`` requires geopandas (``pip install dataretrieval[nldi]``) and is
1818
imported on demand: ``from dataretrieval import nldi``.
19+
20+
Every request failure raises a subclass of :class:`dataretrieval.DataRetrievalError`;
21+
the taxonomy lives in ``dataretrieval.exceptions``.
1922
"""
2023

2124
from importlib.metadata import PackageNotFoundError, version
@@ -25,7 +28,21 @@
2528
except PackageNotFoundError:
2629
__version__ = "version-unknown"
2730

31+
from dataretrieval.exceptions import (
32+
BadRequestError,
33+
DataRetrievalError,
34+
NoSitesError,
35+
NotFoundError,
36+
RateLimited,
37+
RequestTooLarge,
38+
ServiceUnavailable,
39+
TransientError,
40+
Unchunkable,
41+
URLTooLong,
42+
)
43+
2844
from . import (
45+
exceptions,
2946
nadp,
3047
nwis,
3148
samples,
@@ -36,12 +53,26 @@
3653
)
3754

3855
__all__ = [
56+
# service modules
3957
"nadp",
4058
"nwis",
4159
"samples",
4260
"streamstats",
4361
"utils",
4462
"waterdata",
4563
"wqp",
64+
# error taxonomy (canonical home: ``dataretrieval.exceptions``), re-exported
65+
# so callers can ``except dataretrieval.DataRetrievalError``
66+
"exceptions",
67+
"BadRequestError",
68+
"DataRetrievalError",
69+
"NoSitesError",
70+
"NotFoundError",
71+
"RateLimited",
72+
"RequestTooLarge",
73+
"ServiceUnavailable",
74+
"TransientError",
75+
"URLTooLong",
76+
"Unchunkable",
4677
"__version__",
4778
]

dataretrieval/exceptions.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
"""Exception taxonomy for ``dataretrieval``.
2+
3+
A failed request from any service module (``nwis``, ``wqp``, ``waterdata``,
4+
``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can
5+
handle any request failure with a single ``except dataretrieval.DataRetrievalError``.
6+
7+
The tree has two intermediate bases a caller can catch to span a whole family:
8+
:class:`RequestTooLarge` (the request can't fit, however it was issued) and
9+
:class:`TransientError` (a temporary failure worth retrying).
10+
11+
This module deliberately has no third-party dependencies, so any module can
12+
import it without pulling in pandas/httpx.
13+
"""
14+
15+
from __future__ import annotations
16+
17+
from typing import TYPE_CHECKING
18+
19+
if TYPE_CHECKING:
20+
import httpx
21+
22+
__all__ = [
23+
"DataRetrievalError",
24+
"BadRequestError",
25+
"NotFoundError",
26+
"RequestTooLarge",
27+
"URLTooLong",
28+
"Unchunkable",
29+
"NoSitesError",
30+
"TransientError",
31+
"RateLimited",
32+
"ServiceUnavailable",
33+
]
34+
35+
36+
class DataRetrievalError(Exception):
37+
"""Base class for errors raised when a request to a USGS or EPA web
38+
service fails.
39+
40+
Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...)
41+
raises a subclass of this when a request fails, so a caller can handle any
42+
request failure uniformly::
43+
44+
try:
45+
df, md = dataretrieval.wqp.get_results(...)
46+
except dataretrieval.DataRetrievalError:
47+
...
48+
49+
Subclasses also inherit from the built-in exception this package has
50+
historically raised for the condition's *kind* -- :class:`ValueError` for a
51+
request that can't succeed as written (bad params, too large), and
52+
:class:`RuntimeError` for a transient transport failure -- so existing
53+
``except ValueError`` / ``except RuntimeError`` handlers keep working.
54+
"""
55+
56+
57+
# --- Fatal client errors -------------------------------------------------
58+
# The request can't succeed as written; retrying it unchanged won't help. Each
59+
# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always
60+
# raised -- so existing ``except ValueError`` handlers keep working.
61+
62+
63+
class BadRequestError(DataRetrievalError, ValueError):
64+
"""The service rejected the request parameters (HTTP 400)."""
65+
66+
67+
class NotFoundError(DataRetrievalError, ValueError):
68+
"""The requested resource was not found; often an empty query (HTTP 404)."""
69+
70+
71+
class RequestTooLarge(DataRetrievalError, ValueError):
72+
"""The request is too large for the service to satisfy.
73+
74+
A base for the two ways a request can exceed what the service accepts;
75+
catch it to handle either. The concrete subclasses are :class:`URLTooLong`
76+
(a single request the server rejected) and :class:`Unchunkable` (the Water
77+
Data chunker could not split the call small enough to fit).
78+
"""
79+
80+
81+
class URLTooLong(RequestTooLarge):
82+
"""A single request URL exceeded the service's limit (HTTP 414, or rejected
83+
client-side before it was sent).
84+
85+
Raised by the legacy ``query`` path, which issues one request without
86+
chunking. Remediation: query fewer sites, or split the call manually.
87+
"""
88+
89+
90+
class Unchunkable(RequestTooLarge):
91+
"""No chunking plan fits the URL byte limit.
92+
93+
Raised by the Water Data chunker when even the smallest reducible plan
94+
(every list axis at one atom per sub-request, the filter at one clause per
95+
sub-request) still exceeds the server's byte limit -- so unlike
96+
:class:`URLTooLong`, automatic splitting has already been tried and
97+
exhausted. Shrink the input lists, simplify the filter, or split the call
98+
manually.
99+
"""
100+
101+
102+
class NoSitesError(DataRetrievalError):
103+
"""The selection criteria matched no sites/data."""
104+
105+
def __init__(self, url: httpx.URL) -> None:
106+
self.url = url
107+
108+
def __str__(self) -> str:
109+
return (
110+
"No sites/data found using the selection criteria specified in "
111+
f"url: {self.url}"
112+
)
113+
114+
115+
# --- Transient transport errors ------------------------------------------
116+
# The service was reachable but temporarily refused the request; the same call
117+
# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the
118+
# waterdata path has always raised). The Water Data chunker recognizes them via
119+
# ``isinstance(exc, TransientError)`` and wraps them as resumable
120+
# ``ChunkInterrupted`` subclasses.
121+
122+
123+
class TransientError(DataRetrievalError, RuntimeError):
124+
"""Base for transient HTTP failures that are worth an automatic retry.
125+
126+
One subclass per recoverable HTTP status family (429 -> :class:`RateLimited`,
127+
5xx -> :class:`ServiceUnavailable`); the Water Data chunker recognizes them
128+
by this shared base and wraps them as resumable interruptions.
129+
130+
Parameters
131+
----------
132+
message : str
133+
Human-readable error message.
134+
retry_after : float, optional
135+
Seconds to wait before retrying, parsed from the ``Retry-After``
136+
response header; stored on the :attr:`retry_after` attribute (``None``
137+
when the header is absent or unparseable).
138+
"""
139+
140+
def __init__(self, message: str, *, retry_after: float | None = None) -> None:
141+
super().__init__(message)
142+
self.retry_after = retry_after
143+
144+
145+
class RateLimited(TransientError):
146+
"""A request was rejected with HTTP 429 (too many requests)."""
147+
148+
149+
class ServiceUnavailable(TransientError):
150+
"""A request was rejected with a server error (HTTP 5xx).
151+
152+
Raised by both the legacy ``query`` path and the Water Data path, so a 5xx
153+
surfaces as one type regardless of which subsystem issued the request.
154+
"""

dataretrieval/utils.py

Lines changed: 46 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313

1414
import dataretrieval
1515
from dataretrieval.codes import tz
16+
from dataretrieval.exceptions import (
17+
BadRequestError,
18+
NoSitesError,
19+
NotFoundError,
20+
ServiceUnavailable,
21+
URLTooLong,
22+
)
1623

1724
# Typed as ``dict[str, Any]`` (not the inferred ``dict[str, object]``) so that
1825
# splatting it as ``**HTTPX_DEFAULTS`` into ``httpx.get`` / ``httpx.AsyncClient``
@@ -274,14 +281,42 @@ def __repr__(self) -> str:
274281
data_list.append(data) # append results to list"""
275282

276283

277-
def _url_too_long_error(detail: str) -> ValueError:
278-
return ValueError(
284+
def _url_too_long_error(detail: str) -> URLTooLong:
285+
return URLTooLong(
279286
"Request URL too long. Modify your query to use fewer sites. "
280287
f"{detail}. Pseudo-code example of how to split your query: "
281288
f"\n {_URL_TOO_LONG_EXAMPLE}"
282289
)
283290

284291

292+
def _raise_for_status(response: httpx.Response) -> None:
293+
"""Map an unsuccessful HTTP status to a typed :class:`DataRetrievalError`;
294+
return ``None`` on success.
295+
296+
Shared by the legacy :func:`query` path. The 4xx types stay
297+
:class:`ValueError`-compatible (this path's historical contract), but a 5xx
298+
raises the transient :class:`ServiceUnavailable` (a :class:`RuntimeError`),
299+
since a server failure is retryable rather than a bad request.
300+
"""
301+
status = response.status_code
302+
if status == 400:
303+
raise BadRequestError(
304+
f"Bad Request, check that your parameters are correct. URL: {response.url}"
305+
)
306+
elif status == 404:
307+
raise NotFoundError(
308+
"Page Not Found Error. May be the result of an empty query. "
309+
f"URL: {response.url}"
310+
)
311+
elif status == 414:
312+
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
313+
elif 500 <= status < 600:
314+
raise ServiceUnavailable(
315+
f"Service Unavailable: {status} {response.reason_phrase}. "
316+
f"The service at {response.url} may be down or experiencing issues."
317+
)
318+
319+
285320
def query(
286321
url: str,
287322
payload: dict[str, Any],
@@ -312,11 +347,14 @@ def query(
312347
313348
Raises
314349
------
315-
ValueError
316-
If the service returns a 400, 404, 414, or 5xx status code, or if
317-
``httpx`` rejects the URL client-side (e.g. it is too long).
318-
NoSitesError
319-
If the response indicates that no sites or data matched the query.
350+
DataRetrievalError
351+
On failure: :class:`~dataretrieval.exceptions.BadRequestError` (400),
352+
:class:`~dataretrieval.exceptions.NotFoundError` (404),
353+
:class:`~dataretrieval.exceptions.URLTooLong` (414 or a client-side
354+
over-long URL), :class:`~dataretrieval.exceptions.ServiceUnavailable`
355+
(5xx), or :class:`~dataretrieval.exceptions.NoSitesError` (no sites/data
356+
matched). The 4xx types are also :class:`ValueError`;
357+
``ServiceUnavailable`` is a :class:`RuntimeError`.
320358
"""
321359

322360
for key, value in payload.items():
@@ -338,37 +376,9 @@ def query(
338376
except httpx.InvalidURL as exc:
339377
raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc
340378

341-
if response.status_code == 400:
342-
raise ValueError(
343-
f"Bad Request, check that your parameters are correct. URL: {response.url}"
344-
)
345-
elif response.status_code == 404:
346-
raise ValueError(
347-
"Page Not Found Error. May be the result of an empty query. "
348-
+ f"URL: {response.url}"
349-
)
350-
elif response.status_code == 414:
351-
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
352-
elif 500 <= response.status_code < 600:
353-
raise ValueError(
354-
f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
355-
+ f"The service at {response.url} may be down or experiencing issues."
356-
)
379+
_raise_for_status(response)
357380

358381
if response.text.startswith("No sites/data"):
359382
raise NoSitesError(response.url)
360383

361384
return response
362-
363-
364-
class NoSitesError(Exception):
365-
"""Custom error class used when selection criteria return no sites/data."""
366-
367-
def __init__(self, url: httpx.URL) -> None:
368-
self.url = url
369-
370-
def __str__(self) -> str:
371-
return (
372-
"No sites/data found using the selection criteria specified in "
373-
f"url: {self.url}"
374-
)

0 commit comments

Comments
 (0)