Skip to content

Commit 2afd2d7

Browse files
thodson-usgsclaude
andcommitted
refactor(errors): introduce DataRetrievalError as a unified request-error root
An HTTP failure surfaced as a different exception type depending on which module made the request: - legacy query() (wqp, nwis, ngwmn, nldi): ValueError, or NoSitesError(Exception) - waterdata: RateLimited / ServiceUnavailable (RuntimeError), RequestTooLarge (ValueError), ChunkInterrupted (RuntimeError) - nadp / streamstats: bare httpx.HTTPStatusError so no single `except` clause could catch "any dataretrieval request failure." Add a shared base class, dataretrieval.utils.DataRetrievalError, and root the existing exceptions on it. Unification is at the root only -- each exception keeps the built-in type it has always raised, so the change is backward compatible: - New typed errors for the legacy query() path -- BadRequestError (400), NotFoundError (404), RequestTooLargeError (414), ServiceUnavailableError (5xx) -- each subclassing (DataRetrievalError, ValueError). Messages and the set of statuses query() raises on are unchanged, and every type is still a ValueError, so existing `except ValueError` handlers keep working. - Consolidate query()'s inline status-code ladder into a reusable _raise_for_status() helper. - NoSitesError now subclasses DataRetrievalError (was Exception). - waterdata's RateLimited / ServiceUnavailable / RequestTooLarge / ChunkInterrupted gain DataRetrievalError as an additional base (additive -- they remain RuntimeError / ValueError, so the chunker's isinstance classification and all existing handlers are unaffected). Callers can now write `except dataretrieval.DataRetrievalError` to handle a failure from any module while keeping the historical built-in types. Out of scope (follow-ups): nadp/streamstats still raise httpx.HTTPStatusError; nldi's manual non-200 ValueError and nwis._parse_json_or_raise's HTML-detection ValueError are not yet rooted; waterdata's _raise_for_non_200 catch-all for non-retryable 4xx stays a bare RuntimeError (load-bearing for the chunker's fatal-vs-resumable classifier). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 473a07c commit 2afd2d7

3 files changed

Lines changed: 141 additions & 23 deletions

File tree

dataretrieval/utils.py

Lines changed: 69 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -270,14 +270,79 @@ def __repr__(self) -> str:
270270
data_list.append(data) # append results to list"""
271271

272272

273-
def _url_too_long_error(detail: str) -> ValueError:
274-
return ValueError(
273+
class DataRetrievalError(Exception):
274+
"""Base class for errors raised when a request to a USGS or EPA web
275+
service fails.
276+
277+
Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...)
278+
raises a subclass of this when a request fails, so a caller can handle any
279+
request failure uniformly::
280+
281+
try:
282+
df, md = dataretrieval.wqp.get_results(...)
283+
except dataretrieval.DataRetrievalError:
284+
...
285+
286+
Subclasses also inherit from the built-in exception this package has
287+
historically raised for the same condition (e.g. :class:`BadRequestError`
288+
is also a :class:`ValueError`), so existing ``except ValueError`` handlers
289+
keep working unchanged.
290+
"""
291+
292+
293+
class BadRequestError(DataRetrievalError, ValueError):
294+
"""The service rejected the request parameters (HTTP 400)."""
295+
296+
297+
class NotFoundError(DataRetrievalError, ValueError):
298+
"""The requested resource was not found; often an empty query (HTTP 404)."""
299+
300+
301+
class RequestTooLargeError(DataRetrievalError, ValueError):
302+
"""The request URL was too long for the service (HTTP 414, or rejected
303+
client-side before it was sent)."""
304+
305+
306+
class ServiceUnavailableError(DataRetrievalError, ValueError):
307+
"""The service is down or returned a server error (HTTP 5xx)."""
308+
309+
310+
def _url_too_long_error(detail: str) -> RequestTooLargeError:
311+
return RequestTooLargeError(
275312
"Request URL too long. Modify your query to use fewer sites. "
276313
f"{detail}. Pseudo-code example of how to split your query: "
277314
f"\n {_URL_TOO_LONG_EXAMPLE}"
278315
)
279316

280317

318+
def _raise_for_status(response: httpx.Response) -> None:
319+
"""Raise a typed :class:`DataRetrievalError` for an unsuccessful response.
320+
321+
Centralizes the HTTP-status-to-exception mapping for the shared
322+
:func:`query` path so every legacy service module (``wqp``, ``nwis``,
323+
``ngwmn``, ``nldi``) surfaces request failures the same way. A successful
324+
response returns ``None``. The raised types are also :class:`ValueError`
325+
subclasses, preserving this module's historical contract.
326+
"""
327+
status = response.status_code
328+
if status == 400:
329+
raise BadRequestError(
330+
f"Bad Request, check that your parameters are correct. URL: {response.url}"
331+
)
332+
elif status == 404:
333+
raise NotFoundError(
334+
"Page Not Found Error. May be the result of an empty query. "
335+
+ f"URL: {response.url}"
336+
)
337+
elif status == 414:
338+
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
339+
elif 500 <= status < 600:
340+
raise ServiceUnavailableError(
341+
f"Service Unavailable: {status} {response.reason_phrase}. "
342+
+ f"The service at {response.url} may be down or experiencing issues."
343+
)
344+
345+
281346
def query(url, payload, delimiter=",", ssl_check=True):
282347
"""Send a query.
283348
@@ -321,30 +386,15 @@ def query(url, payload, delimiter=",", ssl_check=True):
321386
except httpx.InvalidURL as exc:
322387
raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc
323388

324-
if response.status_code == 400:
325-
raise ValueError(
326-
f"Bad Request, check that your parameters are correct. URL: {response.url}"
327-
)
328-
elif response.status_code == 404:
329-
raise ValueError(
330-
"Page Not Found Error. May be the result of an empty query. "
331-
+ f"URL: {response.url}"
332-
)
333-
elif response.status_code == 414:
334-
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
335-
elif 500 <= response.status_code < 600:
336-
raise ValueError(
337-
f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
338-
+ f"The service at {response.url} may be down or experiencing issues."
339-
)
389+
_raise_for_status(response)
340390

341391
if response.text.startswith("No sites/data"):
342392
raise NoSitesError(response.url)
343393

344394
return response
345395

346396

347-
class NoSitesError(Exception):
397+
class NoSitesError(DataRetrievalError):
348398
"""Custom error class used when selection criteria returns no sites/data."""
349399

350400
def __init__(self, url):

dataretrieval/waterdata/chunking.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
import pandas as pd
6767
from anyio.from_thread import start_blocking_portal
6868

69-
from dataretrieval.utils import HTTPX_DEFAULTS
69+
from dataretrieval.utils import HTTPX_DEFAULTS, DataRetrievalError
7070

7171
from . import _progress
7272
from .filters import (
@@ -383,7 +383,7 @@ def _passthrough_result(
383383
return frame, response
384384

385385

386-
class _RetryableTransportError(RuntimeError):
386+
class _RetryableTransportError(DataRetrievalError, RuntimeError):
387387
"""
388388
Base for typed HTTP transport failures the chunker recognizes as
389389
transient.
@@ -435,7 +435,7 @@ class ServiceUnavailable(_RetryableTransportError):
435435
"""
436436

437437

438-
class RequestTooLarge(ValueError):
438+
class RequestTooLarge(DataRetrievalError, ValueError):
439439
"""
440440
No chunking plan fits the URL byte limit.
441441
@@ -446,7 +446,7 @@ class RequestTooLarge(ValueError):
446446
"""
447447

448448

449-
class ChunkInterrupted(RuntimeError):
449+
class ChunkInterrupted(DataRetrievalError, RuntimeError):
450450
"""
451451
Base class for mid-stream chunk failures whose completed work is
452452
preserved and resumable.

tests/utils_test.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,74 @@ def test_header(self):
4242
assert "user-agent" in response.request.headers
4343

4444

45+
class Test_error_taxonomy:
46+
"""The unified request-error hierarchy.
47+
48+
Every module's request failures are catchable as ``DataRetrievalError``,
49+
while remaining backward-compatible with the built-in type each path
50+
historically raised (``ValueError`` for the legacy ``query`` path,
51+
``RuntimeError`` for the waterdata retryable types).
52+
"""
53+
54+
@pytest.mark.parametrize(
55+
"status, exc_name, match",
56+
[
57+
(400, "BadRequestError", "Bad Request"),
58+
(404, "NotFoundError", "Page Not Found"),
59+
(414, "RequestTooLargeError", "Request URL too long"),
60+
(503, "ServiceUnavailableError", "Service Unavailable: 503"),
61+
],
62+
)
63+
def test_query_maps_status_to_typed_error(
64+
self, httpx_mock, status, exc_name, match
65+
):
66+
"""``query`` maps each HTTP status family to a typed error that is both
67+
a ``DataRetrievalError`` (new, unified) and a ``ValueError`` (the type
68+
this path has always raised), with the message preserved."""
69+
exc_cls = getattr(utils, exc_name)
70+
url = "https://example.invalid/x"
71+
httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=status)
72+
with pytest.raises(exc_cls, match=match) as excinfo:
73+
utils.query(url, {"a": "1"})
74+
assert isinstance(excinfo.value, utils.DataRetrievalError)
75+
assert isinstance(excinfo.value, ValueError) # backward compatibility
76+
77+
def test_query_failure_catchable_as_base(self, httpx_mock):
78+
"""A bare ``except DataRetrievalError`` catches a legacy query failure."""
79+
url = "https://example.invalid/y"
80+
httpx_mock.add_response(method="GET", url=f"{url}?a=1", status_code=400)
81+
with pytest.raises(utils.DataRetrievalError):
82+
utils.query(url, {"a": "1"})
83+
84+
def test_no_sites_error_is_data_retrieval_error(self):
85+
"""``NoSitesError`` joins the root (was a bare ``Exception``)."""
86+
assert issubclass(utils.NoSitesError, utils.DataRetrievalError)
87+
assert not issubclass(utils.NoSitesError, ValueError) # unchanged
88+
89+
def test_waterdata_exceptions_share_the_root(self):
90+
"""waterdata's typed exceptions are ``DataRetrievalError`` too, so one
91+
``except`` clause spans the legacy and waterdata subsystems — while
92+
keeping their historical ``RuntimeError``/``ValueError`` bases."""
93+
from dataretrieval.waterdata.chunking import (
94+
ChunkInterrupted,
95+
RateLimited,
96+
RequestTooLarge,
97+
ServiceUnavailable,
98+
)
99+
100+
for cls in (RateLimited, ServiceUnavailable, RequestTooLarge, ChunkInterrupted):
101+
assert issubclass(cls, utils.DataRetrievalError)
102+
assert issubclass(RateLimited, RuntimeError)
103+
assert issubclass(ServiceUnavailable, RuntimeError)
104+
assert issubclass(RequestTooLarge, ValueError)
105+
106+
def test_base_exported_at_top_level(self):
107+
"""Users can write ``except dataretrieval.DataRetrievalError``."""
108+
import dataretrieval
109+
110+
assert dataretrieval.DataRetrievalError is utils.DataRetrievalError
111+
112+
45113
class Test_BaseMetadata:
46114
"""Tests of BaseMetadata"""
47115

0 commit comments

Comments
 (0)