refactor(errors): unify request errors under a DataRetrievalError taxonomy

thodson-usgs · claude · thodson-usgs · commit 9801177a088c · 2026-06-01T13:05:23.000-04:00
An HTTP failure used to surface as a different exception type depending on which
module made the request:

  - legacy query() (wqp, nwis, ngwmn, nldi): ValueError, or NoSitesError(Exception)
  - waterdata: RateLimited / ServiceUnavailable (RuntimeError),
    RequestTooLarge (ValueError), ChunkInterrupted (RuntimeError)
  - nadp / streamstats: bare httpx.HTTPStatusError

so no single `except` clause could catch "any dataretrieval request failure."

Add a `DataRetrievalError` base and root the existing exceptions on it, so a
caller can `except dataretrieval.DataRetrievalError` for any request failure.
Each subclass also keeps the built-in it has historically raised (BadRequestError
is a ValueError; RateLimited is a RuntimeError), so existing `except ValueError` /
`except RuntimeError` handlers keep working unchanged.

The taxonomy lives in a new, dependency-free `dataretrieval/exceptions.py` — the
single home for it (cf. requests.exceptions, botocore.exceptions) — rather than
buried in the pandas/httpx-heavy utils.py:

  - exceptions.py: DataRetrievalError + the legacy query() types (BadRequestError,
    NotFoundError, RequestTooLargeError, ServiceUnavailableError; all ValueError)
    + NoSitesError + the Water Data transport types (RateLimited, ServiceUnavailable,
    RequestTooLarge). Explicit __all__, re-exported from dataretrieval/__init__.py
    so the top-level export is intentional, not an accident of `import *`.
  - utils.py keeps the behavior: query() and a consolidated _raise_for_status()
    status-&gt;exception mapper; it imports the types it raises.
  - chunking.py keeps the chunker-specific resumable types (ChunkInterrupted and
    its subclasses, which carry a ChunkedCall resume handle) and imports the
    transport types from exceptions.

The old import paths still resolve (utils.NoSitesError,
waterdata.chunking.RateLimited, ...) via re-import, so nothing downstream breaks.

Out of scope (follow-ups): nadp/streamstats still raise httpx.HTTPStatusError;
nldi's manual non-200 ValueError isn't rooted; waterdata.utils._raise_for_non_200's
catch-all for non-retryable 4xx stays a bare RuntimeError (a deliberate
fatal/non-resumable signal the chunker relies on).

Co-Authored-By: Claude Opus 4.8 (1M context) &lt;noreply@anthropic.com&gt;
diff --git a/dataretrieval/__init__.py b/dataretrieval/__init__.py
@@ -5,6 +5,7 @@
 except PackageNotFoundError:
     __version__ = "version-unknown"
 
+from dataretrieval.exceptions import *
 from dataretrieval.nadp import *
 from dataretrieval.nwis import *
 from dataretrieval.samples import *
diff --git a/dataretrieval/exceptions.py b/dataretrieval/exceptions.py
@@ -0,0 +1,142 @@
+"""Exception taxonomy for ``dataretrieval``.
+
+A failed request from any service module (``nwis``, ``wqp``, ``waterdata``,
+``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can
+handle any request failure with a single ``except dataretrieval.DataRetrievalError``.
+
+This module deliberately has no third-party dependencies, so any module can
+import it without pulling in pandas/httpx.
+"""
+
+from __future__ import annotations
+
+__all__ = [
+    "DataRetrievalError",
+    "BadRequestError",
+    "NotFoundError",
+    "RequestTooLargeError",
+    "ServiceUnavailableError",
+    "NoSitesError",
+    "RateLimited",
+    "ServiceUnavailable",
+    "RequestTooLarge",
+]
+
+
+class DataRetrievalError(Exception):
+    """Base class for errors raised when a request to a USGS or EPA web
+    service fails.
+
+    Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...)
+    raises a subclass of this when a request fails, so a caller can handle any
+    request failure uniformly::
+
+        try:
+            df, md = dataretrieval.wqp.get_results(...)
+        except dataretrieval.DataRetrievalError:
+            ...
+
+    Subclasses also inherit from the built-in exception this package has
+    historically raised for the same condition (e.g. :class:`BadRequestError`
+    is also a :class:`ValueError`, :class:`RateLimited` is also a
+    :class:`RuntimeError`), so existing ``except ValueError`` / ``except
+    RuntimeError`` handlers keep working unchanged.
+    """
+
+
+# Legacy ``query()`` path: HTTP status families mapped to ValueError-compatible
+# types (the type that path has always raised).
+class BadRequestError(DataRetrievalError, ValueError):
+    """The service rejected the request parameters (HTTP 400)."""
+
+
+class NotFoundError(DataRetrievalError, ValueError):
+    """The requested resource was not found; often an empty query (HTTP 404)."""
+
+
+class RequestTooLargeError(DataRetrievalError, ValueError):
+    """The request URL was too long for the service (HTTP 414, or rejected
+    client-side before it was sent)."""
+
+
+class ServiceUnavailableError(DataRetrievalError, ValueError):
+    """The service is down or returned a server error (HTTP 5xx)."""
+
+
+class NoSitesError(DataRetrievalError):
+    """The selection criteria matched no sites/data."""
+
+    def __init__(self, url):
+        self.url = url
+
+    def __str__(self):
+        return (
+            "No sites/data found using the selection criteria specified in "
+            f"url: {self.url}"
+        )
+
+
+# Water Data API transport errors: retryable HTTP status families, surfaced as
+# RuntimeError-compatible types the chunker detects via ``isinstance`` and wraps
+# as resumable interruptions.
+class _RetryableTransportError(DataRetrievalError, RuntimeError):
+    """
+    Base for typed HTTP transport failures the chunker recognizes as
+    transient.
+
+    Raised by :func:`dataretrieval.waterdata.utils._raise_for_non_200`
+    and walked by :func:`dataretrieval.waterdata.chunking._classify_chunk_error`.
+    One subclass per recoverable HTTP status family (429 → :class:`RateLimited`,
+    5xx → :class:`ServiceUnavailable`); ``ChunkedCall`` wraps them as resumable
+    :class:`~dataretrieval.waterdata.chunking.ChunkInterrupted` subclasses.
+
+    Parameters
+    ----------
+    message : str
+        Human-readable error message.
+    retry_after : float, optional
+        Seconds to wait before retrying, parsed from the
+        ``Retry-After`` response header.
+
+    Attributes
+    ----------
+    retry_after : float or None
+        Seconds to wait before retrying, parsed from the
+        ``Retry-After`` response header. ``None`` when the header was
+        absent or unparseable.
+    """
+
+    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+class RateLimited(_RetryableTransportError):
+    """
+    A USGS Water Data API request was rejected with HTTP 429.
+
+    Exposed as a typed exception so callers (notably the multi-value
+    chunker) can detect rate-limit failures via ``isinstance`` instead
+    of string-matching error messages.
+    """
+
+
+class ServiceUnavailable(_RetryableTransportError):
+    """
+    A USGS Water Data API request was rejected with HTTP 5xx.
+
+    Surfaced as a typed exception (parallel to :class:`RateLimited`)
+    so ``ChunkedCall`` can treat transient server failures as
+    resumable interruptions rather than fatal programmer errors.
+    """
+
+
+class RequestTooLarge(DataRetrievalError, ValueError):
+    """
+    No chunking plan fits the URL byte limit.
+
+    Raised when even the smallest reducible plan (every list axis at
+    singleton chunks and the filter at one clause per sub-request)
+    still exceeds the server's byte limit. Shrink the input lists,
+    simplify the filter, or split the call manually.
+    """
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
@@ -10,6 +10,13 @@
 
 import dataretrieval
 from dataretrieval.codes import tz
+from dataretrieval.exceptions import (
+    BadRequestError,
+    NoSitesError,
+    NotFoundError,
+    RequestTooLargeError,
+    ServiceUnavailableError,
+)
 
 HTTPX_DEFAULTS = {
     "follow_redirects": True,
@@ -270,14 +277,42 @@ def __repr__(self) -> str:
                         data_list.append(data)  # append results to list"""
 
 
-def _url_too_long_error(detail: str) -> ValueError:
-    return ValueError(
+def _url_too_long_error(detail: str) -> RequestTooLargeError:
+    return RequestTooLargeError(
         "Request URL too long. Modify your query to use fewer sites. "
         f"{detail}. Pseudo-code example of how to split your query: "
         f"\n {_URL_TOO_LONG_EXAMPLE}"
     )
 
 
+def _raise_for_status(response: httpx.Response) -> None:
+    """Raise a typed :class:`DataRetrievalError` for an unsuccessful response.
+
+    Centralizes the HTTP-status-to-exception mapping for the shared
+    :func:`query` path so every legacy service module (``wqp``, ``nwis``,
+    ``ngwmn``, ``nldi``) surfaces request failures the same way. A successful
+    response returns ``None``. The raised types are also :class:`ValueError`
+    subclasses, preserving this module's historical contract.
+    """
+    status = response.status_code
+    if status == 400:
+        raise BadRequestError(
+            f"Bad Request, check that your parameters are correct. URL: {response.url}"
+        )
+    elif status == 404:
+        raise NotFoundError(
+            "Page Not Found Error. May be the result of an empty query. "
+            f"URL: {response.url}"
+        )
+    elif status == 414:
+        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
+    elif 500 <= status < 600:
+        raise ServiceUnavailableError(
+            f"Service Unavailable: {status} {response.reason_phrase}. "
+            f"The service at {response.url} may be down or experiencing issues."
+        )
+
+
 def query(url, payload, delimiter=",", ssl_check=True):
     """Send a query.
 
@@ -321,37 +356,9 @@ def query(url, payload, delimiter=",", ssl_check=True):
     except httpx.InvalidURL as exc:
         raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc
 
-    if response.status_code == 400:
-        raise ValueError(
-            f"Bad Request, check that your parameters are correct. URL: {response.url}"
-        )
-    elif response.status_code == 404:
-        raise ValueError(
-            "Page Not Found Error. May be the result of an empty query. "
-            + f"URL: {response.url}"
-        )
-    elif response.status_code == 414:
-        raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
-    elif 500 <= response.status_code < 600:
-        raise ValueError(
-            f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
-            + f"The service at {response.url} may be down or experiencing issues."
-        )
+    _raise_for_status(response)
 
     if response.text.startswith("No sites/data"):
         raise NoSitesError(response.url)
 
     return response
-
-
-class NoSitesError(Exception):
-    """Custom error class used when selection criteria returns no sites/data."""
-
-    def __init__(self, url):
-        self.url = url
-
-    def __str__(self):
-        return (
-            "No sites/data found using the selection criteria specified in "
-            f"url: {self.url}"
-        )
diff --git a/dataretrieval/waterdata/chunking.py b/dataretrieval/waterdata/chunking.py
@@ -66,6 +66,12 @@
 import pandas as pd
 from anyio.from_thread import start_blocking_portal
 
+from dataretrieval.exceptions import (
+    DataRetrievalError,
+    RateLimited,
+    RequestTooLarge,
+    ServiceUnavailable,
+)
 from dataretrieval.utils import HTTPX_DEFAULTS
 
 from . import _progress
@@ -383,70 +389,7 @@ def _passthrough_result(
     return frame, response
 
 
-class _RetryableTransportError(RuntimeError):
-    """
-    Base for typed HTTP transport failures the chunker recognizes as
-    transient.
-
-    Raised by :func:`dataretrieval.waterdata.utils._raise_for_non_200`
-    and walked by :func:`_classify_chunk_error`. One subclass per
-    recoverable HTTP status family (429 → :class:`RateLimited`,
-    5xx → :class:`ServiceUnavailable`); ``ChunkedCall`` wraps them as
-    resumable :class:`ChunkInterrupted` subclasses.
-
-    Parameters
-    ----------
-    message : str
-        Human-readable error message.
-    retry_after : float, optional
-        Seconds to wait before retrying, parsed from the
-        ``Retry-After`` response header.
-
-    Attributes
-    ----------
-    retry_after : float or None
-        Seconds to wait before retrying, parsed from the
-        ``Retry-After`` response header. ``None`` when the header was
-        absent or unparseable.
-    """
-
-    def __init__(self, message: str, *, retry_after: float | None = None) -> None:
-        super().__init__(message)
-        self.retry_after = retry_after
-
-
-class RateLimited(_RetryableTransportError):
-    """
-    A USGS Water Data API request was rejected with HTTP 429.
-
-    Exposed as a typed exception so callers (notably the multi-value
-    chunker) can detect rate-limit failures via ``isinstance`` instead
-    of string-matching error messages.
-    """
-
-
-class ServiceUnavailable(_RetryableTransportError):
-    """
-    A USGS Water Data API request was rejected with HTTP 5xx.
-
-    Surfaced as a typed exception (parallel to :class:`RateLimited`)
-    so ``ChunkedCall`` can treat transient server failures as
-    resumable interruptions rather than fatal programmer errors.
-    """
-
-
-class RequestTooLarge(ValueError):
-    """
-    No chunking plan fits the URL byte limit.
-
-    Raised when even the smallest reducible plan (every list axis at
-    singleton chunks and the filter at one clause per sub-request)
-    still exceeds the server's byte limit. Shrink the input lists,
-    simplify the filter, or split the call manually.
-    """
-
-
-class ChunkInterrupted(RuntimeError):
+class ChunkInterrupted(DataRetrievalError, RuntimeError):
     """
     Base class for mid-stream chunk failures whose completed work is
     preserved and resumable.
diff --git a/dataretrieval/waterdata/utils.py b/dataretrieval/waterdata/utils.py
@@ -26,12 +26,11 @@
 from anyio.from_thread import start_blocking_portal
 
 from dataretrieval import __version__
+from dataretrieval.exceptions import RateLimited, ServiceUnavailable
 from dataretrieval.utils import HTTPX_DEFAULTS, BaseMetadata
 from dataretrieval.waterdata import _progress, chunking
 from dataretrieval.waterdata.chunking import (
     _QUOTA_HEADER,
-    RateLimited,
-    ServiceUnavailable,
     _safe_elapsed,
     get_active_client,
 )
diff --git a/tests/utils_test.py b/tests/utils_test.py