|
1 | 1 | """Exception taxonomy for ``dataretrieval``. |
2 | 2 |
|
3 | | -A failed request from any service module (``nwis``, ``wqp``, ``waterdata``, |
4 | | -``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can |
5 | | -handle any request failure with a single ``except dataretrieval.DataRetrievalError``. |
6 | | -
|
7 | | -The tree has two intermediate bases a caller can catch to span a whole family: |
8 | | -:class:`RequestTooLarge` (the request can't fit, however it was issued) and |
9 | | -:class:`TransientError` (a temporary failure worth retrying). |
10 | | -
|
11 | | -This module deliberately has no third-party dependencies, so any module can |
12 | | -import it without pulling in pandas/httpx. |
| 3 | +When a request gets an HTTP error response, the service modules (``nwis``, |
| 4 | +``wqp``, ``nldi``, ``waterdata``, ``nadp``, ``streamstats``) raise a subclass of |
| 5 | +:class:`DataRetrievalError`, so a caller can handle any of them with one |
| 6 | +``except dataretrieval.DataRetrievalError``. Connection-level failures (timeouts, |
| 7 | +DNS, refused connections) surface as ``httpx`` exceptions on the single-shot |
| 8 | +request paths. |
| 9 | +
|
| 10 | +A status error is an :class:`HTTPError` carrying ``.status_code`` (inspect it to |
| 11 | +branch on the specific code); :class:`TransientError` is the retryable subset |
| 12 | +(429 / 5xx). A few failures are not a plain status -- :class:`RequestTooLarge` |
| 13 | +(:class:`URLTooLong` / :class:`Unchunkable`) and :class:`NoDataError`. |
| 14 | +
|
| 15 | +This module imports only ``httpx`` (the package's core HTTP dependency, always |
| 16 | +installed) -- not pandas/geopandas -- so it stays cheap to import and free of |
| 17 | +import cycles. |
13 | 18 | """ |
14 | 19 |
|
15 | 20 | from __future__ import annotations |
16 | 21 |
|
17 | | -from typing import TYPE_CHECKING |
| 22 | +from typing import Any, ClassVar |
18 | 23 |
|
19 | | -if TYPE_CHECKING: |
20 | | - import httpx |
| 24 | +import httpx |
21 | 25 |
|
22 | 26 | __all__ = [ |
23 | 27 | "DataRetrievalError", |
24 | | - "BadRequestError", |
25 | | - "NotFoundError", |
26 | | - "RequestTooLarge", |
27 | | - "URLTooLong", |
28 | | - "Unchunkable", |
29 | | - "NoSitesError", |
| 28 | + "HTTPError", |
30 | 29 | "TransientError", |
31 | 30 | "RateLimited", |
32 | 31 | "ServiceUnavailable", |
| 32 | + "RequestTooLarge", |
| 33 | + "URLTooLong", |
| 34 | + "Unchunkable", |
| 35 | + "NoDataError", |
| 36 | + "NoSitesError", # deprecated alias for NoDataError |
| 37 | + "error_for_status", |
33 | 38 | ] |
34 | 39 |
|
35 | 40 |
|
36 | 41 | class DataRetrievalError(Exception): |
37 | | - """Base class for errors raised when a request to a USGS or EPA web |
| 42 | + """Base class for every error raised when a request to a USGS or EPA web |
38 | 43 | service fails. |
39 | 44 |
|
40 | | - Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...) |
41 | | - raises a subclass of this when a request fails, so a caller can handle any |
42 | | - request failure uniformly:: |
| 45 | + Service modules raise a subclass of this on a failed request, so a caller |
| 46 | + can handle them uniformly:: |
43 | 47 |
|
44 | 48 | try: |
45 | 49 | df, md = dataretrieval.wqp.get_results(...) |
46 | 50 | except dataretrieval.DataRetrievalError: |
47 | 51 | ... |
48 | 52 |
|
49 | | - Subclasses also inherit from the built-in exception this package has |
50 | | - historically raised for the condition's *kind* -- :class:`ValueError` for a |
51 | | - request that can't succeed as written (bad params, too large), and |
52 | | - :class:`RuntimeError` for a transient transport failure -- so existing |
53 | | - ``except ValueError`` / ``except RuntimeError`` handlers keep working. |
| 53 | + Connection-level failures (timeouts, DNS) still surface as ``httpx`` |
| 54 | + exceptions on the single-shot request paths. |
| 55 | + """ |
| 56 | + |
| 57 | + def __reduce__(self) -> tuple[Any, ...]: |
| 58 | + # The status subclasses declare keyword-only fields (status_code / |
| 59 | + # retry_after); the default ``BaseException.__reduce__`` rebuilds via |
| 60 | + # ``cls(*self.args)``, which drops them and raises TypeError on unpickle |
| 61 | + # / deepcopy. Reconstruct from args + ``__dict__`` instead so every |
| 62 | + # subclass round-trips -- these get pickled back from multiprocessing / |
| 63 | + # lithops workers. |
| 64 | + return (_rebuild_error, (self.__class__, self.args, self.__dict__)) |
| 65 | + |
| 66 | + |
| 67 | +def _rebuild_error( |
| 68 | + cls: type[DataRetrievalError], |
| 69 | + args: tuple[Any, ...], |
| 70 | + state: dict[str, Any], |
| 71 | +) -> DataRetrievalError: |
| 72 | + """Rebuild a :class:`DataRetrievalError` without calling ``__init__``. |
| 73 | +
|
| 74 | + See :meth:`DataRetrievalError.__reduce__`. |
| 75 | + """ |
| 76 | + err = cls.__new__(cls) |
| 77 | + err.args = args |
| 78 | + err.__dict__.update(state) |
| 79 | + return err |
| 80 | + |
| 81 | + |
| 82 | +# --- HTTP status errors -------------------------------------------------- |
| 83 | + |
| 84 | + |
| 85 | +class HTTPError(DataRetrievalError): |
| 86 | + """The service returned an error HTTP status. |
| 87 | +
|
| 88 | + The numeric status is on :attr:`status_code`; inspect it to branch on the |
| 89 | + specific code, e.g. ``except HTTPError as e: ... e.status_code == 404``. |
| 90 | + :class:`TransientError` (429 / 5xx) is the retryable subset and is itself an |
| 91 | + ``HTTPError``. The one carve-out: a 413/414 surfaces as :class:`URLTooLong` |
| 92 | + (a :class:`RequestTooLarge`), *not* an ``HTTPError`` -- catch |
| 93 | + :class:`DataRetrievalError` to span every failure. |
| 94 | +
|
| 95 | + Parameters |
| 96 | + ---------- |
| 97 | + message : str |
| 98 | + Human-readable error message. |
| 99 | + status_code : int |
| 100 | + The HTTP status the service returned. |
| 101 | + """ |
| 102 | + |
| 103 | + def __init__(self, message: str, *, status_code: int) -> None: |
| 104 | + super().__init__(message) |
| 105 | + self.status_code = status_code |
| 106 | + |
| 107 | + |
| 108 | +class TransientError(HTTPError): |
| 109 | + """A 429 or 5xx response to a request that may succeed on a later try |
| 110 | + (:class:`RateLimited` for 429, :class:`ServiceUnavailable` for 5xx). |
| 111 | +
|
| 112 | + This classifies the HTTP condition; it does not by itself retry the request. |
| 113 | + Whether a transient is retried is up to the calling path -- a single-shot |
| 114 | + request raises it for the caller to handle (e.g. wait :attr:`retry_after` |
| 115 | + and re-issue). |
| 116 | +
|
| 117 | + Parameters |
| 118 | + ---------- |
| 119 | + message : str |
| 120 | + Human-readable error message. |
| 121 | + status_code : int, optional |
| 122 | + The HTTP status the service returned. Defaults to the concrete leaf's |
| 123 | + canonical code (:attr:`_DEFAULT_STATUS`) when omitted; |
| 124 | + :func:`error_for_status` always passes the real status. |
| 125 | + retry_after : float, optional |
| 126 | + Seconds to wait before retrying, parsed from the ``Retry-After`` |
| 127 | + response header; ``None`` when the header is absent or unparseable. |
54 | 128 | """ |
55 | 129 |
|
| 130 | + #: Canonical status a concrete transient stamps when built without an |
| 131 | + #: explicit ``status_code`` (:class:`RateLimited` = 429, |
| 132 | + #: :class:`ServiceUnavailable` = 503). ``TransientError`` itself sets none, |
| 133 | + #: so constructing it without ``status_code`` raises ``AttributeError``. |
| 134 | + _DEFAULT_STATUS: ClassVar[int] |
| 135 | + |
| 136 | + def __init__( |
| 137 | + self, |
| 138 | + message: str, |
| 139 | + *, |
| 140 | + status_code: int | None = None, |
| 141 | + retry_after: float | None = None, |
| 142 | + ) -> None: |
| 143 | + super().__init__( |
| 144 | + message, |
| 145 | + status_code=self._DEFAULT_STATUS if status_code is None else status_code, |
| 146 | + ) |
| 147 | + self.retry_after = retry_after |
| 148 | + |
56 | 149 |
|
57 | | -# --- Fatal client errors ------------------------------------------------- |
58 | | -# The request can't succeed as written; retrying it unchanged won't help. Each |
59 | | -# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always |
60 | | -# raised -- so existing ``except ValueError`` handlers keep working. |
| 150 | +class RateLimited(TransientError): |
| 151 | + """A request was rejected with HTTP 429 (too many requests).""" |
61 | 152 |
|
| 153 | + _DEFAULT_STATUS = 429 |
62 | 154 |
|
63 | | -class BadRequestError(DataRetrievalError, ValueError): |
64 | | - """The service rejected the request parameters (HTTP 400).""" |
65 | 155 |
|
| 156 | +class ServiceUnavailable(TransientError): |
| 157 | + """A request was rejected with a server error (HTTP 5xx). |
66 | 158 |
|
67 | | -class NotFoundError(DataRetrievalError, ValueError): |
68 | | - """The requested resource was not found; often an empty query (HTTP 404).""" |
| 159 | + Raised by both the legacy ``query`` path and the Water Data path, so a 5xx |
| 160 | + surfaces as one type regardless of which subsystem issued the request. |
| 161 | + ``status_code`` defaults to 503 (its namesake) only when built by hand |
| 162 | + without one; :func:`error_for_status` always supplies the real 5xx. |
| 163 | + """ |
69 | 164 |
|
| 165 | + _DEFAULT_STATUS = 503 |
70 | 166 |
|
71 | | -class RequestTooLarge(DataRetrievalError, ValueError): |
| 167 | + |
| 168 | +# --- Request can't fit (not necessarily an HTTP status) ------------------ |
| 169 | + |
| 170 | + |
| 171 | +class RequestTooLarge(DataRetrievalError): |
72 | 172 | """The request is too large for the service to satisfy. |
73 | 173 |
|
74 | 174 | A base for the two ways a request can exceed what the service accepts; |
@@ -99,56 +199,45 @@ class Unchunkable(RequestTooLarge): |
99 | 199 | """ |
100 | 200 |
|
101 | 201 |
|
102 | | -class NoSitesError(DataRetrievalError): |
103 | | - """The selection criteria matched no sites/data.""" |
| 202 | +# --- Empty result -------------------------------------------------------- |
| 203 | + |
| 204 | + |
| 205 | +class NoDataError(DataRetrievalError): |
| 206 | + """The request succeeded (HTTP 200) but the selection criteria matched |
| 207 | + no data.""" |
104 | 208 |
|
105 | 209 | def __init__(self, url: httpx.URL) -> None: |
106 | 210 | self.url = url |
107 | 211 |
|
108 | 212 | def __str__(self) -> str: |
109 | 213 | return ( |
110 | | - "No sites/data found using the selection criteria specified in " |
111 | | - f"url: {self.url}" |
| 214 | + f"No data found using the selection criteria specified in url: {self.url}" |
112 | 215 | ) |
113 | 216 |
|
114 | 217 |
|
115 | | -# --- Transient transport errors ------------------------------------------ |
116 | | -# The service was reachable but temporarily refused the request; the same call |
117 | | -# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the |
118 | | -# waterdata path has always raised). The Water Data chunker recognizes them via |
119 | | -# ``isinstance(exc, TransientError)`` and wraps them as resumable |
120 | | -# ``ChunkInterrupted`` subclasses. |
| 218 | +#: Deprecated alias for :class:`NoDataError`. The original name leaked NWIS-era |
| 219 | +#: "sites" terminology; it is retained so existing ``except NoSitesError`` |
| 220 | +#: handlers keep working, and will be removed in a future release. |
| 221 | +NoSitesError = NoDataError |
121 | 222 |
|
122 | 223 |
|
123 | | -class TransientError(DataRetrievalError, RuntimeError): |
124 | | - """Base for transient HTTP failures that are worth an automatic retry. |
| 224 | +def error_for_status( |
| 225 | + status: int, message: str, *, retry_after: float | None = None |
| 226 | +) -> DataRetrievalError: |
| 227 | + """Return the typed :class:`DataRetrievalError` for an HTTP error *status*. |
125 | 228 |
|
126 | | - One subclass per recoverable HTTP status family (429 -> :class:`RateLimited`, |
127 | | - 5xx -> :class:`ServiceUnavailable`); the Water Data chunker recognizes them |
128 | | - by this shared base and wraps them as resumable interruptions. |
129 | | -
|
130 | | - Parameters |
131 | | - ---------- |
132 | | - message : str |
133 | | - Human-readable error message. |
134 | | - retry_after : float, optional |
135 | | - Seconds to wait before retrying, parsed from the ``Retry-After`` |
136 | | - response header; stored on the :attr:`retry_after` attribute (``None`` |
137 | | - when the header is absent or unparseable). |
138 | | - """ |
139 | | - |
140 | | - def __init__(self, message: str, *, retry_after: float | None = None) -> None: |
141 | | - super().__init__(message) |
142 | | - self.retry_after = retry_after |
143 | | - |
144 | | - |
145 | | -class RateLimited(TransientError): |
146 | | - """A request was rejected with HTTP 429 (too many requests).""" |
147 | | - |
148 | | - |
149 | | -class ServiceUnavailable(TransientError): |
150 | | - """A request was rejected with a server error (HTTP 5xx). |
151 | | -
|
152 | | - Raised by both the legacy ``query`` path and the Water Data path, so a 5xx |
153 | | - surfaces as one type regardless of which subsystem issued the request. |
| 229 | + The single status-to-type mapping shared by every request path (the legacy |
| 230 | + ``query`` path, ``waterdata``, ``nadp`` / ``streamstats``), so a given status |
| 231 | + surfaces as the same type everywhere. ``message`` is used verbatim; |
| 232 | + ``retry_after`` is attached only to the transient (:class:`TransientError`) |
| 233 | + types. A 413/414 surfaces as :class:`URLTooLong` (a :class:`RequestTooLarge`) |
| 234 | + rather than a generic :class:`HTTPError`, matching the client-side |
| 235 | + over-long-URL case. |
154 | 236 | """ |
| 237 | + if status in (413, 414): |
| 238 | + return URLTooLong(message) |
| 239 | + if status == 429: |
| 240 | + return RateLimited(message, status_code=status, retry_after=retry_after) |
| 241 | + if 500 <= status < 600: |
| 242 | + return ServiceUnavailable(message, status_code=status, retry_after=retry_after) |
| 243 | + return HTTPError(message, status_code=status) |
0 commit comments