|
1 | 1 | """Exception taxonomy for ``dataretrieval``. |
2 | 2 |
|
3 | | -A failed request from any service module (``nwis``, ``wqp``, ``waterdata``, |
4 | | -``nldi``, ...) raises a subclass of :class:`DataRetrievalError`, so a caller can |
5 | | -handle any request failure with a single ``except dataretrieval.DataRetrievalError``. |
6 | | -
|
7 | | -The tree has two intermediate bases a caller can catch to span a whole family: |
8 | | -:class:`RequestTooLarge` (the request can't fit, however it was issued) and |
9 | | -:class:`TransientError` (a temporary failure worth retrying). |
10 | | -
|
11 | | -This module deliberately has no third-party dependencies, so any module can |
12 | | -import it without pulling in pandas/httpx. |
| 3 | +When a request gets an HTTP error response, the service modules (``nwis``, |
| 4 | +``wqp``, ``nldi``, ``waterdata``, ``nadp``, ``streamstats``) raise a subclass of |
| 5 | +:class:`DataRetrievalError`, so a caller can handle any of them with one |
| 6 | +``except dataretrieval.DataRetrievalError``. Connection-level failures (timeouts, |
| 7 | +DNS, refused connections) surface as ``httpx`` exceptions on the single-shot |
| 8 | +request paths. |
| 9 | +
|
| 10 | +A status error is an :class:`HTTPError` carrying ``.status_code`` (inspect it to |
| 11 | +branch on the specific code); :class:`TransientError` is the retryable subset |
| 12 | +(429 / 5xx). A few failures are not a plain status -- :class:`RequestTooLarge` |
| 13 | +(:class:`URLTooLong` / :class:`Unchunkable`) and :class:`NoDataError`. |
| 14 | +
|
| 15 | +This module imports only ``httpx`` (the package's core HTTP dependency, always |
| 16 | +installed) -- not pandas/geopandas -- so it stays cheap to import and free of |
| 17 | +import cycles. |
13 | 18 | """ |
14 | 19 |
|
15 | 20 | from __future__ import annotations |
16 | 21 |
|
17 | | -from typing import TYPE_CHECKING |
18 | | - |
19 | | -if TYPE_CHECKING: |
20 | | - import httpx |
| 22 | +import httpx |
21 | 23 |
|
22 | 24 | __all__ = [ |
23 | 25 | "DataRetrievalError", |
24 | | - "BadRequestError", |
25 | | - "NotFoundError", |
26 | | - "RequestTooLarge", |
27 | | - "URLTooLong", |
28 | | - "Unchunkable", |
29 | | - "NoSitesError", |
| 26 | + "HTTPError", |
30 | 27 | "TransientError", |
31 | 28 | "RateLimited", |
32 | 29 | "ServiceUnavailable", |
| 30 | + "RequestTooLarge", |
| 31 | + "URLTooLong", |
| 32 | + "Unchunkable", |
| 33 | + "NoDataError", |
| 34 | + "NoSitesError", # deprecated alias for NoDataError |
| 35 | + "error_for_status", |
33 | 36 | ] |
34 | 37 |
|
35 | 38 |
|
36 | 39 | class DataRetrievalError(Exception): |
37 | | - """Base class for errors raised when a request to a USGS or EPA web |
| 40 | + """Base class for every error raised when a request to a USGS or EPA web |
38 | 41 | service fails. |
39 | 42 |
|
40 | | - Every service module (``nwis``, ``wqp``, ``waterdata``, ``nldi``, ...) |
41 | | - raises a subclass of this when a request fails, so a caller can handle any |
42 | | - request failure uniformly:: |
| 43 | + Service modules raise a subclass of this on a failed request, so a caller |
| 44 | + can handle them uniformly:: |
43 | 45 |
|
44 | 46 | try: |
45 | 47 | df, md = dataretrieval.wqp.get_results(...) |
46 | 48 | except dataretrieval.DataRetrievalError: |
47 | 49 | ... |
48 | 50 |
|
49 | | - Subclasses also inherit from the built-in exception this package has |
50 | | - historically raised for the condition's *kind* -- :class:`ValueError` for a |
51 | | - request that can't succeed as written (bad params, too large), and |
52 | | - :class:`RuntimeError` for a transient transport failure -- so existing |
53 | | - ``except ValueError`` / ``except RuntimeError`` handlers keep working. |
| 51 | + Connection-level failures (timeouts, DNS) still surface as ``httpx`` |
| 52 | + exceptions on the single-shot request paths. |
54 | 53 | """ |
55 | 54 |
|
56 | 55 |
|
57 | | -# --- Fatal client errors ------------------------------------------------- |
58 | | -# The request can't succeed as written; retrying it unchanged won't help. Each |
59 | | -# is also a ``ValueError`` -- the built-in the legacy ``query`` path has always |
60 | | -# raised -- so existing ``except ValueError`` handlers keep working. |
| 56 | +# --- HTTP status errors -------------------------------------------------- |
61 | 57 |
|
62 | 58 |
|
63 | | -class BadRequestError(DataRetrievalError, ValueError): |
64 | | - """The service rejected the request parameters (HTTP 400).""" |
| 59 | +class HTTPError(DataRetrievalError): |
| 60 | + """The service returned an error HTTP status. |
| 61 | +
|
| 62 | + The numeric status is on :attr:`status_code`; inspect it to branch on the |
| 63 | + specific code, e.g. ``except HTTPError as e: ... e.status_code == 404``. |
| 64 | + :class:`TransientError` (429 / 5xx) is the retryable subset and is itself an |
| 65 | + ``HTTPError``. The one carve-out: a 413/414 surfaces as :class:`URLTooLong` |
| 66 | + (a :class:`RequestTooLarge`), *not* an ``HTTPError`` -- catch |
| 67 | + :class:`DataRetrievalError` to span every failure. |
| 68 | +
|
| 69 | + Parameters |
| 70 | + ---------- |
| 71 | + message : str |
| 72 | + Human-readable error message. |
| 73 | + status_code : int |
| 74 | + The HTTP status the service returned. |
| 75 | + """ |
| 76 | + |
| 77 | + def __init__(self, message: str, *, status_code: int) -> None: |
| 78 | + super().__init__(message) |
| 79 | + self.status_code = status_code |
65 | 80 |
|
66 | 81 |
|
67 | | -class NotFoundError(DataRetrievalError, ValueError): |
68 | | - """The requested resource was not found; often an empty query (HTTP 404).""" |
| 82 | +class TransientError(HTTPError): |
| 83 | + """A 429 or 5xx the server may serve on a later try (:class:`RateLimited` |
| 84 | + for 429, :class:`ServiceUnavailable` for 5xx). |
69 | 85 |
|
| 86 | + This classifies the HTTP condition; it does not by itself retry the request. |
| 87 | + Whether a transient is retried is up to the calling path -- a single-shot |
| 88 | + request raises it for the caller to handle (e.g. wait :attr:`retry_after` |
| 89 | + and re-issue). |
70 | 90 |
|
71 | | -class RequestTooLarge(DataRetrievalError, ValueError): |
| 91 | + Parameters |
| 92 | + ---------- |
| 93 | + message : str |
| 94 | + Human-readable error message. |
| 95 | + status_code : int |
| 96 | + The HTTP status the service returned. |
| 97 | + retry_after : float, optional |
| 98 | + Seconds to wait before retrying, parsed from the ``Retry-After`` |
| 99 | + response header; ``None`` when the header is absent or unparseable. |
| 100 | + """ |
| 101 | + |
| 102 | + def __init__( |
| 103 | + self, message: str, *, status_code: int, retry_after: float | None = None |
| 104 | + ) -> None: |
| 105 | + super().__init__(message, status_code=status_code) |
| 106 | + self.retry_after = retry_after |
| 107 | + |
| 108 | + |
| 109 | +class RateLimited(TransientError): |
| 110 | + """A request was rejected with HTTP 429 (too many requests).""" |
| 111 | + |
| 112 | + def __init__( |
| 113 | + self, message: str, *, status_code: int = 429, retry_after: float | None = None |
| 114 | + ) -> None: |
| 115 | + super().__init__(message, status_code=status_code, retry_after=retry_after) |
| 116 | + |
| 117 | + |
| 118 | +class ServiceUnavailable(TransientError): |
| 119 | + """A request was rejected with a server error (HTTP 5xx). |
| 120 | +
|
| 121 | + Raised by both the legacy ``query`` path and the Water Data path, so a 5xx |
| 122 | + surfaces as one type regardless of which subsystem issued the request. |
| 123 | + """ |
| 124 | + |
| 125 | + def __init__( |
| 126 | + self, message: str, *, status_code: int = 503, retry_after: float | None = None |
| 127 | + ) -> None: |
| 128 | + super().__init__(message, status_code=status_code, retry_after=retry_after) |
| 129 | + |
| 130 | + |
| 131 | +# --- Request can't fit (not necessarily an HTTP status) ------------------ |
| 132 | + |
| 133 | + |
| 134 | +class RequestTooLarge(DataRetrievalError): |
72 | 135 | """The request is too large for the service to satisfy. |
73 | 136 |
|
74 | 137 | A base for the two ways a request can exceed what the service accepts; |
@@ -99,56 +162,45 @@ class Unchunkable(RequestTooLarge): |
99 | 162 | """ |
100 | 163 |
|
101 | 164 |
|
102 | | -class NoSitesError(DataRetrievalError): |
103 | | - """The selection criteria matched no sites/data.""" |
| 165 | +# --- Empty result -------------------------------------------------------- |
| 166 | + |
| 167 | + |
| 168 | +class NoDataError(DataRetrievalError): |
| 169 | + """The request succeeded (HTTP 200) but the selection criteria matched |
| 170 | + no data.""" |
104 | 171 |
|
105 | 172 | def __init__(self, url: httpx.URL) -> None: |
106 | 173 | self.url = url |
107 | 174 |
|
108 | 175 | def __str__(self) -> str: |
109 | 176 | return ( |
110 | | - "No sites/data found using the selection criteria specified in " |
111 | | - f"url: {self.url}" |
| 177 | + f"No data found using the selection criteria specified in url: {self.url}" |
112 | 178 | ) |
113 | 179 |
|
114 | 180 |
|
115 | | -# --- Transient transport errors ------------------------------------------ |
116 | | -# The service was reachable but temporarily refused the request; the same call |
117 | | -# may succeed if retried. Each is also a ``RuntimeError`` (the built-in the |
118 | | -# waterdata path has always raised). The Water Data chunker recognizes them via |
119 | | -# ``isinstance(exc, TransientError)`` and wraps them as resumable |
120 | | -# ``ChunkInterrupted`` subclasses. |
121 | | - |
122 | | - |
123 | | -class TransientError(DataRetrievalError, RuntimeError): |
124 | | - """Base for transient HTTP failures that are worth an automatic retry. |
125 | | -
|
126 | | - One subclass per recoverable HTTP status family (429 -> :class:`RateLimited`, |
127 | | - 5xx -> :class:`ServiceUnavailable`); the Water Data chunker recognizes them |
128 | | - by this shared base and wraps them as resumable interruptions. |
129 | | -
|
130 | | - Parameters |
131 | | - ---------- |
132 | | - message : str |
133 | | - Human-readable error message. |
134 | | - retry_after : float, optional |
135 | | - Seconds to wait before retrying, parsed from the ``Retry-After`` |
136 | | - response header; stored on the :attr:`retry_after` attribute (``None`` |
137 | | - when the header is absent or unparseable). |
138 | | - """ |
139 | | - |
140 | | - def __init__(self, message: str, *, retry_after: float | None = None) -> None: |
141 | | - super().__init__(message) |
142 | | - self.retry_after = retry_after |
| 181 | +#: Deprecated alias for :class:`NoDataError`. The original name leaked NWIS-era |
| 182 | +#: "sites" terminology; it is retained so existing ``except NoSitesError`` |
| 183 | +#: handlers keep working, and will be removed in a future release. |
| 184 | +NoSitesError = NoDataError |
143 | 185 |
|
144 | 186 |
|
145 | | -class RateLimited(TransientError): |
146 | | - """A request was rejected with HTTP 429 (too many requests).""" |
147 | | - |
148 | | - |
149 | | -class ServiceUnavailable(TransientError): |
150 | | - """A request was rejected with a server error (HTTP 5xx). |
| 187 | +def error_for_status( |
| 188 | + status: int, message: str, *, retry_after: float | None = None |
| 189 | +) -> DataRetrievalError: |
| 190 | + """Return the typed :class:`DataRetrievalError` for an HTTP error *status*. |
151 | 191 |
|
152 | | - Raised by both the legacy ``query`` path and the Water Data path, so a 5xx |
153 | | - surfaces as one type regardless of which subsystem issued the request. |
| 192 | + The single status-to-type mapping shared by every request path (the legacy |
| 193 | + ``query`` path, ``waterdata``, ``nadp`` / ``streamstats``), so a given status |
| 194 | + surfaces as the same type everywhere. ``message`` is used verbatim; |
| 195 | + ``retry_after`` is attached only to the transient (:class:`TransientError`) |
| 196 | + types. A 413/414 surfaces as :class:`URLTooLong` (a :class:`RequestTooLarge`) |
| 197 | + rather than a generic :class:`HTTPError`, matching the client-side |
| 198 | + over-long-URL case. |
154 | 199 | """ |
| 200 | + if status in (413, 414): |
| 201 | + return URLTooLong(message) |
| 202 | + if status == 429: |
| 203 | + return RateLimited(message, status_code=status, retry_after=retry_after) |
| 204 | + if 500 <= status < 600: |
| 205 | + return ServiceUnavailable(message, status_code=status, retry_after=retry_after) |
| 206 | + return HTTPError(message, status_code=status) |
0 commit comments