Skip to content

Commit 484a86a

Browse files
thodson-usgsclaude
andauthored
refactor(waterdata): coherent error handling + safe cleanups for non-OGC getters (#298)
Non-OGC getter error coherence + safe cleanups. All public signatures, kwargs (incl. the upstream-driven Samples camelCase), and return shapes are unchanged — the only behavior change is the exception TYPE on HTTP errors. Error coherence: - Route the direct-httpx getters (get_samples, get_samples_summary, get_codes, ratings._search/_download_and_parse) through the module's typed _raise_for_non_200 instead of a bare response.raise_for_status(), so a 429 raises RateLimited and a 5xx raises ServiceUnavailable (with Retry-After and USGS-aware messages) — the same typed errors the OGC/stats path already raises. NOTE: this changes the exception type from httpx.HTTPStatusError; flagged for review. Safe cleanups: - Extract `_get_samples_csv` and call it from get_samples/get_samples_summary (collapses the duplicated GET + read_csv tail). - Fix get_channel docstring drift: channel_flow was labeled as units (it's the discharge value), channel_flow_unit was undocumented, and measurement_type carried last_modified's pasted RFC-3339 block. - _format_api_dates: resolve the local timezone only after the all-NA / duration / interval guards (skip the lookup when it's discarded). Tests: add typed-error regression tests (429 -> RateLimited, 5xx -> ServiceUnavailable). 118 samples/ratings/utils tests pass; ruff clean. Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 9cf7251 commit 484a86a

4 files changed

Lines changed: 61 additions & 50 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
_check_profiles,
3535
_default_headers,
3636
_get_args,
37+
_raise_for_non_200,
3738
get_ogc_data,
3839
get_stats_data,
3940
)
@@ -2125,7 +2126,7 @@ def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
21252126

21262127
response = httpx.get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
21272128

2128-
response.raise_for_status()
2129+
_raise_for_non_200(response)
21292130

21302131
data_dict = json.loads(response.text)
21312132
data_list = data_dict["data"]
@@ -2135,6 +2136,30 @@ def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
21352136
return df
21362137

21372138

2139+
def _get_samples_csv(
2140+
url: str, params: dict, ssl_check: bool
2141+
) -> tuple[pd.DataFrame, httpx.Response]:
2142+
"""Issue a Samples CSV request and parse the body into a DataFrame.
2143+
2144+
Shared tail for the Samples getters: sends the GET with the standard
2145+
headers (including ``X-Api-Key``), raises a typed error on a non-200
2146+
(consistent with the OGC/stats path) instead of a bare
2147+
``HTTPStatusError``, and reads the CSV. The caller wraps the response
2148+
as metadata and applies any per-getter post-step.
2149+
"""
2150+
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
2151+
response = httpx.get(
2152+
url,
2153+
params=params,
2154+
verify=ssl_check,
2155+
headers=_default_headers(),
2156+
**HTTPX_DEFAULTS,
2157+
)
2158+
_raise_for_non_200(response)
2159+
df = pd.read_csv(StringIO(response.text), delimiter=",")
2160+
return df, response
2161+
2162+
21382163
def get_samples(
21392164
ssl_check: bool = True,
21402165
service: SERVICES = "results",
@@ -2349,19 +2374,7 @@ def get_samples(
23492374

23502375
url = f"{SAMPLES_URL}/{service}/{profile}"
23512376

2352-
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
2353-
2354-
response = httpx.get(
2355-
url,
2356-
params=params,
2357-
verify=ssl_check,
2358-
headers=_default_headers(),
2359-
**HTTPX_DEFAULTS,
2360-
)
2361-
2362-
response.raise_for_status()
2363-
2364-
df = pd.read_csv(StringIO(response.text), delimiter=",")
2377+
df, response = _get_samples_csv(url, params, ssl_check)
23652378
df = _attach_datetime_columns(df)
23662379

23672380
return df, BaseMetadata(response)
@@ -2423,19 +2436,7 @@ def get_samples_summary(
24232436
url = f"{SAMPLES_URL}/summary/{quote(monitoringLocationIdentifier, safe='')}"
24242437
params = {"mimeType": "text/csv"}
24252438

2426-
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
2427-
2428-
response = httpx.get(
2429-
url,
2430-
params=params,
2431-
verify=ssl_check,
2432-
headers=_default_headers(),
2433-
**HTTPX_DEFAULTS,
2434-
)
2435-
2436-
response.raise_for_status()
2437-
2438-
df = pd.read_csv(StringIO(response.text), delimiter=",")
2439+
df, response = _get_samples_csv(url, params, ssl_check)
24392440

24402441
return df, BaseMetadata(response)
24412442

@@ -2767,6 +2768,8 @@ def get_channel(
27672768
channel_name : string or iterable of strings, optional
27682769
The channel name.
27692770
channel_flow : string or iterable of strings, optional
2771+
The channel discharge (flow).
2772+
channel_flow_unit : string or iterable of strings, optional
27702773
The units for channel discharge.
27712774
channel_width : string or iterable of strings, optional
27722775
The channel width.
@@ -2797,24 +2800,7 @@ def get_channel(
27972800
longitudinal_velocity_description : string or iterable of strings, optional
27982801
The longitudinal velocity description.
27992802
measurement_type : string or iterable of strings, optional
2800-
The measurement type.
2801-
The last time a record was refreshed in our database. This may happen
2802-
due to regular operational processes and does not necessarily indicate
2803-
anything about the measurement has changed. You can query this field
2804-
using date-times or intervals, adhering to RFC 3339, or using ISO 8601
2805-
duration objects. Intervals may be bounded or half-bounded (double-dots
2806-
at start or end).
2807-
Examples:
2808-
2809-
* A date-time: "2018-02-12T23:20:50Z"
2810-
* A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z"
2811-
* Half-bounded intervals: "2018-02-12T00:00:00Z/.." or
2812-
"../2018-03-18T12:31:12Z"
2813-
* Duration objects: "P1M" for data from the past month or "PT36H" for the
2814-
last 36 hours
2815-
2816-
Only features that have a last_modified that intersects the value of
2817-
datetime are selected.
2803+
The type of channel measurement.
28182804
skip_geometry : boolean, optional
28192805
This option can be used to skip response geometries for each feature.
28202806
The returning object will be a data frame with no spatial information.

dataretrieval/waterdata/ratings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
_check_monitoring_location_id,
3030
_default_headers,
3131
_format_api_dates,
32+
_raise_for_non_200,
3233
)
3334

3435
logger = logging.getLogger(__name__)
@@ -248,7 +249,7 @@ def _search(
248249
verify=ssl_check,
249250
**HTTPX_DEFAULTS,
250251
)
251-
response.raise_for_status()
252+
_raise_for_non_200(response)
252253
return response.json().get("features", [])
253254

254255

@@ -262,7 +263,7 @@ def _download_and_parse(
262263
response = httpx.get(
263264
url, headers=_default_headers(), verify=ssl_check, **HTTPX_DEFAULTS
264265
)
265-
response.raise_for_status()
266+
_raise_for_non_200(response)
266267

267268
if file_path is not None:
268269
with open(os.path.join(file_path, feature["id"]), "w") as f:

dataretrieval/waterdata/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,6 @@ def _format_api_dates(
268268
"""
269269
if datetime_input is None:
270270
return None
271-
# Get timezone
272-
local_timezone = datetime.now().astimezone().tzinfo
273271

274272
# Convert single string to list for uniform processing
275273
if isinstance(datetime_input, str):
@@ -300,7 +298,9 @@ def _format_api_dates(
300298
return single
301299

302300
# Half-bounded ranges: NA endpoints render as ".."; any unparseable non-NA
303-
# element invalidates the range.
301+
# element invalidates the range. Resolve the local tz only now — after the
302+
# all-NA / duration / interval guards above have had their chance to return.
303+
local_timezone = datetime.now().astimezone().tzinfo
304304
formatted = [
305305
_format_one(dt, date=date, local_tz=local_timezone) for dt in datetime_input
306306
]

tests/waterdata_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,30 @@ def test_get_samples_summary_rejects_list():
129129
get_samples_summary(monitoringLocationIdentifier=["USGS-04183500"])
130130

131131

132+
def test_get_samples_raises_typed_error_on_429(httpx_mock):
133+
"""Non-200 from the Samples endpoint now raises the module's typed error
134+
(RateLimited on 429) — consistent with the OGC/stats path — instead of a
135+
bare httpx.HTTPStatusError."""
136+
from dataretrieval.waterdata.chunking import RateLimited
137+
138+
httpx_mock.add_response(status_code=429, headers={"Retry-After": "30"})
139+
with pytest.raises(RateLimited):
140+
get_samples(
141+
service="results",
142+
profile="fullphyschem",
143+
monitoringLocationIdentifier="USGS-05406500",
144+
)
145+
146+
147+
def test_get_samples_summary_raises_typed_error_on_5xx(httpx_mock):
148+
"""A 5xx from the Samples summary endpoint raises ServiceUnavailable."""
149+
from dataretrieval.waterdata.chunking import ServiceUnavailable
150+
151+
httpx_mock.add_response(status_code=503)
152+
with pytest.raises(ServiceUnavailable):
153+
get_samples_summary(monitoringLocationIdentifier="USGS-04183500")
154+
155+
132156
def test_check_profiles():
133157
"""Tests that correct errors are raised for invalid profiles."""
134158
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)