Skip to content

Commit c6e7e2d

Browse files
thodson-usgsclaude
andcommitted
refactor(waterdata): coherent error handling + safe cleanups for non-OGC getters
Non-OGC getter error coherence + safe cleanups. All public signatures, kwargs (incl. the upstream-driven Samples camelCase), and return shapes are unchanged — the only behavior change is the exception TYPE on HTTP errors. Error coherence: - Route the direct-httpx getters (get_samples, get_samples_summary, get_codes, ratings._search/_download_and_parse) through the module's typed _raise_for_non_200 instead of a bare response.raise_for_status(), so a 429 raises RateLimited and a 5xx raises ServiceUnavailable (with Retry-After and USGS-aware messages) — the same typed errors the OGC/stats path already raises. NOTE: this changes the exception type from httpx.HTTPStatusError; flagged for review. Safe cleanups: - Extract `_get_samples_csv` and call it from get_samples/get_samples_summary (collapses the duplicated GET + read_csv tail). - Fix get_channel docstring drift: channel_flow was labeled as units (it's the discharge value), channel_flow_unit was undocumented, and measurement_type carried last_modified's pasted RFC-3339 block. - _format_api_dates: resolve the local timezone only after the all-NA / duration / interval guards (skip the lookup when it's discarded). Tests: add typed-error regression tests (429 -> RateLimited, 5xx -> ServiceUnavailable). 118 samples/ratings/utils tests pass; ruff clean. Deliberately NOT included (kept backward-compatible / out of scope): no kwarg renames (Samples camelCase mirrors the upstream API), no get_codes return-shape change, no ssl_check additions. The _aggregate_paginated_response / _combine_chunk_responses response-merge dedup is deferred to avoid colliding with the in-flight chunking PR (#295). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 906c859 commit c6e7e2d

4 files changed

Lines changed: 61 additions & 50 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
_check_profiles,
3535
_default_headers,
3636
_get_args,
37+
_raise_for_non_200,
3738
get_ogc_data,
3839
get_stats_data,
3940
)
@@ -2116,7 +2117,7 @@ def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
21162117

21172118
response = httpx.get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
21182119

2119-
response.raise_for_status()
2120+
_raise_for_non_200(response)
21202121

21212122
data_dict = json.loads(response.text)
21222123
data_list = data_dict["data"]
@@ -2126,6 +2127,30 @@ def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
21262127
return df
21272128

21282129

2130+
def _get_samples_csv(
2131+
url: str, params: dict, ssl_check: bool
2132+
) -> tuple[pd.DataFrame, httpx.Response]:
2133+
"""Issue a Samples CSV request and parse the body into a DataFrame.
2134+
2135+
Shared tail for the Samples getters: sends the GET with the standard
2136+
headers (including ``X-Api-Key``), raises a typed error on a non-200
2137+
(consistent with the OGC/stats path) instead of a bare
2138+
``HTTPStatusError``, and reads the CSV. The caller wraps the response
2139+
as metadata and applies any per-getter post-step.
2140+
"""
2141+
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
2142+
response = httpx.get(
2143+
url,
2144+
params=params,
2145+
verify=ssl_check,
2146+
headers=_default_headers(),
2147+
**HTTPX_DEFAULTS,
2148+
)
2149+
_raise_for_non_200(response)
2150+
df = pd.read_csv(StringIO(response.text), delimiter=",")
2151+
return df, response
2152+
2153+
21292154
def get_samples(
21302155
ssl_check: bool = True,
21312156
service: SERVICES = "results",
@@ -2340,19 +2365,7 @@ def get_samples(
23402365

23412366
url = f"{SAMPLES_URL}/{service}/{profile}"
23422367

2343-
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
2344-
2345-
response = httpx.get(
2346-
url,
2347-
params=params,
2348-
verify=ssl_check,
2349-
headers=_default_headers(),
2350-
**HTTPX_DEFAULTS,
2351-
)
2352-
2353-
response.raise_for_status()
2354-
2355-
df = pd.read_csv(StringIO(response.text), delimiter=",")
2368+
df, response = _get_samples_csv(url, params, ssl_check)
23562369
df = _attach_datetime_columns(df)
23572370

23582371
return df, BaseMetadata(response)
@@ -2414,19 +2427,7 @@ def get_samples_summary(
24142427
url = f"{SAMPLES_URL}/summary/{quote(monitoringLocationIdentifier, safe='')}"
24152428
params = {"mimeType": "text/csv"}
24162429

2417-
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
2418-
2419-
response = httpx.get(
2420-
url,
2421-
params=params,
2422-
verify=ssl_check,
2423-
headers=_default_headers(),
2424-
**HTTPX_DEFAULTS,
2425-
)
2426-
2427-
response.raise_for_status()
2428-
2429-
df = pd.read_csv(StringIO(response.text), delimiter=",")
2430+
df, response = _get_samples_csv(url, params, ssl_check)
24302431

24312432
return df, BaseMetadata(response)
24322433

@@ -2758,6 +2759,8 @@ def get_channel(
27582759
channel_name : string or iterable of strings, optional
27592760
The channel name.
27602761
channel_flow : string or iterable of strings, optional
2762+
The channel discharge (flow).
2763+
channel_flow_unit : string or iterable of strings, optional
27612764
The units for channel discharge.
27622765
channel_width : string or iterable of strings, optional
27632766
The channel width.
@@ -2788,24 +2791,7 @@ def get_channel(
27882791
longitudinal_velocity_description : string or iterable of strings, optional
27892792
The longitudinal velocity description.
27902793
measurement_type : string or iterable of strings, optional
2791-
The measurement type.
2792-
The last time a record was refreshed in our database. This may happen
2793-
due to regular operational processes and does not necessarily indicate
2794-
anything about the measurement has changed. You can query this field
2795-
using date-times or intervals, adhering to RFC 3339, or using ISO 8601
2796-
duration objects. Intervals may be bounded or half-bounded (double-dots
2797-
at start or end).
2798-
Examples:
2799-
2800-
* A date-time: "2018-02-12T23:20:50Z"
2801-
* A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z"
2802-
* Half-bounded intervals: "2018-02-12T00:00:00Z/.." or
2803-
"../2018-03-18T12:31:12Z"
2804-
* Duration objects: "P1M" for data from the past month or "PT36H" for the
2805-
last 36 hours
2806-
2807-
Only features that have a last_modified that intersects the value of
2808-
datetime are selected.
2794+
The type of channel measurement.
28092795
skip_geometry : boolean, optional
28102796
This option can be used to skip response geometries for each feature.
28112797
The returning object will be a data frame with no spatial information.

dataretrieval/waterdata/ratings.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
_check_monitoring_location_id,
3030
_default_headers,
3131
_format_api_dates,
32+
_raise_for_non_200,
3233
)
3334

3435
logger = logging.getLogger(__name__)
@@ -248,7 +249,7 @@ def _search(
248249
verify=ssl_check,
249250
**HTTPX_DEFAULTS,
250251
)
251-
response.raise_for_status()
252+
_raise_for_non_200(response)
252253
return response.json().get("features", [])
253254

254255

@@ -262,7 +263,7 @@ def _download_and_parse(
262263
response = httpx.get(
263264
url, headers=_default_headers(), verify=ssl_check, **HTTPX_DEFAULTS
264265
)
265-
response.raise_for_status()
266+
_raise_for_non_200(response)
266267

267268
if file_path is not None:
268269
with open(os.path.join(file_path, feature["id"]), "w") as f:

dataretrieval/waterdata/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,6 @@ def _format_api_dates(
263263
"""
264264
if datetime_input is None:
265265
return None
266-
# Get timezone
267-
local_timezone = datetime.now().astimezone().tzinfo
268266

269267
# Convert single string to list for uniform processing
270268
if isinstance(datetime_input, str):
@@ -295,7 +293,9 @@ def _format_api_dates(
295293
return single
296294

297295
# Half-bounded ranges: NA endpoints render as ".."; any unparseable non-NA
298-
# element invalidates the range.
296+
# element invalidates the range. Resolve the local tz only now — after the
297+
# all-NA / duration / interval guards above have had their chance to return.
298+
local_timezone = datetime.now().astimezone().tzinfo
299299
formatted = [
300300
_format_one(dt, date=date, local_tz=local_timezone) for dt in datetime_input
301301
]

tests/waterdata_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,30 @@ def test_get_samples_summary_rejects_list():
128128
get_samples_summary(monitoringLocationIdentifier=["USGS-04183500"])
129129

130130

131+
def test_get_samples_raises_typed_error_on_429(httpx_mock):
132+
"""Non-200 from the Samples endpoint now raises the module's typed error
133+
(RateLimited on 429) — consistent with the OGC/stats path — instead of a
134+
bare httpx.HTTPStatusError."""
135+
from dataretrieval.waterdata.chunking import RateLimited
136+
137+
httpx_mock.add_response(status_code=429, headers={"Retry-After": "30"})
138+
with pytest.raises(RateLimited):
139+
get_samples(
140+
service="results",
141+
profile="fullphyschem",
142+
monitoringLocationIdentifier="USGS-05406500",
143+
)
144+
145+
146+
def test_get_samples_summary_raises_typed_error_on_5xx(httpx_mock):
147+
"""A 5xx from the Samples summary endpoint raises ServiceUnavailable."""
148+
from dataretrieval.waterdata.chunking import ServiceUnavailable
149+
150+
httpx_mock.add_response(status_code=503)
151+
with pytest.raises(ServiceUnavailable):
152+
get_samples_summary(monitoringLocationIdentifier="USGS-04183500")
153+
154+
131155
def test_check_profiles():
132156
"""Tests that correct errors are raised for invalid profiles."""
133157
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)