Skip to content

Commit 13a63fc

Browse files
thodson-usgsclaude
andcommitted
Extract _raise_for_non_200 helper and trim PR #273 comments
The status-code guard appeared four times in waterdata/utils.py (initial-request and pagination paths in both _walk_pages and get_stats_data) with bit-identical bodies. Extract into a single named helper; the helper's docstring carries the "silent-empty-frame" WHY that the inline comment was apologizing for. Also: - trim the second multi-line comment in _walk_pages to a single line noting the actual constraint (`resp` may be stale) - hoist `import logging` to module level in waterdata_utils_test.py - condense the two new test docstrings to one-line contract statements; the bug history lives in the commit message Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 13b9032 commit 13a63fc

2 files changed

Lines changed: 19 additions & 26 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,17 @@ def _error_body(resp: requests.Response):
368368
)
369369

370370

371+
def _raise_for_non_200(resp: requests.Response) -> None:
372+
"""Raise ``RuntimeError(_error_body(resp))`` if ``resp`` is not 200.
373+
374+
Used by both the initial-request gate and the pagination loops; without
375+
the loop-side check, a 5xx body lacking ``numberReturned`` would be
376+
silently treated as an empty page and pagination would stop quietly.
377+
"""
378+
if resp.status_code != 200:
379+
raise RuntimeError(_error_body(resp))
380+
381+
371382
def _construct_api_requests(
372383
service: str,
373384
properties: list[str] | None = None,
@@ -612,8 +623,7 @@ def _walk_pages(
612623
client = client or requests.Session()
613624
try:
614625
resp = client.send(req)
615-
if resp.status_code != 200:
616-
raise RuntimeError(_error_body(resp))
626+
_raise_for_non_200(resp)
617627

618628
# Store the initial response for metadata
619629
initial_response = resp
@@ -635,17 +645,11 @@ def _walk_pages(
635645
headers=headers,
636646
data=content if method == "POST" else None,
637647
)
638-
# Mirror the initial-request check; otherwise a 5xx body
639-
# without "numberReturned" silently yields an empty frame
640-
# and the loop quietly stops.
641-
if resp.status_code != 200:
642-
raise RuntimeError(_error_body(resp))
648+
_raise_for_non_200(resp)
643649
dfs.append(_get_resp_data(resp, geopd=geopd))
644650
curr_url = _next_req_url(resp)
645651
except Exception as e: # noqa: BLE001
646-
# Report the *actual* failure — not _error_body(resp) on a
647-
# stale prior-page response (which would describe the wrong
648-
# request and can itself raise on non-JSON bodies).
652+
# `resp` may be stale (prior page) if client.request() raised.
649653
logger.error("Request incomplete: %s", e)
650654
logger.warning(
651655
"Request failed for URL: %s. Data download interrupted.", curr_url
@@ -1079,8 +1083,7 @@ def get_stats_data(
10791083

10801084
try:
10811085
resp = client.send(req)
1082-
if resp.status_code != 200:
1083-
raise RuntimeError(_error_body(resp))
1086+
_raise_for_non_200(resp)
10841087

10851088
# Store the initial response for metadata
10861089
initial_response = resp
@@ -1106,8 +1109,7 @@ def get_stats_data(
11061109
params=args,
11071110
headers=headers,
11081111
)
1109-
if resp.status_code != 200:
1110-
raise RuntimeError(_error_body(resp))
1112+
_raise_for_non_200(resp)
11111113
body = resp.json()
11121114
all_dfs.append(_handle_stats_nesting(body, geopd=GEOPANDAS))
11131115
next_token = body["next"]

tests/waterdata_utils_test.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
from unittest import mock
23

34
import pandas as pd
@@ -123,12 +124,7 @@ def _walk_pages_with_failure(failure_resp_or_exc):
123124

124125

125126
def test_walk_pages_logs_actual_exception_when_request_raises(caplog):
126-
"""When client.request() itself raises (e.g. a connection error), the
127-
logged failure must reflect that exception — not _error_body() on the
128-
stale prior-page response, which described the wrong request and could
129-
itself crash on non-JSON bodies."""
130-
import logging
131-
127+
"""Exception from client.request() must be logged with its actual message."""
132128
caplog.set_level(logging.ERROR, logger="dataretrieval.waterdata.utils")
133129

134130
df, _ = _walk_pages_with_failure(requests.ConnectionError("boom"))
@@ -143,12 +139,7 @@ def test_walk_pages_logs_actual_exception_when_request_raises(caplog):
143139

144140

145141
def test_walk_pages_surfaces_5xx_mid_pagination(caplog):
146-
"""A non-200 response on a paginated page must be detected; previously
147-
the loop happily called _get_resp_data() on the 5xx body, which —
148-
lacking 'numberReturned' — silently produced an empty DataFrame and
149-
the loop quietly stopped with no error logged."""
150-
import logging
151-
142+
"""A non-200 mid-pagination response must be logged, not silently swallowed."""
152143
caplog.set_level(logging.ERROR, logger="dataretrieval.waterdata.utils")
153144

154145
page2_500 = mock.MagicMock()

0 commit comments

Comments
 (0)