Skip to content

Commit af00705

Browse files
thodson-usgsclaude
andcommitted
Harden _error_body and add geopandas continuation regression test
Per copilot review on PR #255: - _error_body: catch JSONDecodeError when a 4xx/5xx returns plain text or HTML. Previously, _raise_if_not_ok -> _error_body -> resp.json() raised JSONDecodeError on non-JSON bodies, defeating the in-loop status check. - Tests: - Rename test_walk_pages_raises_on_non_200_in_loop to test_walk_pages_truncates_on_non_200_continuation; the assertion verifies log-and-truncate, not raise. - New test_get_stats_data_preserves_geometry_across_pages exercises the GEOPANDAS=True continuation path so a regression to geopd=False on page 2..N is caught. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0fa758e commit af00705

2 files changed

Lines changed: 62 additions & 5 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,14 @@ def _error_body(resp: requests.Response):
332332
"403: Query request denied. Possible reasons include "
333333
"query exceeding server limits."
334334
)
335-
j_txt = resp.json()
335+
try:
336+
j_txt = resp.json()
337+
except (json.JSONDecodeError, ValueError):
338+
# Non-JSON 4xx/5xx bodies (HTML error pages, plain text) — fall back
339+
# to the raw text so callers still get a useful message instead of
340+
# the helper crashing with JSONDecodeError.
341+
snippet = (resp.text or "").strip()[:500]
342+
return f"{status}: {resp.reason or 'Error'}. {snippet}"
336343
return (
337344
f"{status}: {j_txt.get('code', 'Unknown type')}. "
338345
f"{j_txt.get('description', 'No description provided')}."

tests/waterdata_utils_test.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
from unittest import mock
22

33
import pandas as pd
4+
import pytest
45
import requests
56

67
from dataretrieval.waterdata.utils import (
8+
GEOPANDAS,
79
_get_args,
810
_walk_pages,
911
get_stats_data,
1012
)
1113

14+
if GEOPANDAS:
15+
import geopandas as gpd
16+
1217

1318
def test_get_args_basic():
1419
local_vars = {
@@ -84,13 +89,15 @@ def test_walk_pages_multiple_mocked():
8489
assert mock_client.request.call_args[0][1] == "https://example.com/page2"
8590

8691

87-
def test_walk_pages_raises_on_non_200_in_loop():
88-
"""`_walk_pages` must surface a non-200 mid-loop, not silently truncate.
92+
def test_walk_pages_truncates_on_non_200_continuation():
93+
"""`_walk_pages` must truncate (not silently extend) on a non-200 page.
8994
9095
Regression: previously any non-200 page was appended (with whatever
9196
body it had) and pagination quietly stopped because `_get_resp_data`
92-
or `_next_req_url` raised inside the bare except. The user got a
93-
partial result with no warning.
97+
or `_next_req_url` raised inside the bare except. Now the explicit
98+
status check raises inside the loop, and the existing log-and-truncate
99+
handler converts that to a clean partial result. Page 1 is still
100+
returned; page 2 is dropped.
94101
"""
95102
resp1 = mock.MagicMock()
96103
resp1.json.return_value = {
@@ -200,3 +207,46 @@ def test_get_stats_data_truncates_on_non_200_continuation():
200207
# Page 1 still surfaces; page 2 was caught by the in-loop status check.
201208
assert isinstance(df, pd.DataFrame)
202209
assert len(df) >= 1
210+
211+
212+
def _stats_feature_with_geometry(loc_id, lon, lat):
213+
"""Stats feature with a real point geometry."""
214+
feat = _stats_feature()
215+
feat["id"] = loc_id
216+
feat["geometry"] = {"type": "Point", "coordinates": [lon, lat]}
217+
feat["properties"]["monitoring_location_id"] = loc_id
218+
return feat
219+
220+
221+
@pytest.mark.skipif(not GEOPANDAS, reason="geopandas not installed")
222+
def test_get_stats_data_preserves_geometry_across_pages():
223+
"""Pages 2..N must use ``geopd=GEOPANDAS`` so a multi-page response
224+
stays a GeoDataFrame and doesn't lose geometry/CRS at the page-1 boundary.
225+
226+
Regression: previously pages 2..N hard-coded ``geopd=False``, producing
227+
plain DataFrames. ``pd.concat`` then silently downgraded the result to
228+
a plain DataFrame.
229+
"""
230+
resp1 = mock.MagicMock()
231+
resp1.status_code = 200
232+
resp1.json.return_value = _stats_body(
233+
[_stats_feature_with_geometry("USGS-1", -89.0, 43.0)],
234+
next_token="abc",
235+
)
236+
237+
resp2 = mock.MagicMock()
238+
resp2.status_code = 200
239+
resp2.json.return_value = _stats_body(
240+
[_stats_feature_with_geometry("USGS-2", -90.0, 44.0)]
241+
)
242+
243+
client = mock.MagicMock(spec=requests.Session)
244+
client.send.return_value = resp1
245+
client.request.return_value = resp2
246+
247+
df, _ = get_stats_data(
248+
args={}, service="observationNormals", expand_percentiles=False, client=client
249+
)
250+
assert isinstance(df, gpd.GeoDataFrame)
251+
assert len(df) == 2
252+
assert df.geometry.notna().all()

0 commit comments

Comments
 (0)