Skip to content

Commit 2ec0fef

Browse files
thodson-usgsclaude
andcommitted
fix(nwis): extend major-filter & format guards to get_stats, get_info, peaks, ratings
Same package-review bug class as the rest of this PR, in the remaining deprecated nwis getters: - get_stats injected `sites=sites` unconditionally, defeating query_waterservices' major-filter check, so get_stats() (and get_record(service="stat")) hit the network and returned a confusing "Bad Request" instead of the documented TypeError. Inject only when given. - get_record(service="site") forwarded sites=None into get_info, the only getter not guarding its own injection; drop an unset `sites` there too. - get_discharge_peaks passed format="rdb" explicitly alongside **kwargs, so any caller-supplied `format` raised "multiple values for 'format'". Generalize _reject_non_json_format -> _reject_unexpected_format and apply it (json for dv/iv, rdb for peaks and stats). - get_stats now rejects a non-RDB `format` instead of requesting it and mis-parsing the response. - get_ratings is per-site; with no site it issued a request that returned an unhelpful error page. Fail fast with a clear TypeError (also covers get_record(service="ratings")). Adds 7 regression tests; all fail on the prior commit. Full nwis suite (57) passes; ruff clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 0ccd573 commit 2ec0fef

2 files changed

Lines changed: 102 additions & 12 deletions

File tree

dataretrieval/nwis.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,17 @@ def _parse_json_or_raise(response: httpx.Response) -> pd.DataFrame:
130130
raise
131131

132132

133-
def _reject_non_json_format(kwargs: dict, func_name: str) -> None:
134-
"""Drop ``format`` from ``kwargs`` and reject any non-JSON value.
133+
def _reject_unexpected_format(kwargs: dict, func_name: str, expected: str) -> None:
134+
"""Drop ``format`` from ``kwargs`` and reject any value other than ``expected``.
135135
136-
These getters always request ``format="json"`` and parse the JSON body, so
137-
a caller-supplied ``format`` (e.g. ``"rdb"``) would be silently overridden;
138-
reject it explicitly instead.
136+
These getters always request a fixed ``format`` and parse that specific body,
137+
so a caller-supplied ``format`` would either collide with the explicit
138+
``format=`` argument or be silently overridden; reject it explicitly instead.
139139
"""
140-
if kwargs.pop("format", "json") != "json":
141-
raise ValueError(f"{func_name} returns JSON and does not accept a `format`.")
140+
if kwargs.pop("format", expected) != expected:
141+
raise ValueError(
142+
f"{func_name} returns {expected.upper()} and does not accept a `format`."
143+
)
142144

143145

144146
def format_response(
@@ -293,6 +295,7 @@ def get_discharge_peaks(
293295
kwargs["end_date"] = kwargs.pop("end_date", end)
294296
kwargs["multi_index"] = multi_index
295297

298+
_reject_unexpected_format(kwargs, "get_discharge_peaks", "rdb")
296299
response = query_waterdata("peaks", format="rdb", ssl_check=ssl_check, **kwargs)
297300

298301
df = _read_rdb(response.text)
@@ -368,9 +371,16 @@ def get_stats(
368371
"""
369372
_check_sites_value_types(sites)
370373

371-
response = query_waterservices(
372-
service="stat", sites=sites, ssl_check=ssl_check, **kwargs
373-
)
374+
# Inject `sites` only when provided; unconditionally setting it (even to
375+
# None) would defeat query_waterservices' major-filter check, yielding a
376+
# confusing "Bad Request" instead of the intended TypeError.
377+
if sites is not None:
378+
kwargs.setdefault("sites", sites)
379+
380+
# get_stats parses an RDB body; reject a caller-supplied non-RDB `format`
381+
# rather than request it and then mis-parse the response.
382+
_reject_unexpected_format(kwargs, "get_stats", "rdb")
383+
response = query_waterservices(service="stat", ssl_check=ssl_check, **kwargs)
374384

375385
return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)
376386

@@ -550,7 +560,7 @@ def get_dv(
550560
kwargs.setdefault("sites", sites)
551561
kwargs["multi_index"] = multi_index
552562

553-
_reject_non_json_format(kwargs, "get_dv")
563+
_reject_unexpected_format(kwargs, "get_dv", "json")
554564
response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
555565
df = _parse_json_or_raise(response)
556566

@@ -668,6 +678,12 @@ def get_info(ssl_check: bool = True, **kwargs) -> tuple[pd.DataFrame, BaseMetada
668678
# cannot have both seriesCatalogOutput and the expanded format
669679
kwargs["siteOutput"] = "Expanded"
670680

681+
# get_record(service="site") forwards sites=None; drop it so an unset filter
682+
# doesn't defeat query_waterservices' major-filter check (other getters
683+
# already guard their own injection).
684+
if kwargs.get("sites") is None:
685+
kwargs.pop("sites", None)
686+
671687
response = query_waterservices("site", ssl_check=ssl_check, **kwargs)
672688

673689
return _read_rdb(response.text), NWIS_Metadata(response, **kwargs)
@@ -737,7 +753,7 @@ def get_iv(
737753
kwargs.setdefault("sites", sites)
738754
kwargs["multi_index"] = multi_index
739755

740-
_reject_non_json_format(kwargs, "get_iv")
756+
_reject_unexpected_format(kwargs, "get_iv", "json")
741757
response = query_waterservices(
742758
service="iv", format="json", ssl_check=ssl_check, **kwargs
743759
)
@@ -802,6 +818,11 @@ def get_ratings(
802818
803819
"""
804820
site = kwargs.pop("site_no", site)
821+
# The ratings endpoint is per-site; without one it would issue a request
822+
# that returns an unhelpful error page. Fail fast with a clear message
823+
# (also covers get_record(service="ratings") called without a site).
824+
if site is None:
825+
raise TypeError("get_ratings requires a `site` (USGS site number).")
805826

806827
payload = {}
807828
url = WATERDATA_BASE_URL + "nwisweb/get_ratings/"

tests/nwis_test.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,15 @@
1313
NWIS_Metadata,
1414
_read_rdb,
1515
get_discharge_measurements,
16+
get_discharge_peaks,
1617
get_dv,
1718
get_gwlevels,
1819
get_info,
1920
get_iv,
2021
get_pmcodes,
2122
get_qwdata,
2223
get_record,
24+
get_stats,
2325
get_water_use,
2426
preformat_peaks_response,
2527
what_sites,
@@ -161,6 +163,73 @@ def fake_query_waterservices(service, format=None, ssl_check=True, **kw):
161163
assert captured.get("stateCd") == "OH"
162164

163165

166+
def test_get_stats_requires_major_filter():
167+
"""Regression: get_stats passed ``sites=sites`` explicitly, so the key was
168+
always present (None when absent) and defeated query_waterservices' filter
169+
check -- get_stats() reached the network and returned a confusing Bad
170+
Request instead of the documented TypeError."""
171+
with warnings.catch_warnings():
172+
warnings.simplefilter("ignore")
173+
with pytest.raises(TypeError, match="major filter"):
174+
get_stats()
175+
176+
177+
@pytest.mark.parametrize("service", ["stat", "site"])
178+
def test_get_record_requires_major_filter(service):
179+
"""Regression: get_record(service="stat"/"site") forwarded sites=None into
180+
get_stats/get_info, defeating the major-filter check. With no filter it must
181+
raise the documented TypeError rather than reach the network."""
182+
with warnings.catch_warnings():
183+
warnings.simplefilter("ignore")
184+
with pytest.raises(TypeError, match="major filter"):
185+
get_record(service=service)
186+
187+
188+
def test_get_discharge_peaks_rejects_format():
189+
"""Regression: get_discharge_peaks passed format="rdb" explicitly alongside
190+
**kwargs, so any caller-supplied format raised 'multiple values for format'.
191+
A non-native format is now rejected with a clear ValueError (it parses RDB)."""
192+
with warnings.catch_warnings():
193+
warnings.simplefilter("ignore")
194+
with pytest.raises(ValueError, match="RDB"):
195+
get_discharge_peaks(sites="01491000", format="json")
196+
197+
198+
def test_get_discharge_peaks_accepts_native_format(monkeypatch):
199+
"""The reported collision was that even the *native* format="rdb" raised
200+
'multiple values for format'. Popping it first resolves the collision, so
201+
format="rdb" now reaches the request rather than crashing."""
202+
import dataretrieval.nwis as nwis_mod
203+
204+
def fake_query_waterdata(service, format=None, ssl_check=True, **kw):
205+
raise RuntimeError("stop before network")
206+
207+
monkeypatch.setattr(nwis_mod, "query_waterdata", fake_query_waterdata)
208+
with warnings.catch_warnings():
209+
warnings.simplefilter("ignore")
210+
with pytest.raises(RuntimeError, match="stop"):
211+
get_discharge_peaks(sites="01491000", format="rdb")
212+
213+
214+
def test_get_stats_rejects_non_rdb_format():
215+
"""get_stats parses RDB; a caller-supplied non-RDB format would be requested
216+
and then mis-parsed. It is now rejected with a clear ValueError."""
217+
with warnings.catch_warnings():
218+
warnings.simplefilter("ignore")
219+
with pytest.raises(ValueError, match="RDB"):
220+
get_stats(sites="01646500", format="json")
221+
222+
223+
def test_get_ratings_requires_site():
224+
"""The ratings endpoint is per-site; get_ratings() / get_record(
225+
service="ratings") with no site previously issued a request that returned an
226+
unhelpful error page. It now fails fast with a clear TypeError."""
227+
with warnings.catch_warnings():
228+
warnings.simplefilter("ignore")
229+
with pytest.raises(TypeError, match="requires a `site`"):
230+
get_record(service="ratings")
231+
232+
164233
class TestDeprecationWarnings:
165234
"""Verify per-function DeprecationWarning fires with the right replacement.
166235

0 commit comments

Comments
 (0)