Skip to content

Commit 4a02192

Browse files
nodohsclaude
authored andcommitted
fix(nwis): forward get_record state, fix major-filter validation, reject bad format
Several fixes in the deprecated `nwis` module, surfaced by the package review. - `get_record(state=…)` was accepted and documented but never forwarded. It now reaches the request as the NWIS `stateCd` major filter; previously it was silently dropped, producing a confusing "Bad Request" when used without `sites`. - The "must specify a major filter" validation was defeated: callers injected a filter key unconditionally (e.g. `kwargs["sites"] = kwargs.pop("sites", sites)` set the key to None when absent), so the membership-based guards in `query_waterservices`/`query_waterdata` always passed and a filterless request went out as a confusing "Bad Request" instead of the intended TypeError. The guards now require a filter that is present *and* non-None, rejecting an unset filter at the chokepoint — no per-getter pre-filtering needed. (`utils.query` already strips None-valued params, so this is purely a validation change.) - `get_dv`/`get_iv`/`get_discharge_peaks`/`get_stats` each parse a fixed response body but passed `format=` explicitly alongside `**kwargs`, so e.g. `get_dv(sites=…, format="rdb")` raised "multiple values for 'format'". A caller-supplied non-native `format` is now rejected with a clear ValueError via the shared `_reject_unexpected_format` helper (json for dv/iv, rdb for peaks/stats). - `get_ratings` is per-site; called without one it issued a request returning an unhelpful error page. It now fails fast with a clear TypeError (also covering `get_record(service="ratings")`). Adds regression tests for each. Full nwis suite passes; ruff and mypy --strict clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 4daf771 commit 4a02192

2 files changed

Lines changed: 162 additions & 4 deletions

File tree

dataretrieval/nwis.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,21 @@ def _parse_json_or_raise(response: httpx.Response) -> pd.DataFrame:
126126
raise
127127

128128

129+
def _reject_unexpected_format(
130+
kwargs: dict[str, Any], func_name: str, expected: str
131+
) -> None:
132+
"""Drop ``format`` from ``kwargs`` and reject any value other than ``expected``.
133+
134+
These getters always request a fixed ``format`` and parse that specific body,
135+
so a caller-supplied ``format`` would either collide with the explicit
136+
``format=`` argument or be silently overridden; reject it explicitly instead.
137+
"""
138+
if kwargs.pop("format", expected) != expected:
139+
raise ValueError(
140+
f"{func_name} returns {expected.upper()} and does not accept a `format`."
141+
)
142+
143+
129144
def format_response(
130145
df: pd.DataFrame, service: str | None = None, **kwargs: Any
131146
) -> pd.DataFrame:
@@ -277,6 +292,7 @@ def get_discharge_peaks(
277292
kwargs["end_date"] = kwargs.pop("end_date", end)
278293
kwargs["multi_index"] = multi_index
279294

295+
_reject_unexpected_format(kwargs, "get_discharge_peaks", "rdb")
280296
response = query_waterdata("peaks", format="rdb", ssl_check=ssl_check, **kwargs)
281297

282298
# Parse raw (read_rdb), not _read_rdb — the latter already runs
@@ -355,6 +371,8 @@ def get_stats(
355371
"""
356372
_check_sites_value_types(sites)
357373

374+
# get_stats parses an RDB body; reject a caller-supplied non-RDB `format`.
375+
_reject_unexpected_format(kwargs, "get_stats", "rdb")
358376
response = query_waterservices(
359377
service="stat", sites=sites, ssl_check=ssl_check, **kwargs
360378
)
@@ -392,11 +410,13 @@ def query_waterdata(
392410
"se_latitude_va",
393411
]
394412

395-
if not any(key in kwargs for key in major_params + bbox_params):
413+
# Present *and* non-None — see query_waterservices for why membership alone
414+
# would let an unset filter slip through.
415+
if not any(kwargs.get(key) is not None for key in major_params + bbox_params):
396416
raise TypeError("Query must specify a major filter: site_no, stateCd, bBox")
397417

398-
elif any(key in kwargs for key in bbox_params) and not all(
399-
key in kwargs for key in bbox_params
418+
elif any(kwargs.get(key) is not None for key in bbox_params) and not all(
419+
kwargs.get(key) is not None for key in bbox_params
400420
):
401421
raise TypeError("One or more lat/long coordinates missing or invalid.")
402422

@@ -453,8 +473,15 @@ def query_waterservices(
453473
The response object from the API request to the web service
454474
455475
"""
476+
# A major filter must be present *and* non-None. Membership alone is not
477+
# enough: callers may inject an unset filter (e.g. sites=None), and
478+
# utils.query() later strips None-valued params, so a plain `"sites" in
479+
# kwargs` would wave a filterless request through to a confusing "Bad
480+
# Request". Checking the value keeps that decision in one place instead of
481+
# forcing every getter to pre-filter its own kwargs.
456482
if not any(
457-
key in kwargs for key in ["sites", "stateCd", "bBox", "huc", "countyCd"]
483+
kwargs.get(key) is not None
484+
for key in ["sites", "stateCd", "bBox", "huc", "countyCd"]
458485
):
459486
raise TypeError(
460487
"Query must specify a major filter: sites, stateCd, bBox, huc, or countyCd"
@@ -537,6 +564,7 @@ def get_dv(
537564
kwargs["sites"] = kwargs.pop("sites", sites)
538565
kwargs["multi_index"] = multi_index
539566

567+
_reject_unexpected_format(kwargs, "get_dv", "json")
540568
response = query_waterservices("dv", format="json", ssl_check=ssl_check, **kwargs)
541569
df = _parse_json_or_raise(response)
542570

@@ -724,6 +752,7 @@ def get_iv(
724752
kwargs["sites"] = kwargs.pop("sites", sites)
725753
kwargs["multi_index"] = multi_index
726754

755+
_reject_unexpected_format(kwargs, "get_iv", "json")
727756
response = query_waterservices(
728757
service="iv", format="json", ssl_check=ssl_check, **kwargs
729758
)
@@ -797,6 +826,11 @@ def get_ratings(
797826
798827
"""
799828
site = kwargs.pop("site_no", site)
829+
# The ratings endpoint is per-site; without one it would issue a request
830+
# that returns an unhelpful error page. Fail fast with a clear message
831+
# (also covers get_record(service="ratings") called without a site).
832+
if site is None:
833+
raise TypeError("get_ratings requires a `site` (USGS site number).")
800834

801835
payload = {}
802836
url = WATERDATA_BASE_URL + "nwisweb/get_ratings/"
@@ -965,6 +999,12 @@ def get_record(
965999
if service not in WATERSERVICES_SERVICES + WATERDATA_SERVICES:
9661000
raise TypeError(f"Unrecognized service: {service}")
9671001

1002+
# Forward the documented `state` filter as the NWIS `stateCd` major filter;
1003+
# it was previously accepted but silently ignored, which produced a
1004+
# confusing "Bad Request" when used without `sites`.
1005+
if state is not None:
1006+
kwargs.setdefault("stateCd", state)
1007+
9681008
if service == "iv":
9691009
df, _ = get_iv(
9701010
sites=sites,

tests/nwis_test.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,14 @@
1212
NWIS_Metadata,
1313
_read_rdb,
1414
get_discharge_measurements,
15+
get_discharge_peaks,
16+
get_dv,
1517
get_gwlevels,
1618
get_iv,
1719
get_pmcodes,
1820
get_qwdata,
1921
get_record,
22+
get_stats,
2023
get_water_use,
2124
preformat_peaks_response,
2225
)
@@ -90,6 +93,121 @@ def test_preformat_peaks_response():
9093
assert df["datetime"].isna().sum() == 0
9194

9295

96+
# tests using real queries to USGS webservices
97+
# these specific queries represent some edge-cases and the tests to address
98+
# incomplete date-time information
99+
100+
101+
# Removed defunct gwlevels tests.
102+
103+
104+
def test_get_dv_requires_major_filter():
105+
"""Regression: get_dv() with no major filter must raise the documented
106+
TypeError. The getters injected ``kwargs["sites"] = kwargs.pop("sites",
107+
sites)``, which always set the key (None when absent) and so defeated
108+
query_waterservices' filter check, yielding a confusing Bad Request."""
109+
with warnings.catch_warnings():
110+
warnings.simplefilter("ignore")
111+
with pytest.raises(TypeError, match="major filter"):
112+
get_dv()
113+
114+
115+
def test_get_dv_rejects_non_json_format():
116+
"""Regression: get_dv passed format= explicitly alongside **kwargs, so
117+
get_dv(..., format="rdb") raised 'multiple values for format'. It now
118+
rejects a non-json format with a clear ValueError (it parses JSON)."""
119+
with warnings.catch_warnings():
120+
warnings.simplefilter("ignore")
121+
with pytest.raises(ValueError, match="JSON"):
122+
get_dv(sites="01646500", format="rdb")
123+
124+
125+
def test_get_record_forwards_state_as_statecd(monkeypatch):
126+
"""Regression: get_record's documented `state` arg was accepted but never
127+
forwarded. It now reaches the request as the NWIS `stateCd` major filter."""
128+
import dataretrieval.nwis as nwis_mod
129+
130+
captured: dict = {}
131+
132+
def fake_query_waterservices(service, format=None, ssl_check=True, **kw):
133+
captured.update(kw)
134+
raise RuntimeError("stop before network")
135+
136+
monkeypatch.setattr(nwis_mod, "query_waterservices", fake_query_waterservices)
137+
with warnings.catch_warnings():
138+
warnings.simplefilter("ignore")
139+
with pytest.raises(RuntimeError, match="stop"):
140+
get_record(state="OH", service="dv")
141+
assert captured.get("stateCd") == "OH"
142+
143+
144+
def test_get_stats_requires_major_filter():
145+
"""Regression: get_stats passed ``sites=sites`` explicitly, so the key was
146+
always present (None when absent) and defeated query_waterservices' filter
147+
check -- get_stats() reached the network and returned a confusing Bad
148+
Request instead of the documented TypeError."""
149+
with warnings.catch_warnings():
150+
warnings.simplefilter("ignore")
151+
with pytest.raises(TypeError, match="major filter"):
152+
get_stats()
153+
154+
155+
@pytest.mark.parametrize("service", ["stat", "site"])
156+
def test_get_record_requires_major_filter(service):
157+
"""Regression: get_record(service="stat"/"site") forwarded sites=None into
158+
get_stats/get_info, defeating the major-filter check. With no filter it must
159+
raise the documented TypeError rather than reach the network."""
160+
with warnings.catch_warnings():
161+
warnings.simplefilter("ignore")
162+
with pytest.raises(TypeError, match="major filter"):
163+
get_record(service=service)
164+
165+
166+
def test_get_discharge_peaks_rejects_format():
167+
"""Regression: get_discharge_peaks passed format="rdb" explicitly alongside
168+
**kwargs, so any caller-supplied format raised 'multiple values for format'.
169+
A non-native format is now rejected with a clear ValueError (it parses RDB)."""
170+
with warnings.catch_warnings():
171+
warnings.simplefilter("ignore")
172+
with pytest.raises(ValueError, match="RDB"):
173+
get_discharge_peaks(sites="01491000", format="json")
174+
175+
176+
def test_get_discharge_peaks_accepts_native_format(monkeypatch):
177+
"""The reported collision was that even the *native* format="rdb" raised
178+
'multiple values for format'. Popping it first resolves the collision, so
179+
format="rdb" now reaches the request rather than crashing."""
180+
import dataretrieval.nwis as nwis_mod
181+
182+
def fake_query_waterdata(service, format=None, ssl_check=True, **kw):
183+
raise RuntimeError("stop before network")
184+
185+
monkeypatch.setattr(nwis_mod, "query_waterdata", fake_query_waterdata)
186+
with warnings.catch_warnings():
187+
warnings.simplefilter("ignore")
188+
with pytest.raises(RuntimeError, match="stop"):
189+
get_discharge_peaks(sites="01491000", format="rdb")
190+
191+
192+
def test_get_stats_rejects_non_rdb_format():
193+
"""get_stats parses RDB; a caller-supplied non-RDB format would be requested
194+
and then mis-parsed. It is now rejected with a clear ValueError."""
195+
with warnings.catch_warnings():
196+
warnings.simplefilter("ignore")
197+
with pytest.raises(ValueError, match="RDB"):
198+
get_stats(sites="01646500", format="json")
199+
200+
201+
def test_get_ratings_requires_site():
202+
"""The ratings endpoint is per-site; get_ratings() / get_record(
203+
service="ratings") with no site previously issued a request that returned an
204+
unhelpful error page. It now fails fast with a clear TypeError."""
205+
with warnings.catch_warnings():
206+
warnings.simplefilter("ignore")
207+
with pytest.raises(TypeError, match="requires a `site`"):
208+
get_record(service="ratings")
209+
210+
93211
class TestDeprecationWarnings:
94212
"""Verify per-function DeprecationWarning fires with the right replacement.
95213

0 commit comments

Comments
 (0)