Skip to content

Commit ece2b54

Browse files
thodson-usgs and claude committed
test(nwis): mock the live NWIS tests; drop the flaky_api marker
The legacy NWIS getters hit waterservices.usgs.gov, so several tests made live calls and flaked CI on transient outages (the recurring nwis/site connect timeouts). NWIS is deprecated (removal ~2027), so rather than maintain live coverage, convert these tests to offline fixtures:

- nwis_test.py: mock TestMetaData (site_info → what_sites → /site), TestSiteseriesCatalogOutput (/site, with a captured seriesCatalogOutput fixture vs. the basic one), and TestTZ (/site + /iv). Remove the pure live-smoke test_nwis_service_live (nothing to assert once offline). Drop the now-unneeded module-level flaky_api marker.
- utils_test.py: mock Test_query — a 414 → URLTooLong via the nwis.get_iv getter path (DOI-USGS#64), and the User-Agent header check. Drop flaky_api.
- Add tests/data/nwis_site_seriescatalog.txt (a small captured seriesCatalogOutput=True RDB response).

The module is now fully offline (45 + 36 tests, ~1s, no network), preserving the DOI-USGS#34 / DOI-USGS#60 / DOI-USGS#73 regression intent without the flakes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd
1 parent 047a53e commit ece2b54

3 files changed

Lines changed: 134 additions & 150 deletions

File tree

tests/data/nwis_site_seriescatalog.txt

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#
2+
#
3+
# US Geological Survey
4+
# retrieved: 2026-06-24 13:30:23 -04:00 (caas01)
5+
#
6+
# The Site File stores location and general information about groundwater,
7+
# surface water, and meteorological sites
8+
# for sites in USA.
9+
#
10+
# File-format description: http://help.waterdata.usgs.gov/faq/about-tab-delimited-output
11+
# Automated-retrieval info: http://waterservices.usgs.gov/rest/Site-Service.html
12+
#
13+
# Contact: gs-w_support_nwisweb@usgs.gov
14+
#
15+
# The following selected fields are included in this output:
16+
#
17+
# agency_cd -- Agency
18+
# site_no -- Site identification number
19+
# station_nm -- Site name
20+
# site_tp_cd -- Site type
21+
# dec_lat_va -- Decimal latitude
22+
# dec_long_va -- Decimal longitude
23+
# coord_acy_cd -- Latitude-longitude accuracy
24+
# dec_coord_datum_cd -- Decimal Latitude-longitude datum
25+
# alt_va -- Altitude of Gage/land surface
26+
# alt_acy_va -- Altitude accuracy
27+
# alt_datum_cd -- Altitude datum
28+
# huc_cd -- Hydrologic unit code
29+
# data_type_cd -- Data type
30+
# parm_cd -- Parameter code
31+
# stat_cd -- Statistical code
32+
# ts_id -- Internal timeseries ID
33+
# loc_web_ds -- Additional measurement description
34+
# medium_grp_cd -- Medium group code
35+
# parm_grp_cd -- Parameter group code
36+
# srs_id -- SRS ID
37+
# access_cd -- Access code
38+
# begin_date -- Begin date
39+
# end_date -- End date
40+
# count_nu -- Record count
41+
#
42+
agency_cd site_no station_nm site_tp_cd dec_lat_va dec_long_va coord_acy_cd dec_coord_datum_cd alt_va alt_acy_va alt_datum_cd huc_cd data_type_cd parm_cd stat_cd ts_id loc_web_ds medium_grp_cd parm_grp_cd srs_id access_cd begin_date end_date count_nu
43+
5s 15s 50s 7s 16s 16s 1s 10s 8s 3s 10s 16s 2s 5s 5s 5n 30s 3s 3s 5n 4n 20d 20d 5n
44+
USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 ad 0 wat 0 0 2006 2025 20
45+
USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 dv 00010 00001 68074 [Discontinued] wat 1645597 0 1988-10-01 2012-05-09 894
46+
USGS 01491000 CHOPTANK RIVER NEAR GREENSBORO, MD ST 38.99719444 -75.7858056 S NAD83 2.73 .1 NAVD88 02060005 dv 00010 00001 327630 wat 1645597 0 2023-04-21 2026-06-23 1155

tests/nwis_test.py

Lines changed: 76 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import pandas as pd
1010
import pytest
1111

12-
from dataretrieval.exceptions import DataRetrievalError
1312
from dataretrieval.nwis import (
1413
NWIS_Metadata,
1514
_read_rdb,
@@ -24,17 +23,16 @@
2423
preformat_peaks_response,
2524
what_sites,
2625
)
27-
from tests.conftest import flaky_api
2826

2927
START_DATE = "2018-01-24"
3028
END_DATE = "2018-01-25"
3129

3230
DATETIME_COL = "datetime"
3331
SITENO_COL = "site_no"
3432

35-
# Several tests in this module hit the live NWIS services, so retry a transient
36-
# upstream failure rather than failing CI (see ``conftest.flaky_api``).
37-
pytestmark = flaky_api
33+
# Legacy NWIS endpoints these tests mock — this module makes no live calls.
34+
_SITE_RE = re.compile(r"^https://waterservices\.usgs\.gov/nwis/site(\?.*)?$")
35+
_IV_RE = re.compile(r"^https://waterservices\.usgs\.gov/nwis/iv(\?.*)?$")
3836

3937

4038
def _load_mock_json(file_name):
@@ -44,6 +42,16 @@ def _load_mock_json(file_name):
4442
return json.load(f)
4543

4644

45+
def _load_fixture(file_name):
46+
"""Read a raw fixture file (e.g. an RDB response) from tests/data."""
47+
return (Path(__file__).parent / "data" / file_name).read_text(encoding="utf-8")
48+
49+
50+
def _mock_site(httpx_mock, fixture="waterservices_site.txt"):
51+
"""Mock the legacy NWIS ``site`` endpoint with an RDB fixture."""
52+
httpx_mock.add_response(method="GET", url=_SITE_RE, text=_load_fixture(fixture))
53+
54+
4755
def _test_iv_service(httpx_mock):
4856
"""Mocked test of instantaneous value service"""
4957
start = START_DATE
@@ -73,39 +81,6 @@ def test_iv_service_answer(httpx_mock):
7381
], f"iv service returned incorrect index: {df.index.names}"
7482

7583

76-
def test_nwis_service_live():
77-
"""Live sanity check of NWIS service, tolerant of transient NWIS outages."""
78-
site = "01491000"
79-
try:
80-
# Minimal query: just most recent record
81-
get_iv(sites=site)
82-
except (DataRetrievalError, ValueError) as e:
83-
# Catch known transient service failures: a typed DataRetrievalError
84-
# (e.g. ServiceUnavailable on a 5xx, a RuntimeError) or a legacy ValueError
85-
error_text = str(e)
86-
if any(
87-
err in error_text
88-
for err in [
89-
"500",
90-
"502",
91-
"503",
92-
"Service Unavailable",
93-
"Received HTML response instead of JSON",
94-
]
95-
):
96-
pytest.skip(
97-
f"Service is currently unavailable (transient NWIS outage): {e}"
98-
)
99-
raise
100-
except Exception as e:
101-
# Fallback for other potential transient network issues
102-
if "Expecting value" in str(e) or "JSON" in str(e):
103-
pytest.skip(
104-
f"Service returned invalid response (likely transient outage): {e}"
105-
)
106-
raise
107-
108-
10984
def test_preformat_peaks_response():
11085
# make a data frame with a "peak_dt" datetime column
11186
# it will have some nan and none values
@@ -119,14 +94,6 @@ def test_preformat_peaks_response():
11994
assert df["datetime"].isna().sum() == 0
12095

12196

122-
# tests using real queries to USGS webservices
123-
# these specific queries represent some edge-cases and the tests to address
124-
# incomplete date-time information
125-
126-
127-
# Removed defunct gwlevels tests.
128-
129-
13097
class TestDeprecationWarnings:
13198
"""Verify per-function DeprecationWarning fires with the right replacement.
13299
@@ -257,78 +224,80 @@ def test_get_record_defunct_service_water_use(self):
257224

258225

259226
class TestTZ:
260-
"""Tests relating to GitHub Issue #60."""
227+
"""Tests relating to GitHub Issue #60 — merging IV results across sites
228+
yields a proper datetime index. Mocked against fixture responses."""
261229

262-
@pytest.fixture(scope="class")
263-
def sites(self):
264-
# Fetch once per class, at test time (not at collection) so a transient
265-
# upstream failure is retried by the module ``flaky`` marker instead of
266-
# aborting collection — a class-body call cannot be reran.
267-
sites, _ = what_sites(stateCd="MD")
268-
return sites
230+
def _mock(self, httpx_mock):
231+
_mock_site(httpx_mock)
232+
httpx_mock.add_response(
233+
method="GET", url=_IV_RE, json=_load_mock_json("nwis_iv_mock.json")
234+
)
269235

270-
def test_multiple_tz_01(self, sites):
271-
"""Test based on GitHub Issue #60 - error merging different time zones."""
272-
# this test fails before issue #60 is fixed
236+
def test_multiple_tz_01(self, httpx_mock):
237+
"""Issue #60 - merging IV across sites yields a datetime index."""
238+
self._mock(httpx_mock)
239+
sites, _ = what_sites(stateCd="MD")
273240
iv, _ = get_iv(sites=sites.site_no.values[:25].tolist())
274-
# assert that the datetime column exists
275241
assert "datetime" in iv.index.names
276-
# assert that it is a datetime type
277242
assert isinstance(iv.index[0][1], datetime.datetime)
278243

279-
def test_multiple_tz_02(self, sites):
280-
"""Test based on GitHub Issue #60 - confirm behavior for same tz."""
281-
# this test passes before issue #60 is fixed
244+
def test_multiple_tz_02(self, httpx_mock):
245+
"""Issue #60 - the same-tz path also yields a datetime index."""
246+
self._mock(httpx_mock)
247+
sites, _ = what_sites(stateCd="MD")
282248
iv, _ = get_iv(sites=sites.site_no.values[:20].tolist())
283-
# assert that the datetime column exists
284249
assert "datetime" in iv.index.names
285-
# assert that it is a datetime type
286250
assert isinstance(iv.index[0][1], datetime.datetime)
287251

288252

289253
class TestSiteseriesCatalogOutput:
290-
"""Tests relating to GitHub Issue #34."""
254+
"""Tests relating to GitHub Issue #34 — ``seriesCatalogOutput`` adds the
255+
data-inventory columns (begin_date / end_date / count_nu). Mocked against
256+
fixture responses (the chosen fixture, not the request param, decides which
257+
columns come back)."""
291258

292-
def test_seriesCatalogOutput_get_record(self):
293-
"""Test setting seriesCatalogOutput to true with get_record."""
259+
_SERIESCATALOG = "nwis_site_seriescatalog.txt"
260+
261+
def test_seriesCatalogOutput_get_record(self, httpx_mock):
262+
"""seriesCatalogOutput=True with get_record exposes inventory columns."""
263+
_mock_site(httpx_mock, self._SERIESCATALOG)
294264
data = get_record(
295265
huc="20", parameterCd="00060", service="site", seriesCatalogOutput="True"
296266
)
297-
# assert that expected data columns are present
298267
assert "begin_date" in data.columns
299268
assert "end_date" in data.columns
300269
assert "count_nu" in data.columns
301270

302-
def test_seriesCatalogOutput_get_info(self):
303-
"""Test setting seriesCatalogOutput to true with get_info."""
271+
def test_seriesCatalogOutput_get_info(self, httpx_mock):
272+
"""seriesCatalogOutput=TRUE with get_info exposes inventory columns."""
273+
_mock_site(httpx_mock, self._SERIESCATALOG)
304274
data, _ = get_info(huc="20", parameterCd="00060", seriesCatalogOutput="TRUE")
305-
# assert that expected data columns are present
306275
assert "begin_date" in data.columns
307276
assert "end_date" in data.columns
308277
assert "count_nu" in data.columns
309278

310-
def test_seriesCatalogOutput_bool(self):
311-
"""Test setting seriesCatalogOutput with a boolean."""
279+
def test_seriesCatalogOutput_bool(self, httpx_mock):
280+
"""A boolean seriesCatalogOutput is accepted and exposes inventory cols."""
281+
_mock_site(httpx_mock, self._SERIESCATALOG)
312282
data, _ = get_info(huc="20", parameterCd="00060", seriesCatalogOutput=True)
313-
# assert that expected data columns are present
314283
assert "begin_date" in data.columns
315284
assert "end_date" in data.columns
316285
assert "count_nu" in data.columns
317286

318-
def test_expandedrdb_get_record(self):
319-
"""Test default expanded_rdb format with get_record."""
287+
def test_expandedrdb_get_record(self, httpx_mock):
288+
"""The default expanded-rdb format omits the inventory columns."""
289+
_mock_site(httpx_mock)
320290
data = get_record(
321291
huc="20", parameterCd="00060", service="site", seriesCatalogOutput="False"
322292
)
323-
# assert that seriesCatalogOutput columns are not present
324293
assert "begin_date" not in data.columns
325294
assert "end_date" not in data.columns
326295
assert "count_nu" not in data.columns
327296

328-
def test_expandedrdb_get_info(self):
329-
"""Test default expanded_rdb format with get_info."""
297+
def test_expandedrdb_get_info(self, httpx_mock):
298+
"""get_info default omits the inventory columns."""
299+
_mock_site(httpx_mock)
330300
data, _ = get_info(huc="20", parameterCd="00060")
331-
# assert that seriesCatalogOutput columns are not present
332301
assert "begin_date" not in data.columns
333302
assert "end_date" not in data.columns
334303
assert "count_nu" not in data.columns
@@ -353,67 +322,46 @@ def test_empty_timeseries(httpx_mock):
353322

354323

355324
class TestMetaData:
356-
"""Tests of NWIS metadata setting,
357-
358-
Notes
359-
-----
325+
"""Tests of NWIS metadata setting (originally GitHub Issue #73).
360326
361-
- Originally based on GitHub Issue #73.
362-
- Modified to expose site_info as a property, not a callable.
327+
``site_info`` is a property that lazily re-queries ``what_sites``; mocked
328+
here against the ``site`` endpoint so it is exercised offline.
363329
"""
364330

365-
def test_set_metadata_info_site(self):
366-
"""Test metadata info is set when site parameter is supplied."""
367-
# mock the query response
368-
response = mock.MagicMock()
369-
# make metadata call
370-
md = NWIS_Metadata(response, sites="01491000")
371-
# assert that site_info is implemented
331+
def test_set_metadata_info_site(self, httpx_mock):
332+
"""site_info is populated when ``sites`` is supplied."""
333+
_mock_site(httpx_mock)
334+
md = NWIS_Metadata(mock.MagicMock(), sites="01491000")
372335
assert md.site_info
373336

374-
def test_set_metadata_info_site_no(self):
375-
"""Test metadata info is set when site_no parameter is supplied."""
376-
# mock the query response
377-
response = mock.MagicMock()
378-
# make metadata call
379-
md = NWIS_Metadata(response, site_no="01491000")
380-
# assert that site_info is implemented
337+
def test_set_metadata_info_site_no(self, httpx_mock):
338+
"""site_info is populated when ``site_no`` is supplied."""
339+
_mock_site(httpx_mock)
340+
md = NWIS_Metadata(mock.MagicMock(), site_no="01491000")
381341
assert md.site_info
382342

383-
def test_set_metadata_info_stateCd(self):
384-
"""Test metadata info is set when stateCd parameter is supplied."""
385-
# mock the query response
386-
response = mock.MagicMock()
387-
# make metadata call
388-
md = NWIS_Metadata(response, stateCd="RI")
389-
# assert that site_info is implemented
343+
def test_set_metadata_info_stateCd(self, httpx_mock):
344+
"""site_info is populated when ``stateCd`` is supplied."""
345+
_mock_site(httpx_mock)
346+
md = NWIS_Metadata(mock.MagicMock(), stateCd="RI")
390347
assert md.site_info
391348

392-
def test_set_metadata_info_huc(self):
393-
"""Test metadata info is set when huc parameter is supplied."""
394-
# mock the query response
395-
response = mock.MagicMock()
396-
# make metadata call
397-
md = NWIS_Metadata(response, huc="01")
398-
# assert that site_info is implemented
349+
def test_set_metadata_info_huc(self, httpx_mock):
350+
"""site_info is populated when ``huc`` is supplied."""
351+
_mock_site(httpx_mock)
352+
md = NWIS_Metadata(mock.MagicMock(), huc="01")
399353
assert md.site_info
400354

401-
def test_set_metadata_info_bbox(self):
402-
"""Test metadata info is set when bbox parameter is supplied."""
403-
# mock the query response
404-
response = mock.MagicMock()
405-
# make metadata call
406-
md = NWIS_Metadata(response, bBox="-92.8,44.2,-88.9,46.0")
407-
# assert that site_info is implemented
355+
def test_set_metadata_info_bbox(self, httpx_mock):
356+
"""site_info is populated when ``bBox`` is supplied."""
357+
_mock_site(httpx_mock)
358+
md = NWIS_Metadata(mock.MagicMock(), bBox="-92.8,44.2,-88.9,46.0")
408359
assert md.site_info
409360

410-
def test_set_metadata_info_countyCd(self):
411-
"""Test metadata info is set when countyCd parameter is supplied."""
412-
# mock the query response
413-
response = mock.MagicMock()
414-
# make metadata call
415-
md = NWIS_Metadata(response, countyCd="01001")
416-
# assert that site_info is implemented
361+
def test_set_metadata_info_countyCd(self, httpx_mock):
362+
"""site_info is populated when ``countyCd`` is supplied."""
363+
_mock_site(httpx_mock)
364+
md = NWIS_Metadata(mock.MagicMock(), countyCd="01001")
417365
assert md.site_info
418366

419367

0 commit comments

Comments
 (0)