Skip to content

Commit 87caec1

Browse files
thodson-usgs and claude committed
Fix mangled column names for NWIS dam sublocation timeseries
NWIS methodDescription values like "STAGE - TAILWATER, [Tailwater]" produced column names with bracket qualifiers (e.g. "00065_stage - tailwater, [tailwater"). Strip the ", [...]" suffix before building the column name so get_iv returns clean names like "00065_stage - tailwater". Closes #177. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c4d0f84 commit 87caec1

2 files changed

Lines changed: 65 additions & 1 deletion

File tree

dataretrieval/nwis.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -972,7 +972,9 @@ def _read_json(json):
972972
method = parameter["method"][0]["methodDescription"]
973973

974974
if method:
975-
method = method.strip("[]()").lower()
975+
# Strip NWIS sublocation qualifier before using the description as a
976+
# column suffix (e.g. "HEADWATER, [Headwater]" → "headwater").
977+
method = method.split(", [")[0].strip("[]() ").lower()
976978
col_name = f"{col_name}_{method}"
977979

978980
if option:

tests/nwis_test.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from dataretrieval.nwis import (
1111
NWIS_Metadata,
12+
_read_json,
1213
_read_rdb,
1314
get_discharge_measurements,
1415
get_gwlevels,
@@ -365,3 +366,64 @@ def test_all_comments_returns_empty_dataframe(self):
365366
df = _read_rdb(rdb)
366367
assert isinstance(df, pd.DataFrame)
367368
assert df.empty
369+
370+
371+
def _make_iv_json(site_no, param_cd, method_description, values):
    """Build a minimal NWIS IV JSON structure for use in _read_json tests.

    The result holds a single time series with a single ``values`` block.

    Parameters
    ----------
    site_no : str
        Stored as the sole ``siteCode`` value.
    param_cd : str
        Stored as the sole ``variableCode`` value.
    method_description : str
        Stored as the ``methodDescription`` of the single method entry.
    values : iterable
        Observations; each one is converted with ``str`` and stamped with
        consecutive days starting at 2023-01-01 (UTC-05:00 offset).

    Returns
    -------
    dict
        Nested dictionary of the form ``{"value": {"timeSeries": [...]}}``.
    """
    # Local import keeps the helper self-contained.
    from datetime import date, timedelta

    first_day = date(2023, 1, 1)
    # Date arithmetic (instead of pasting the index after "2023-01-0") keeps
    # the timestamps valid when ten or more values are supplied.
    points = [
        {
            "value": str(v),
            "dateTime": (
                f"{(first_day + timedelta(days=i)).isoformat()}"
                "T00:00:00.000-05:00"
            ),
            "qualifiers": ["A"],
        }
        for i, v in enumerate(values)
    ]
    return {
        "value": {
            "timeSeries": [
                {
                    "sourceInfo": {"siteCode": [{"value": site_no}]},
                    "variable": {
                        "variableCode": [{"value": param_cd}],
                        "options": {"option": [{"value": None}]},
                    },
                    "values": [
                        {
                            "method": [{"methodDescription": method_description}],
                            "value": points,
                        }
                    ],
                }
            ]
        }
    }
399+
400+
401+
class TestReadJsonColumnNames:
    """Check the column names _read_json derives from methodDescription.

    Regression coverage for GitHub Issue #177, where a sublocation qualifier
    in the NWIS methodDescription (for example
    "STAGE - TAILWATER, [Tailwater]") leaked into the column name.
    """

    def test_simple_method_description(self):
        """A bare description such as 'HEADWATER' yields a clean suffix."""
        payload = _make_iv_json("03399800", "00065", "HEADWATER", [13.0, 13.1])
        frame = _read_json(payload)
        assert "00065_headwater" in frame.columns
        # One row per supplied observation.
        assert len(frame) == 2

    def test_sublocation_bracket_stripped(self):
        """The ', [..]' qualifier must not appear in the column name."""
        payload = _make_iv_json(
            "03399800", "00065", "STAGE - TAILWATER, [Tailwater]", [12.0, 12.1]
        )
        columns = _read_json(payload).columns
        assert "00065_stage - tailwater" in columns
        # The mangled name reported in the issue must be gone.
        assert "00065_stage - tailwater, [tailwater" not in columns

    def test_no_method_description(self):
        """With an empty description the column is just the parameter code."""
        payload = _make_iv_json("01491000", "00060", "", [100.0, 101.0])
        frame = _read_json(payload)
        assert "00060" in frame.columns

0 commit comments

Comments
 (0)