Skip to content

Commit 353d379

Browse files
thodson-usgs and claude authored
Make _handle_stats_nesting tolerant of missing drop columns (#257)
Both `.drop()` calls in `_handle_stats_nesting` (for the geopandas and pandas branches) hardcoded literal column names — `["type", "properties.data"]` and `["data"]`. If a stats response is ever returned in a slightly different shape (or one of those keys is renamed/removed), `drop()` raises `KeyError` and aborts the helper. The sibling `pd.json_normalize(...)` call later in the same function already passes `errors="ignore"`, so this commit adds `errors="ignore"` to the two `drop()` calls for parity.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 4570192 commit 353d379

2 files changed

Lines changed: 35 additions & 2 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -892,11 +892,13 @@ def _handle_stats_nesting(
892892
# otherwise return a geodataframe
893893
if not geopd:
894894
df = pd.json_normalize(body["features"]).drop(
895-
columns=["type", "properties.data"]
895+
columns=["type", "properties.data"], errors="ignore"
896896
)
897897
df.columns = df.columns.str.split(".").str[-1]
898898
else:
899-
df = gpd.GeoDataFrame.from_features(body["features"]).drop(columns=["data"])
899+
df = gpd.GeoDataFrame.from_features(body["features"]).drop(
900+
columns=["data"], errors="ignore"
901+
)
900902

901903
# Unnest json features, properties, data, and values while retaining necessary
902904
# metadata to merge with main dataframe.

tests/waterdata_utils_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from dataretrieval.waterdata.utils import (
66
_get_args,
7+
_handle_stats_nesting,
78
_walk_pages,
89
)
910

@@ -80,3 +81,33 @@ def test_walk_pages_multiple_mocked():
8081
assert mock_client.send.called
8182
assert mock_client.request.called
8283
assert mock_client.request.call_args[0][1] == "https://example.com/page2"
84+
85+
86+
def test_handle_stats_nesting_tolerates_missing_drop_columns():
87+
"""If the upstream stats response shape ever changes such that one of
88+
the columns we try to drop ("type", "properties.data") is absent, the
89+
function should still return a DataFrame instead of raising KeyError.
90+
"""
91+
body = {
92+
"next": None,
93+
"features": [
94+
{
95+
"properties": {
96+
"monitoring_location_id": "USGS-12345",
97+
"data": [
98+
{
99+
"parameter_code": "00060",
100+
"unit_of_measure": "ft^3/s",
101+
"parent_time_series_id": "ts-1",
102+
"values": [{"statistic_id": "mean", "value": 10.0}],
103+
}
104+
],
105+
},
106+
}
107+
],
108+
}
109+
110+
df = _handle_stats_nesting(body, geopd=False)
111+
112+
assert len(df) == 1
113+
assert df["monitoring_location_id"].iloc[0] == "USGS-12345"

0 commit comments

Comments
 (0)