Skip to content

Commit da6ac46

Browse files
thodson-usgs and claude
committed
Fix get_nearest_continuous: accept scalar targets and missing time column
The docstring says ``targets`` accepts "anything ``pandas.to_datetime`` consumes", which includes a bare string or ``pd.Timestamp``. But ``pd.to_datetime("2024-01-01T00:00:00Z", utc=True)`` returns a scalar ``Timestamp``, and ``pd.DatetimeIndex(scalar)`` raises ``TypeError`` — so single-value cases crashed despite the documented contract. Wrap a scalar result in a one-element ``DatetimeIndex`` so any ``pandas.to_datetime``-consumable input works. Also: when the user passes a ``properties`` value that excludes ``time``, the helper used to crash with ``KeyError`` deep inside ``df.assign``. Detect the missing column up front and raise a ``ValueError`` pointing at the likely cause. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 51ac674 commit da6ac46

2 files changed

Lines changed: 70 additions & 1 deletion

File tree

dataretrieval/waterdata/nearest.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def get_nearest_continuous(
137137
... )
138138
"""
139139
_check_nearest_kwargs(kwargs, on_tie)
140-
targets = pd.DatetimeIndex(pd.to_datetime(targets, utc=True))
140+
targets = _coerce_targets(targets)
141141
window_td = pd.Timedelta(window)
142142

143143
if len(targets) == 0:
@@ -151,6 +151,11 @@ def get_nearest_continuous(
151151
filter_lang="cql-text",
152152
**kwargs,
153153
)
154+
if "time" not in df.columns:
155+
raise ValueError(
156+
"get_nearest_continuous requires a 'time' column in the response; "
157+
"if a `properties` kwarg was passed, include 'time' in it"
158+
)
154159
if df.empty:
155160
return _empty_nearest_result(df), md
156161

@@ -172,6 +177,19 @@ def get_nearest_continuous(
172177
return pd.DataFrame(selected).reset_index(drop=True), md
173178

174179

180+
def _coerce_targets(targets) -> pd.DatetimeIndex:
181+
"""Accept anything ``pandas.to_datetime`` consumes, including a single value.
182+
183+
A bare scalar (string, ``Timestamp``, ``datetime``, …) becomes a
184+
one-element ``DatetimeIndex``; an iterable round-trips through
185+
``pd.to_datetime`` directly.
186+
"""
187+
parsed = pd.to_datetime(targets, utc=True)
188+
if isinstance(parsed, pd.DatetimeIndex):
189+
return parsed
190+
return pd.DatetimeIndex([parsed])
191+
192+
175193
def _check_nearest_kwargs(kwargs: dict, on_tie: OnTie) -> None:
176194
"""Reject kwargs the helper owns; validate ``on_tie``."""
177195
for forbidden in ("time", "filter", "filter_lang"):

tests/waterdata_nearest_test.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,3 +265,54 @@ def test_forwards_kwargs_to_get_continuous(patch_get_continuous):
265265
_, kwargs = patch_get_continuous.call_args
266266
assert kwargs["statistic_id"] == "00011"
267267
assert kwargs["approval_status"] == "Approved"
268+
269+
270+
def test_accepts_single_string_target(patch_get_continuous):
    """A lone string target is accepted and reported back as ``target_time``.

    Regression guard: `pd.DatetimeIndex(pd.to_datetime("...", utc=True))`
    used to raise TypeError, because pd.to_datetime hands back a scalar
    Timestamp when given a single string.
    """
    observations = [{"time": "2023-06-15T10:30:00Z", "value": 22.4}]
    patch_get_continuous.return_value = (_fake_df(observations), mock.Mock())

    result, _ = get_nearest_continuous(
        "2023-06-15T10:30:31Z",
        monitoring_location_id="USGS-02238500",
    )

    assert len(result) == 1
    expected_target = pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC")
    assert result["target_time"].iloc[0] == expected_target
288+
289+
290+
def test_accepts_single_timestamp_target(patch_get_continuous):
    """Passing one ``pd.Timestamp`` as the target yields a one-row result."""
    observations = [{"time": "2023-06-15T10:30:00Z", "value": 22.4}]
    patch_get_continuous.return_value = (_fake_df(observations), mock.Mock())

    result, _ = get_nearest_continuous(
        pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC"),
        monitoring_location_id="USGS-02238500",
    )

    assert len(result) == 1
299+
300+
301+
def test_missing_time_column_raises_helpful_error(patch_get_continuous):
    """A response lacking the 'time' column must surface as ValueError.

    This happens, for example, when the caller's `properties` list leaves
    'time' out; the helper should say so rather than fail with KeyError.
    """
    response_without_time = pd.DataFrame(
        {"value": [22.4], "monitoring_location_id": ["USGS-02238500"]}
    )
    patch_get_continuous.return_value = (response_without_time, mock.Mock())

    call_kwargs = {
        "monitoring_location_id": "USGS-02238500",
        "properties": ["value", "monitoring_location_id"],
    }
    with pytest.raises(ValueError, match="'time' column"):
        get_nearest_continuous(["2023-06-15T10:30:31Z"], **call_kwargs)

0 commit comments

Comments
 (0)