Skip to content

Commit 2df9743

Browse files
thodson-usgs and claude committed
Preserve list-like target inputs and avoid double-tz in test
Per Copilot review on PR DOI-USGS#251:

- `_coerce_targets`: detect non-`DatetimeIndex` iterables (`Series`, `ndarray`) via `pd.api.types.is_scalar` so that their elements are preserved instead of being wrapped in a single-element list. Add a regression test that passes a `pd.Series` of two timestamps and asserts that both are processed.
- Tests: drop the redundant `tz='UTC'` on `pd.Timestamp` inputs that already carry a `Z` suffix; pandas 2.x raises on double timezone specification.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b0a8b5c commit 2df9743

2 files changed

Lines changed: 23 additions & 7 deletions

File tree

dataretrieval/waterdata/nearest.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -183,13 +183,15 @@ def _coerce_targets(targets: Any) -> pd.DatetimeIndex:
183183
"""Accept anything ``pandas.to_datetime`` consumes, including a single value.
184184
185185
A bare scalar (string, ``Timestamp``, ``datetime``, …) becomes a
186-
one-element ``DatetimeIndex``; an iterable round-trips through
187-
``pd.to_datetime`` directly.
186+
one-element ``DatetimeIndex``; an iterable (list, ``Series``, ``ndarray``)
187+
is wrapped directly so its elements are preserved.
188188
"""
189189
parsed = pd.to_datetime(targets, utc=True)
190190
if isinstance(parsed, pd.DatetimeIndex):
191191
return parsed
192-
return pd.DatetimeIndex([parsed])
192+
if pd.api.types.is_scalar(parsed):
193+
return pd.DatetimeIndex([parsed])
194+
return pd.DatetimeIndex(parsed)
193195

194196

195197
def _check_nearest_kwargs(kwargs: dict[str, Any], on_tie: OnTie) -> None:

tests/waterdata_nearest_test.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -282,9 +282,7 @@ def test_accepts_single_string_target(patch_get_continuous):
282282
"2023-06-15T10:30:31Z", monitoring_location_id="USGS-02238500"
283283
)
284284
assert len(result) == 1
285-
assert result["target_time"].iloc[0] == pd.Timestamp(
286-
"2023-06-15T10:30:31Z", tz="UTC"
287-
)
285+
assert result["target_time"].iloc[0] == pd.Timestamp("2023-06-15T10:30:31Z")
288286

289287

290288
def test_accepts_single_timestamp_target(patch_get_continuous):
@@ -293,11 +291,27 @@ def test_accepts_single_timestamp_target(patch_get_continuous):
293291
_fake_df([{"time": "2023-06-15T10:30:00Z", "value": 22.4}]),
294292
mock.Mock(),
295293
)
296-
target = pd.Timestamp("2023-06-15T10:30:31Z", tz="UTC")
294+
target = pd.Timestamp("2023-06-15T10:30:31Z")
297295
result, _ = get_nearest_continuous(target, monitoring_location_id="USGS-02238500")
298296
assert len(result) == 1
299297

300298

299+
def test_accepts_pandas_series_targets(patch_get_continuous):
300+
"""A ``pd.Series`` of timestamps preserves all elements (not just the first)."""
301+
patch_get_continuous.return_value = (
302+
_fake_df(
303+
[
304+
{"time": "2023-06-15T10:30:00Z", "value": 22.4},
305+
{"time": "2023-06-16T10:30:00Z", "value": 22.5},
306+
]
307+
),
308+
mock.Mock(),
309+
)
310+
targets = pd.Series(["2023-06-15T10:30:31Z", "2023-06-16T10:30:31Z"])
311+
result, _ = get_nearest_continuous(targets, monitoring_location_id="USGS-02238500")
312+
assert len(result) == 2
313+
314+
301315
def test_missing_time_column_raises_helpful_error(patch_get_continuous):
302316
"""If the response has no 'time' column (e.g. user passed `properties`
303317
that excluded it), raise ValueError instead of crashing with KeyError.

0 commit comments

Comments
 (0)