Skip to content

Commit 8aff522

Browse files
thodson-usgs and claude committed
Use ISO 8601 duration as the window default
Switch the default from "00:07:30" to "PT7M30S" so the user-visible contract points at an actual international standard (ISO 8601 duration) rather than a pandas-specific colon form. ``pandas.Timedelta`` still accepts all the other forms users may already have typed — ISO 8601, HH:MM:SS, shorthand ("7min30s", "450s"), or a ``pd.Timedelta`` directly — and a parametrized test now exercises each shape to lock in the "whatever ``pd.Timedelta`` takes" contract.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 019e93d commit 8aff522

2 files changed

Lines changed: 40 additions & 28 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -445,7 +445,7 @@ def get_nearest_continuous(
445445
monitoring_location_id: str | list[str] | None = None,
446446
parameter_code: str | list[str] | None = None,
447447
*,
448-
window: str | pd.Timedelta = "00:07:30",
448+
window: str | pd.Timedelta = "PT7M30S",
449449
on_tie: Literal["first", "last", "mean"] = "first",
450450
**kwargs,
451451
) -> tuple[pd.DataFrame, BaseMetadata]:
@@ -472,14 +472,21 @@ def get_nearest_continuous(
472472
Forwarded to ``get_continuous``.
473473
parameter_code : string or list of strings, optional
474474
Forwarded to ``get_continuous``.
475-
window : string or ``pandas.Timedelta``, default ``"00:07:30"``
476-
Half-window around each target, in ``HH:MM:SS`` form (or any
477-
``pandas.Timedelta``-parseable string: ``"7min30s"``,
478-
``"450s"``, etc.). Must be small enough that every target's
479-
window captures roughly one observation at the service cadence.
480-
The ``"00:07:30"`` default matches a 15-minute continuous gauge;
481-
use a larger value (e.g. ``"00:15:00"``) when the gauge cadence
482-
is longer or you need more resilience to data gaps.
475+
window : string or ``pandas.Timedelta``, default ``"PT7M30S"``
476+
Half-window around each target, as an ISO 8601 duration
477+
(``"PT7M30S"``, ``"PT15M"``, ``"PT1H"``, etc.). Also accepts
478+
any other form ``pandas.Timedelta`` parses — ``HH:MM:SS``
479+
(``"00:07:30"``), pandas shorthand (``"7min30s"``,
480+
``"450s"``), or a ``pd.Timedelta`` directly. See the
481+
`pandas.Timedelta docs
482+
<https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.html>`_
483+
for the full grammar.
484+
485+
Must be small enough that every target's window captures
486+
roughly one observation at the service cadence. The default
487+
matches a 15-minute continuous gauge; widen (e.g.
488+
``"PT15M"``) for irregular cadences or resilience to data
489+
gaps.
483490
on_tie : {"first", "last", "mean"}, default ``"first"``
484491
How to resolve ties when two observations are exactly equidistant
485492
from a target (which happens when the target falls at the midpoint
@@ -549,7 +556,7 @@ def get_nearest_continuous(
549556
... targets,
550557
... monitoring_location_id="USGS-02238500",
551558
... parameter_code="00060",
552-
... window="00:30:00",
559+
... window="PT30M",
553560
... on_tie="mean",
554561
... )
555562
"""

tests/waterdata_nearest_test.py

Lines changed: 23 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def test_builds_one_or_clause_per_target(patch_get_continuous):
5858
targets,
5959
monitoring_location_id="USGS-02238500",
6060
parameter_code="00060",
61-
window="00:07:30",
61+
window="PT7M30S",
6262
)
6363
_, kwargs = patch_get_continuous.call_args
6464
filter_expr = kwargs["filter"]
@@ -89,7 +89,7 @@ def test_tie_first_keeps_earlier(patch_get_continuous):
8989
targets,
9090
monitoring_location_id="USGS-02238500",
9191
on_tie="first",
92-
window="00:07:30",
92+
window="PT7M30S",
9393
)
9494
assert len(result) == 1
9595
assert result.iloc[0]["value"] == 22.0
@@ -111,7 +111,7 @@ def test_tie_last_keeps_later(patch_get_continuous):
111111
targets,
112112
monitoring_location_id="USGS-02238500",
113113
on_tie="last",
114-
window="00:07:30",
114+
window="PT7M30S",
115115
)
116116
assert result.iloc[0]["value"] == 22.4
117117
assert result.iloc[0]["time"] == pd.Timestamp("2023-06-15T10:30:00Z")
@@ -132,7 +132,7 @@ def test_tie_mean_averages_numeric_and_uses_target_time(patch_get_continuous):
132132
targets,
133133
monitoring_location_id="USGS-02238500",
134134
on_tie="mean",
135-
window="00:07:30",
135+
window="PT7M30S",
136136
)
137137
assert result.iloc[0]["value"] == pytest.approx(22.2)
138138
# Time is set to the target since no real observation sits at the midpoint
@@ -232,23 +232,28 @@ def test_accepts_list_of_strings(patch_get_continuous):
232232
assert len(result) == 1
233233

234234

235-
def test_window_accepts_hhmmss_and_shorthand_equivalently(patch_get_continuous):
236-
"""``window="00:07:30"`` and ``window="7min30s"`` are the same duration
237-
as far as ``pandas.Timedelta`` is concerned, so the two forms must
238-
produce identical CQL filters."""
235+
@pytest.mark.parametrize(
236+
"window",
237+
[
238+
"00:07:30", # HH:MM:SS
239+
"7min30s", # pandas shorthand
240+
"450s", # seconds shorthand
241+
"PT7M30S", # ISO 8601 duration
242+
pd.Timedelta(minutes=7, seconds=30), # Timedelta object
243+
],
244+
)
245+
def test_window_accepts_any_pandas_timedelta_form(patch_get_continuous, window):
246+
"""Every representation ``pandas.Timedelta`` parses must produce the
247+
same CQL filter. Documents the public contract: ``window`` is
248+
whatever ``pd.Timedelta(window)`` returns."""
239249
targets = pd.to_datetime(["2023-06-15T10:30:00Z"], utc=True)
240250
patch_get_continuous.return_value = (_fake_df([]), mock.Mock())
241251

242-
get_nearest_continuous(targets, monitoring_location_id="USGS-1", window="00:07:30")
243-
filter_hhmmss = patch_get_continuous.call_args.kwargs["filter"]
244-
245-
get_nearest_continuous(targets, monitoring_location_id="USGS-1", window="7min30s")
246-
filter_shorthand = patch_get_continuous.call_args.kwargs["filter"]
247-
248-
assert filter_hhmmss == filter_shorthand
249-
# And the bounds should be 7:30 away from the target
250-
assert "'2023-06-15T10:22:30Z'" in filter_hhmmss
251-
assert "'2023-06-15T10:37:30Z'" in filter_hhmmss
252+
get_nearest_continuous(targets, monitoring_location_id="USGS-1", window=window)
253+
filter_expr = patch_get_continuous.call_args.kwargs["filter"]
254+
# Bounds are 7:30 away from the target regardless of input spelling
255+
assert "'2023-06-15T10:22:30Z'" in filter_expr
256+
assert "'2023-06-15T10:37:30Z'" in filter_expr
252257

253258

254259
def test_forwards_kwargs_to_get_continuous(patch_get_continuous):

0 commit comments

Comments
 (0)