Skip to content

Commit 9bde9e1

Browse files
thodson-usgs and claude committed
Accept ISO 8601 PT durations and open-ended range NA endpoints
Per Copilot review on PR DOI-USGS#247:

- Broaden the duration passthrough regex from `^[Pp]\d` to `^[Pp]T?\d` so time-only durations like `PT36H` (documented in get_continuous and get_daily as valid `time`/`last_modified` values) are preserved instead of being parsed as datetimes and silently dropped.
- Per-element NA handling: a `None` / `NaN` / empty endpoint in a 2-value range now becomes `..` in the output, matching the docstring contract and supporting half-bounded intervals like `[date, None]` and `[None, date]`. Previously, a non-string element would raise from `.endswith` inside `_parse_datetime`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 90c1dd2 commit 9bde9e1

2 files changed

Lines changed: 38 additions & 20 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -215,30 +215,37 @@ def _format_api_dates(
215215
if len(datetime_input) > 2:
216216
raise ValueError("datetime_input should only include 1-2 values")
217217

218-
# Pass through duration ("P7D") and pre-formatted interval ("a/b") strings
219-
# untouched. Anchor the duration check so the bare letter ``P`` / ``p``
220-
# appearing inside a normal word doesn't accidentally bypass parsing.
221-
if len(datetime_input) == 1:
218+
# Pass through duration ("P7D", "PT36H") and pre-formatted interval ("a/b")
219+
# strings untouched. Anchor the duration check so the bare letter ``P``
220+
# appearing inside a normal word doesn't bypass parsing; allow the optional
221+
# ``T`` so time-only durations like ``PT36H`` are recognized.
222+
if len(datetime_input) == 1 and isinstance(datetime_input[0], str):
222223
single = datetime_input[0]
223-
if re.match(r"^[Pp]\d", single) or "/" in single:
224+
if re.match(r"^[Pp]T?\d", single) or "/" in single:
224225
return single
225226

226-
parsed_dates = [_parse_datetime(dt) for dt in datetime_input]
227-
if any(dt is None for dt in parsed_dates):
227+
# Per-element: NA endpoints become ".." in the output for half-bounded
228+
# ranges; otherwise parse. If any non-NA element fails to parse, return
229+
# None overall.
230+
def _format_one(dt) -> str | None:
231+
if pd.isna(dt) or dt == "" or dt is None:
232+
return ".."
233+
parsed = _parse_datetime(dt)
234+
if parsed is None:
235+
return None
236+
if date:
237+
return parsed.strftime("%Y-%m-%d")
238+
utc = (
239+
parsed
240+
if parsed.tzinfo is not None
241+
else parsed.replace(tzinfo=local_timezone)
242+
).astimezone(ZoneInfo("UTC"))
243+
return utc.strftime("%Y-%m-%dT%H:%M:%SZ")
244+
245+
formatted = [_format_one(dt) for dt in datetime_input]
246+
if any(f is None for f in formatted):
228247
return None
229-
230-
if date:
231-
return "/".join(dt.strftime("%Y-%m-%d") for dt in parsed_dates)
232-
233-
# Localize naive datetimes to the runner's local zone before converting
234-
# to UTC; tz-aware datetimes are converted directly.
235-
utc_dates = [
236-
(dt if dt.tzinfo is not None else dt.replace(tzinfo=local_timezone)).astimezone(
237-
ZoneInfo("UTC")
238-
)
239-
for dt in parsed_dates
240-
]
241-
return "/".join(dt.strftime("%Y-%m-%dT%H:%M:%SZ") for dt in utc_dates)
248+
return "/".join(formatted)
242249

243250

244251
def _cql2_param(args: dict[str, Any]) -> str:

tests/waterdata_utils_test.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,11 @@ def test_format_api_dates_passthrough_duration():
114114
assert _format_api_dates("P7D") == "P7D"
115115

116116

117+
def test_format_api_dates_passthrough_time_only_duration():
118+
"""ISO 8601 time-only durations (PT...) are passed through unchanged."""
119+
assert _format_api_dates("PT36H") == "PT36H"
120+
121+
117122
def test_format_api_dates_word_with_p_is_not_a_duration():
118123
"""Strings containing the letter 'p' must not be misclassified as durations."""
119124
assert _format_api_dates("Apr") is None
@@ -133,3 +138,9 @@ def test_format_api_dates_date_only_pair():
133138
def test_format_api_dates_space_separated_still_works():
134139
"""The legacy space-separated format must still parse."""
135140
assert _format_api_dates("2024-01-01 00:00:00", date=True) == "2024-01-01"
141+
142+
143+
def test_format_api_dates_open_ended_range_with_none():
144+
"""A None / NaN endpoint becomes '..' in the output range."""
145+
assert _format_api_dates(["2024-01-01", None], date=True) == "2024-01-01/.."
146+
assert _format_api_dates([None, "2024-01-01"], date=True) == "../2024-01-01"

0 commit comments

Comments
 (0)