Skip to content

Commit 90c1dd2

Browse files
thodson-usgsclaude
andcommitted
Fix _format_api_dates: parse ISO 8601, anchor duration check
A single ISO 8601 datetime such as "2018-02-12T23:20:50Z" — the format the docstrings of every getter cite as a valid `time` example — was silently dropped to None. The function only tried "%Y-%m-%d %H:%M:%S" and "%Y-%m-%d", neither of which matches a string with a `T` separator and trailing `Z`. `_construct_api_requests` then set `params["time"] = None`, requests dropped the param, and the user's time filter was silently discarded — the API fell back to "most recent year". Replace the cascading try/except with a single helper that walks the supported formats in order: ISO 8601 with offset and/or fractional seconds, plain ISO 8601, the legacy space-separated form, and date-only. Tz-aware inputs convert directly to UTC; naive inputs continue to be interpreted in the runner's local zone for backwards compatibility. Also tighten the duration / interval pass-through. The previous `re.search(r"P", ..., re.IGNORECASE)` matched any string containing a `p` or `P` anywhere — words like "Apr" or "sample" would skip parsing entirely. Anchor the check to `^[Pp]\d`. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 51ac674 commit 90c1dd2

2 files changed

Lines changed: 112 additions & 43 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,34 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s
128128
return [p for p in properties if p not in ["geometry", service_id]]
129129

130130

131+
_DATETIME_FORMATS = (
132+
"%Y-%m-%dT%H:%M:%S.%f%z",
133+
"%Y-%m-%dT%H:%M:%S%z",
134+
"%Y-%m-%dT%H:%M:%S.%f",
135+
"%Y-%m-%dT%H:%M:%S",
136+
"%Y-%m-%d %H:%M:%S.%f",
137+
"%Y-%m-%d %H:%M:%S",
138+
"%Y-%m-%d",
139+
)
140+
141+
142+
def _parse_datetime(value: str) -> datetime | None:
143+
"""Parse a single datetime string against the supported formats.
144+
145+
Returns a ``datetime`` (tz-aware iff the input carried a UTC offset),
146+
or ``None`` if no format matched.
147+
"""
148+
# ``datetime.strptime`` accepts a numeric offset like ``+00:00`` but not
149+
# the ``Z`` shorthand, so normalize trailing ``Z`` first.
150+
candidate = value[:-1] + "+00:00" if value.endswith("Z") else value
151+
for fmt in _DATETIME_FORMATS:
152+
try:
153+
return datetime.strptime(candidate, fmt)
154+
except ValueError:
155+
continue
156+
return None
157+
158+
131159
def _format_api_dates(
132160
datetime_input: str | list[str], date: bool = False
133161
) -> str | None:
@@ -141,8 +169,8 @@ def _format_api_dates(
141169
----------
142170
datetime_input : Union[str, List[str]]
143171
A single date/datetime string or a list of one or two date/datetime
144-
strings. Accepts formats like "%Y-%m-%d %H:%M:%S", ISO 8601, or relative
145-
periods (e.g., "P7D").
172+
strings. Accepts formats like "%Y-%m-%d %H:%M:%S", ISO 8601 (with or
173+
without ``Z``/numeric offset), or relative periods (e.g., "P7D").
146174
date : bool, optional
147175
If True, uses only the date portion ("YYYY-MM-DD"). If False (default),
148176
returns full datetime in UTC ISO 8601 format ("YYYY-MM-DDTHH:MM:SSZ").
@@ -168,7 +196,9 @@ def _format_api_dates(
168196
- Supports relative period strings (e.g., "P7D") and passes them through
169197
unchanged.
170198
- Converts datetimes to UTC and formats as ISO 8601 with 'Z' suffix when
171-
`date` is False.
199+
`date` is False. Inputs with an explicit offset (``Z`` or ``+HH:MM``) are
200+
converted from that offset to UTC; naive inputs are interpreted in the
201+
local time zone for backwards compatibility.
172202
- For date ranges, replaces "nan" with ".." in the output.
173203
"""
174204
# Get timezone
@@ -182,48 +212,34 @@ def _format_api_dates(
182212
if all(pd.isna(dt) or dt == "" or dt is None for dt in datetime_input):
183213
return None
184214

185-
if len(datetime_input) <= 2:
186-
# If the list is of length 1, first look for things like "P7D" or dates
187-
# already formatted in ISO08601. Otherwise, try to coerce to datetime
188-
if len(datetime_input) == 1 and (
189-
re.search(r"P", datetime_input[0], re.IGNORECASE)
190-
or "/" in datetime_input[0]
191-
):
192-
return datetime_input[0]
193-
# Otherwise, use list comprehension to parse dates
194-
else:
195-
try:
196-
# Parse to naive datetime
197-
parsed_dates = [
198-
datetime.strptime(dt, "%Y-%m-%d %H:%M:%S") # noqa: DTZ007
199-
for dt in datetime_input
200-
]
201-
except ValueError:
202-
# Parse to date only
203-
try:
204-
parsed_dates = [
205-
datetime.strptime(dt, "%Y-%m-%d") # noqa: DTZ007
206-
for dt in datetime_input
207-
]
208-
except ValueError:
209-
return None
210-
# If the service only accepts dates for this input, not
211-
# datetimes (e.g. "daily"), return just the dates separated by a
212-
# "/", otherwise, return the datetime in UTC format.
213-
if date:
214-
return "/".join(dt.strftime("%Y-%m-%d") for dt in parsed_dates)
215-
else:
216-
parsed_locals = [
217-
dt.replace(tzinfo=local_timezone) for dt in parsed_dates
218-
]
219-
formatted = "/".join(
220-
dt.astimezone(ZoneInfo("UTC")).strftime("%Y-%m-%dT%H:%M:%SZ")
221-
for dt in parsed_locals
222-
)
223-
return formatted
224-
else:
215+
if len(datetime_input) > 2:
225216
raise ValueError("datetime_input should only include 1-2 values")
226217

218+
# Pass through duration ("P7D") and pre-formatted interval ("a/b") strings
219+
# untouched. Anchor the duration check so the bare letter ``P`` / ``p``
220+
# appearing inside a normal word doesn't accidentally bypass parsing.
221+
if len(datetime_input) == 1:
222+
single = datetime_input[0]
223+
if re.match(r"^[Pp]\d", single) or "/" in single:
224+
return single
225+
226+
parsed_dates = [_parse_datetime(dt) for dt in datetime_input]
227+
if any(dt is None for dt in parsed_dates):
228+
return None
229+
230+
if date:
231+
return "/".join(dt.strftime("%Y-%m-%d") for dt in parsed_dates)
232+
233+
# Localize naive datetimes to the runner's local zone before converting
234+
# to UTC; tz-aware datetimes are converted directly.
235+
utc_dates = [
236+
(dt if dt.tzinfo is not None else dt.replace(tzinfo=local_timezone)).astimezone(
237+
ZoneInfo("UTC")
238+
)
239+
for dt in parsed_dates
240+
]
241+
return "/".join(dt.strftime("%Y-%m-%dT%H:%M:%SZ") for dt in utc_dates)
242+
227243

228244
def _cql2_param(args: dict[str, Any]) -> str:
229245
"""

tests/waterdata_utils_test.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import requests
44

55
from dataretrieval.waterdata.utils import (
6+
_format_api_dates,
67
_get_args,
78
_walk_pages,
89
)
@@ -80,3 +81,55 @@ def test_walk_pages_multiple_mocked():
8081
assert mock_client.send.called
8182
assert mock_client.request.called
8283
assert mock_client.request.call_args[0][1] == "https://example.com/page2"
84+
85+
86+
# --- _format_api_dates -------------------------------------------------------
87+
88+
89+
def test_format_api_dates_iso8601_with_z():
90+
"""ISO 8601 datetimes with a 'Z' suffix must be parsed, not dropped to None."""
91+
assert _format_api_dates("2018-02-12T23:20:50Z") == "2018-02-12T23:20:50Z"
92+
93+
94+
def test_format_api_dates_iso8601_with_fractional_seconds():
95+
assert _format_api_dates("2018-02-12T23:20:50.123Z") == "2018-02-12T23:20:50Z"
96+
97+
98+
def test_format_api_dates_iso8601_with_offset():
99+
"""Numeric offsets must be converted to UTC."""
100+
assert _format_api_dates("2018-02-12T19:20:50-04:00") == "2018-02-12T23:20:50Z"
101+
102+
103+
def test_format_api_dates_iso8601_pair():
104+
"""A list of two ISO 8601 datetimes must be parsed into a UTC interval."""
105+
result = _format_api_dates(["2018-02-12T23:20:50Z", "2018-03-18T12:31:12Z"])
106+
assert result == "2018-02-12T23:20:50Z/2018-03-18T12:31:12Z"
107+
108+
109+
def test_format_api_dates_passthrough_interval():
110+
assert _format_api_dates("2018-02-12T00:00:00Z/..") == "2018-02-12T00:00:00Z/.."
111+
112+
113+
def test_format_api_dates_passthrough_duration():
114+
assert _format_api_dates("P7D") == "P7D"
115+
116+
117+
def test_format_api_dates_word_with_p_is_not_a_duration():
118+
"""Strings containing the letter 'p' must not be misclassified as durations."""
119+
assert _format_api_dates("Apr") is None
120+
121+
122+
def test_format_api_dates_date_only():
123+
assert _format_api_dates("2024-01-01", date=True) == "2024-01-01"
124+
125+
126+
def test_format_api_dates_date_only_pair():
127+
assert (
128+
_format_api_dates(["2024-01-01", "2024-02-01"], date=True)
129+
== "2024-01-01/2024-02-01"
130+
)
131+
132+
133+
def test_format_api_dates_space_separated_still_works():
134+
"""The legacy space-separated format must still parse."""
135+
assert _format_api_dates("2024-01-01 00:00:00", date=True) == "2024-01-01"

0 commit comments

Comments
 (0)