Skip to content

Commit 81d97cf

Browse files
thodson-usgs and claude committed
Extract _DATE_RANGE_PARAMS; trim docstrings/comments from /simplify
Three small follow-ups to the centralization:

1. Extract `_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"})` once at module level. `_construct_api_requests` previously defined the same set twice (`single_params` for POST/GET routing, `time_periods` for the `_format_api_dates` call); the new `_NO_NORMALIZE_PARAMS` overlapped on the same five names. All three now reuse `_DATE_RANGE_PARAMS`. A future date param means one edit, not three. `_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"}`.

2. Trim `_normalize_str_iterable` docstring from ~37 lines to ~20: drop the over-narration of callers and downstream branching; keep the contract (accepted shapes, return shape, raises).

3. Tighten the `_NO_NORMALIZE_PARAMS` comment to one short paragraph (was 11 lines) and inline the four-branch pass-through cascade in `_get_args` into a single boolean `if ... or ... or ... or ...:` so the per-branch noise comments drop away.

Behavior unchanged. 26 normalizer/validator + 22 waterdata_utils tests pass; full suite 267 passed + 2 skipped + 4 deselected (flaky live-API 502s); ruff lint + format clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 3b70023 commit 81d97cf

1 file changed

Lines changed: 31 additions & 60 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 31 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,13 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s
144144
# admits time-only forms like ``PT36H``.
145145
_DURATION_RE = re.compile(r"^[Pp]T?\d")
146146

147+
# OGC API parameters that carry a date/datetime value (single string,
148+
# two-element range, or interval/duration string) rather than a multi-value
149+
# string list. Used by ``_construct_api_requests`` to keep them out of the
150+
# POST/CQL2 multi-value path and to route them through ``_format_api_dates``,
151+
# and by ``_NO_NORMALIZE_PARAMS`` to bypass string-iterable normalization.
152+
_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"})
153+
147154

148155
def _parse_datetime(value: str) -> datetime | None:
149156
"""Parse a single datetime string against the supported formats.
@@ -418,14 +425,11 @@ def _construct_api_requests(
418425
"""
419426
service_url = f"{OGC_API_URL}/collections/{service}/items"
420427

421-
# Single parameters can only have one value
422-
single_params = {"datetime", "last_modified", "begin", "end", "time"}
423-
424428
# Identify which parameters should be included in the POST content body
425429
post_params = {
426430
k: v
427431
for k, v in kwargs.items()
428-
if k not in single_params and isinstance(v, (list, tuple)) and len(v) > 1
432+
if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1
429433
}
430434

431435
# Everything else goes into the params dictionary for the URL
@@ -441,8 +445,7 @@ def _construct_api_requests(
441445
POST = bool(post_params)
442446

443447
# Convert dates to ISO 8601 format
444-
time_periods = {"last_modified", "datetime", "time", "begin", "end"}
445-
for i in time_periods:
448+
for i in _DATE_RANGE_PARAMS:
446449
if i in params:
447450
dates = service == "daily" and i != "last_modified"
448451
params[i] = _format_api_dates(params[i], date=dates)
@@ -1160,63 +1163,35 @@ def _check_profiles(
11601163
_MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-[^-\s]+")
11611164

11621165

1163-
# Parameter names skipped by ``_get_args``'s string-iterable normalization.
1164-
# Scalar non-string knobs (``limit``, ``ssl_check``, …) and ``list[float]``
1165-
# params (``bbox``, ``boundingBox``) are detected by *runtime type* and pass
1166-
# through automatically. The names below need explicit listing because their
1167-
# values *are* string-iterables but have separate handling downstream:
1168-
#
1169-
# * ``monitoring_location_id`` — validated by
1170-
# ``_check_monitoring_location_id`` at the public-function entry.
1171-
# * Date-range params (``time``, ``last_modified``, ``begin``, ``end``,
1172-
# ``datetime``) — support ``pd.NaT``/``None`` half-bounded endpoints and
1173-
# interval/duration strings; parsing happens in ``_format_api_dates``.
1174-
_NO_NORMALIZE_PARAMS = frozenset(
1175-
{
1176-
"monitoring_location_id",
1177-
"time",
1178-
"last_modified",
1179-
"begin",
1180-
"end",
1181-
"datetime",
1182-
}
1183-
)
1166+
# Param names that ``_get_args`` must NOT push through ``_normalize_str_iterable``.
1167+
# Scalar non-string knobs and ``list[float]`` params are detected by runtime
1168+
# type; only string-iterable-shaped params with special handling need to be
1169+
# named here: ``monitoring_location_id`` (validated separately) and the date-
1170+
# range params (which may contain ``pd.NaT``/None or interval strings).
1171+
_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"}
11841172

11851173

11861174
def _normalize_str_iterable(
11871175
value: str | Iterable[str] | None,
11881176
param_name: str = "value",
11891177
) -> str | list[str] | None:
1190-
"""Validate and normalize a parameter that accepts a string or iterable of strings.
1191-
1192-
Called from ``_get_args`` for every multi-value string parameter on
1193-
every waterdata getter that uses ``_get_args`` (every OGC/Samples
1194-
function in ``dataretrieval/waterdata/api.py``). Accepts ``list``,
1195-
``tuple``, ``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``,
1196-
generators — anything iterable whose elements are strings. The
1197-
downstream ``_construct_api_requests`` branches on ``isinstance(v,
1198-
(list, tuple))``, so iterables are materialized to a ``list`` here.
1199-
``Mapping`` types are rejected because iterating a mapping yields
1200-
keys, which would be a footgun.
1201-
1202-
Date-range params (``time``, ``last_modified``, ``begin``, ``end``,
1203-
``datetime``, ...) deliberately bypass this helper via
1204-
``_NO_NORMALIZE_PARAMS``; their single-string-or-two-element-range
1205-
semantics (including ``pd.NaT``/``None`` half-bounded endpoints) are
1206-
handled by ``_format_api_dates`` inside ``_construct_api_requests``.
1178+
"""Validate that ``value`` is None, a string, or an iterable of strings.
1179+
1180+
Non-string iterables (``list``, ``tuple``, ``pandas.Series``,
1181+
``pandas.Index``, ``numpy.ndarray``, generators) are materialized to a
1182+
``list`` so downstream code that branches on ``isinstance(v, (list,
1183+
tuple))`` keeps working. ``Mapping`` types are rejected because
1184+
iterating a mapping yields keys, not values.
12071185
12081186
Parameters
12091187
----------
12101188
value : None, str, or iterable of str
12111189
param_name : str, optional
1212-
Name of the parameter, used in error messages. Defaults to
1213-
``"value"``.
1190+
Used in error messages. Defaults to ``"value"``.
12141191
12151192
Returns
12161193
-------
12171194
None, str, or list of str
1218-
``None`` and ``str`` are returned unchanged; non-string iterables
1219-
are returned as a ``list``.
12201195
12211196
Raises
12221197
------
@@ -1330,17 +1305,13 @@ def _get_args(
13301305
for k, v in local_vars.items():
13311306
if k in to_exclude or v is None:
13321307
continue
1333-
if k in _NO_NORMALIZE_PARAMS or isinstance(v, str):
1334-
args[k] = v
1335-
continue
1336-
if not isinstance(v, Iterable):
1337-
# Scalar non-string knob (bool / int / float) — pass through.
1308+
if (
1309+
k in _NO_NORMALIZE_PARAMS
1310+
or isinstance(v, str)
1311+
or not isinstance(v, Iterable)
1312+
or (isinstance(v, (list, tuple)) and v and not isinstance(v[0], str))
1313+
):
13381314
args[k] = v
1339-
continue
1340-
if isinstance(v, (list, tuple)) and v and not isinstance(v[0], str):
1341-
# list[float] / list[int] (e.g. bbox) — pass through.
1342-
args[k] = v
1343-
continue
1344-
# String-iterable: validate elements and materialize to list.
1345-
args[k] = _normalize_str_iterable(v, k)
1315+
else:
1316+
args[k] = _normalize_str_iterable(v, k)
13461317
return args

0 commit comments

Comments
 (0)