Skip to content

Commit 7c32bea

Browse files
thodson-usgs and claude committed
Extract _DATE_RANGE_PARAMS; trim docstrings/comments from /simplify
Three small follow-ups to the centralization:

1. Extract `_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"})` once at module level. `_construct_api_requests` previously defined the same set twice (`single_params` for POST/GET routing, `time_periods` for the `_format_api_dates` call); the new `_NO_NORMALIZE_PARAMS` overlapped on the same five names. All three now reuse `_DATE_RANGE_PARAMS`. A future date param means one edit, not three. `_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"}`.

2. Trim `_normalize_str_iterable` docstring from ~37 lines to ~20: drop the over-narration of callers and downstream branching; keep the contract (accepted shapes, return shape, raises).

3. Tighten the `_NO_NORMALIZE_PARAMS` comment to one short paragraph (was 11 lines) and inline the four-branch pass-through cascade in `_get_args` into a single boolean `if ... or ... or ... or ...:` so the per-branch noise comments drop away.

Behavior unchanged. 26 normalizer/validator + 22 waterdata_utils tests pass; full suite 267 passed + 2 skipped + 4 deselected (flaky live-API 502s); ruff lint + format clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 0fd5730 commit 7c32bea

1 file changed

Lines changed: 31 additions & 60 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 31 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,13 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s
144144
# admits time-only forms like ``PT36H``.
145145
_DURATION_RE = re.compile(r"^[Pp]T?\d")
146146

147+
# OGC API parameters that carry a date/datetime value (single string,
148+
# two-element range, or interval/duration string) rather than a multi-value
149+
# string list. Used by ``_construct_api_requests`` to keep them out of the
150+
# POST/CQL2 multi-value path and to route them through ``_format_api_dates``,
151+
# and by ``_NO_NORMALIZE_PARAMS`` to bypass string-iterable normalization.
152+
_DATE_RANGE_PARAMS = frozenset({"datetime", "last_modified", "begin", "end", "time"})
153+
147154

148155
def _parse_datetime(value: str) -> datetime | None:
149156
"""Parse a single datetime string against the supported formats.
@@ -434,14 +441,11 @@ def _construct_api_requests(
434441
"""
435442
service_url = f"{OGC_API_URL}/collections/{service}/items"
436443

437-
# Single parameters can only have one value
438-
single_params = {"datetime", "last_modified", "begin", "end", "time"}
439-
440444
# Identify which parameters should be included in the POST content body
441445
post_params = {
442446
k: v
443447
for k, v in kwargs.items()
444-
if k not in single_params and isinstance(v, (list, tuple)) and len(v) > 1
448+
if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1
445449
}
446450

447451
# Everything else goes into the params dictionary for the URL
@@ -457,8 +461,7 @@ def _construct_api_requests(
457461
POST = bool(post_params)
458462

459463
# Convert dates to ISO 8601 format
460-
time_periods = {"last_modified", "datetime", "time", "begin", "end"}
461-
for i in time_periods:
464+
for i in _DATE_RANGE_PARAMS:
462465
if i in params:
463466
dates = service == "daily" and i != "last_modified"
464467
params[i] = _format_api_dates(params[i], date=dates)
@@ -1176,63 +1179,35 @@ def _check_profiles(
11761179
_MONITORING_LOCATION_ID_RE = re.compile(r"[^-\s]+-[^-\s]+")
11771180

11781181

1179-
# Parameter names skipped by ``_get_args``'s string-iterable normalization.
1180-
# Scalar non-string knobs (``limit``, ``ssl_check``, …) and ``list[float]``
1181-
# params (``bbox``, ``boundingBox``) are detected by *runtime type* and pass
1182-
# through automatically. The names below need explicit listing because their
1183-
# values *are* string-iterables but have separate handling downstream:
1184-
#
1185-
# * ``monitoring_location_id`` — validated by
1186-
# ``_check_monitoring_location_id`` at the public-function entry.
1187-
# * Date-range params (``time``, ``last_modified``, ``begin``, ``end``,
1188-
# ``datetime``) — support ``pd.NaT``/``None`` half-bounded endpoints and
1189-
# interval/duration strings; parsing happens in ``_format_api_dates``.
1190-
_NO_NORMALIZE_PARAMS = frozenset(
1191-
{
1192-
"monitoring_location_id",
1193-
"time",
1194-
"last_modified",
1195-
"begin",
1196-
"end",
1197-
"datetime",
1198-
}
1199-
)
1182+
# Param names that ``_get_args`` must NOT push through ``_normalize_str_iterable``.
1183+
# Scalar non-string knobs and ``list[float]`` params are detected by runtime
1184+
# type; only string-iterable-shaped params with special handling need to be
1185+
# named here: ``monitoring_location_id`` (validated separately) and the date-
1186+
# range params (which may contain ``pd.NaT``/None or interval strings).
1187+
_NO_NORMALIZE_PARAMS = _DATE_RANGE_PARAMS | {"monitoring_location_id"}
12001188

12011189

12021190
def _normalize_str_iterable(
12031191
value: str | Iterable[str] | None,
12041192
param_name: str = "value",
12051193
) -> str | list[str] | None:
1206-
"""Validate and normalize a parameter that accepts a string or iterable of strings.
1207-
1208-
Called from ``_get_args`` for every multi-value string parameter on
1209-
every waterdata getter that uses ``_get_args`` (every OGC/Samples
1210-
function in ``dataretrieval/waterdata/api.py``). Accepts ``list``,
1211-
``tuple``, ``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``,
1212-
generators — anything iterable whose elements are strings. The
1213-
downstream ``_construct_api_requests`` branches on ``isinstance(v,
1214-
(list, tuple))``, so iterables are materialized to a ``list`` here.
1215-
``Mapping`` types are rejected because iterating a mapping yields
1216-
keys, which would be a footgun.
1217-
1218-
Date-range params (``time``, ``last_modified``, ``begin``, ``end``,
1219-
``datetime``, ...) deliberately bypass this helper via
1220-
``_NO_NORMALIZE_PARAMS``; their single-string-or-two-element-range
1221-
semantics (including ``pd.NaT``/``None`` half-bounded endpoints) are
1222-
handled by ``_format_api_dates`` inside ``_construct_api_requests``.
1194+
"""Validate that ``value`` is None, a string, or an iterable of strings.
1195+
1196+
Non-string iterables (``list``, ``tuple``, ``pandas.Series``,
1197+
``pandas.Index``, ``numpy.ndarray``, generators) are materialized to a
1198+
``list`` so downstream code that branches on ``isinstance(v, (list,
1199+
tuple))`` keeps working. ``Mapping`` types are rejected because
1200+
iterating a mapping yields keys, not values.
12231201
12241202
Parameters
12251203
----------
12261204
value : None, str, or iterable of str
12271205
param_name : str, optional
1228-
Name of the parameter, used in error messages. Defaults to
1229-
``"value"``.
1206+
Used in error messages. Defaults to ``"value"``.
12301207
12311208
Returns
12321209
-------
12331210
None, str, or list of str
1234-
``None`` and ``str`` are returned unchanged; non-string iterables
1235-
are returned as a ``list``.
12361211
12371212
Raises
12381213
------
@@ -1346,17 +1321,13 @@ def _get_args(
13461321
for k, v in local_vars.items():
13471322
if k in to_exclude or v is None:
13481323
continue
1349-
if k in _NO_NORMALIZE_PARAMS or isinstance(v, str):
1350-
args[k] = v
1351-
continue
1352-
if not isinstance(v, Iterable):
1353-
# Scalar non-string knob (bool / int / float) — pass through.
1324+
if (
1325+
k in _NO_NORMALIZE_PARAMS
1326+
or isinstance(v, str)
1327+
or not isinstance(v, Iterable)
1328+
or (isinstance(v, (list, tuple)) and v and not isinstance(v[0], str))
1329+
):
13541330
args[k] = v
1355-
continue
1356-
if isinstance(v, (list, tuple)) and v and not isinstance(v[0], str):
1357-
# list[float] / list[int] (e.g. bbox) — pass through.
1358-
args[k] = v
1359-
continue
1360-
# String-iterable: validate elements and materialize to list.
1361-
args[k] = _normalize_str_iterable(v, k)
1331+
else:
1332+
args[k] = _normalize_str_iterable(v, k)
13621333
return args

0 commit comments

Comments
 (0)