Skip to content

Commit 38e0483

Browse files
thodson-usgsclaude
andcommitted
Tidy _check_monitoring_location_id (clearer dispatch, helper, type hints)
Internal refactor; behavior unchanged. - Add full type hints: ``str | Iterable[str] | None`` in, ``str | list[str] | None`` out, mirroring the runtime contract. - Split into explicit fast paths instead of wrapping-then-unwrapping: the string case validates and returns directly, eliminating the throwaway ``[monitoring_location_id]`` list and the final ``isinstance(str)`` re-check at return. - Invert the Mapping/Iterable check: reject ``Mapping`` (and non-iterables) up front, then ``list()`` the iterable. Reads more linearly than the compound ``isinstance(Iterable) and not isinstance(Mapping)``. - Extract ``_check_id_format(value)`` for the regex/ValueError pair so the format rule lives in one place and the call sites are one-liners. Tests unchanged; 16 validator tests + full suite still pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent fa8387e commit 38e0483

1 file changed

Lines changed: 29 additions & 21 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,29 +1156,32 @@ def _check_profiles(
11561156
_MONITORING_LOCATION_ID_RE = re.compile(r"^.+-.+$")
11571157

11581158

1159-
def _check_monitoring_location_id(monitoring_location_id):
1159+
def _check_monitoring_location_id(
1160+
monitoring_location_id: str | Iterable[str] | None,
1161+
) -> str | list[str] | None:
11601162
"""Validate and normalize a ``monitoring_location_id`` value.
11611163
11621164
Parameters
11631165
----------
11641166
monitoring_location_id : None, str, or iterable of str
1165-
Accepts ``None``, a single string, or any non-string iterable of
1166-
strings (``list``, ``tuple``, ``pandas.Series``, ``pandas.Index``,
1167-
``numpy.ndarray``, ...). Iterables are materialized to a ``list``
1168-
so downstream code that branches on ``isinstance(v, list)`` keeps
1169-
working.
1167+
``None``, a single AGENCY-ID string, or any non-string,
1168+
non-``Mapping`` iterable of such strings (``list``, ``tuple``,
1169+
``pandas.Series``, ``pandas.Index``, ``numpy.ndarray``, ...).
1170+
``Mapping`` types are rejected because iterating a mapping yields
1171+
keys, which would be a footgun.
11701172
11711173
Returns
11721174
-------
11731175
None, str, or list of str
1174-
``None`` and ``str`` inputs are returned unchanged; non-string
1175-
iterables are returned as a ``list``.
1176+
``None`` and ``str`` are returned unchanged; iterables are
1177+
materialized to a ``list`` so downstream code that branches on
1178+
``isinstance(v, list)`` keeps working.
11761179
11771180
Raises
11781181
------
11791182
TypeError
1180-
If the input is not ``None``, a string, or an iterable, or if any
1181-
iterable element is not a string.
1183+
If the input isn't ``None``, ``str``, or a non-``Mapping``
1184+
iterable; or if any iterable element isn't a string.
11821185
ValueError
11831186
If any identifier doesn't contain a hyphen separator
11841187
(per the OGC API spec: AGENCY-ID format, e.g. ``USGS-01646500``).
@@ -1187,32 +1190,37 @@ def _check_monitoring_location_id(monitoring_location_id):
11871190
return None
11881191

11891192
if isinstance(monitoring_location_id, str):
1190-
ids = [monitoring_location_id]
1191-
elif isinstance(monitoring_location_id, Iterable) and not isinstance(
1192-
monitoring_location_id, Mapping
1193+
_check_id_format(monitoring_location_id)
1194+
return monitoring_location_id
1195+
1196+
if isinstance(monitoring_location_id, Mapping) or not isinstance(
1197+
monitoring_location_id, Iterable
11931198
):
1194-
ids = list(monitoring_location_id)
1195-
else:
11961199
raise TypeError(
11971200
f"monitoring_location_id must be a string or iterable of strings, "
11981201
f"not {type(monitoring_location_id).__name__}. "
11991202
f"Expected format: 'AGENCY-ID', e.g., 'USGS-{monitoring_location_id}'."
12001203
)
12011204

1205+
ids = list(monitoring_location_id)
12021206
for id_ in ids:
12031207
if not isinstance(id_, str):
12041208
raise TypeError(
12051209
f"monitoring_location_id elements must be strings, "
12061210
f"not {type(id_).__name__}. "
12071211
f"Expected format: 'AGENCY-ID', e.g., 'USGS-{id_}'."
12081212
)
1209-
if not _MONITORING_LOCATION_ID_RE.match(id_):
1210-
raise ValueError(
1211-
f"Invalid monitoring_location_id: {id_!r}. "
1212-
f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'."
1213-
)
1213+
_check_id_format(id_)
1214+
return ids
12141215

1215-
return monitoring_location_id if isinstance(monitoring_location_id, str) else ids
1216+
1217+
def _check_id_format(value: str) -> None:
1218+
"""Raise ``ValueError`` if ``value`` is not in ``AGENCY-ID`` format."""
1219+
if not _MONITORING_LOCATION_ID_RE.match(value):
1220+
raise ValueError(
1221+
f"Invalid monitoring_location_id: {value!r}. "
1222+
f"Expected 'AGENCY-ID' format, e.g., 'USGS-01646500'."
1223+
)
12161224

12171225

12181226
def _get_args(

0 commit comments

Comments
 (0)