Skip to content

Commit 46100eb

Browse files
thodson-usgs and claude committed
feat(waterdata): enable arbitrary queryables as passthrough filters
The OGC data getters (`get_daily`, `get_continuous`, `get_peaks`, ...) exposed ~11 of each collection's ~50 queryables as named params; the rest — mostly the shared monitoring-location attributes (`state_name`, `county_code`, `site_type`, `altitude`, ...) now filterable on the data endpoints — were reachable only via the raw `filter` CQL.

Accept any queryable as a passthrough kwarg: each OGC getter gains `**queryables`, and the shared `_get_args` flattens it so an extra filter such as `state_name="Wisconsin"` is normalized and sent exactly like a named param. The service itself validates names (an unknown one returns HTTP 400 → typed error), so no client-side queryable list is bundled.

The passthrough is provisional (see the PR description for the trade-off vs. explicit per-property keyword arguments).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01Sjb14HkwuCydKSKMsaXsgd
1 parent e2c0032 commit 46100eb

3 files changed

Lines changed: 161 additions & 0 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ def get_daily(
7575
filter: str | None = None,
7676
filter_lang: FILTER_LANG | None = None,
7777
convert_type: bool = True,
78+
**queryables: Any,
7879
) -> tuple[pd.DataFrame, BaseMetadata]:
7980
"""Daily data provide one data value to represent water conditions for the
8081
day.
@@ -207,6 +208,13 @@ def get_daily(
207208
and the lexicographic-comparison pitfall.
208209
convert_type : boolean, optional
209210
If True, converts columns to appropriate types.
211+
**queryables : string or iterable of strings, optional
212+
Any other queryable property of this collection, passed through as a
213+
server-side filter (e.g. ``state_name="Wisconsin"``,
214+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
215+
queryable properties; an unknown name is rejected by the service with a
216+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
217+
may be superseded by explicit per-property keyword arguments.
210218
211219
Returns
212220
-------
@@ -296,6 +304,7 @@ def get_continuous(
296304
filter: str | None = None,
297305
filter_lang: FILTER_LANG | None = None,
298306
convert_type: bool = True,
307+
**queryables: Any,
299308
) -> tuple[pd.DataFrame, BaseMetadata]:
300309
"""
301310
Continuous data provide instantaneous water conditions.
@@ -422,6 +431,13 @@ def get_continuous(
422431
and the lexicographic-comparison pitfall.
423432
convert_type : boolean, optional
424433
If True, converts columns to appropriate types.
434+
**queryables : string or iterable of strings, optional
435+
Any other queryable property of this collection, passed through as a
436+
server-side filter (e.g. ``state_name="Wisconsin"``,
437+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
438+
queryable properties; an unknown name is rejected by the service with a
439+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
440+
may be superseded by explicit per-property keyword arguments.
425441
426442
Returns
427443
-------
@@ -521,6 +537,7 @@ def get_monitoring_locations(
521537
filter: str | None = None,
522538
filter_lang: FILTER_LANG | None = None,
523539
convert_type: bool = True,
540+
**queryables: Any,
524541
) -> tuple[pd.DataFrame, BaseMetadata]:
525542
"""Location information is basic information about the monitoring location
526543
including the name, identifier, agency responsible for data collection, and
@@ -739,6 +756,13 @@ def get_monitoring_locations(
739756
and the lexicographic-comparison pitfall.
740757
convert_type : boolean, optional
741758
If True, converts columns to appropriate types.
759+
**queryables : string or iterable of strings, optional
760+
Any other queryable property of this collection, passed through as a
761+
server-side filter (e.g. ``state_name="Wisconsin"``,
762+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
763+
queryable properties; an unknown name is rejected by the service with a
764+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
765+
may be superseded by explicit per-property keyword arguments.
742766
743767
Returns
744768
-------
@@ -809,6 +833,7 @@ def get_time_series_metadata(
809833
filter: str | None = None,
810834
filter_lang: FILTER_LANG | None = None,
811835
convert_type: bool = True,
836+
**queryables: Any,
812837
) -> tuple[pd.DataFrame, BaseMetadata]:
813838
"""Daily data and continuous measurements are grouped into time series,
814839
which represent a collection of observations of a single parameter,
@@ -976,6 +1001,13 @@ def get_time_series_metadata(
9761001
and the lexicographic-comparison pitfall.
9771002
convert_type : boolean, optional
9781003
If True, converts columns to appropriate types.
1004+
**queryables : string or iterable of strings, optional
1005+
Any other queryable property of this collection, passed through as a
1006+
server-side filter (e.g. ``state_name="Wisconsin"``,
1007+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
1008+
queryable properties; an unknown name is rejected by the service with a
1009+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
1010+
may be superseded by explicit per-property keyword arguments.
9791011
9801012
Returns
9811013
-------
@@ -1081,6 +1113,7 @@ def get_combined_metadata(
10811113
filter: str | None = None,
10821114
filter_lang: FILTER_LANG | None = None,
10831115
convert_type: bool = True,
1116+
**queryables: Any,
10841117
) -> tuple[pd.DataFrame, BaseMetadata]:
10851118
"""Get combined monitoring-location and time-series metadata.
10861119
@@ -1183,6 +1216,13 @@ def get_combined_metadata(
11831216
and the lexicographic-comparison pitfall.
11841217
convert_type : boolean, optional
11851218
If True, converts columns to appropriate types.
1219+
**queryables : string or iterable of strings, optional
1220+
Any other queryable property of this collection, passed through as a
1221+
server-side filter (e.g. ``state_name="Wisconsin"``,
1222+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
1223+
queryable properties; an unknown name is rejected by the service with a
1224+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
1225+
may be superseded by explicit per-property keyword arguments.
11861226
11871227
Returns
11881228
-------
@@ -1278,6 +1318,7 @@ def get_latest_continuous(
12781318
filter: str | None = None,
12791319
filter_lang: FILTER_LANG | None = None,
12801320
convert_type: bool = True,
1321+
**queryables: Any,
12811322
) -> tuple[pd.DataFrame, BaseMetadata]:
12821323
"""This endpoint provides the most recent observation for each time series
12831324
of continuous data. Continuous data are collected via automated sensors
@@ -1407,6 +1448,13 @@ def get_latest_continuous(
14071448
and the lexicographic-comparison pitfall.
14081449
convert_type : boolean, optional
14091450
If True, converts columns to appropriate types.
1451+
**queryables : string or iterable of strings, optional
1452+
Any other queryable property of this collection, passed through as a
1453+
server-side filter (e.g. ``state_name="Wisconsin"``,
1454+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
1455+
queryable properties; an unknown name is rejected by the service with a
1456+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
1457+
may be superseded by explicit per-property keyword arguments.
14101458
14111459
Returns
14121460
-------
@@ -1479,6 +1527,7 @@ def get_latest_daily(
14791527
filter: str | None = None,
14801528
filter_lang: FILTER_LANG | None = None,
14811529
convert_type: bool = True,
1530+
**queryables: Any,
14821531
) -> tuple[pd.DataFrame, BaseMetadata]:
14831532
"""Daily data provide one data value to represent water conditions for the
14841533
day.
@@ -1610,6 +1659,13 @@ def get_latest_daily(
16101659
and the lexicographic-comparison pitfall.
16111660
convert_type : boolean, optional
16121661
If True, converts columns to appropriate types.
1662+
**queryables : string or iterable of strings, optional
1663+
Any other queryable property of this collection, passed through as a
1664+
server-side filter (e.g. ``state_name="Wisconsin"``,
1665+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
1666+
queryable properties; an unknown name is rejected by the service with a
1667+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
1668+
may be superseded by explicit per-property keyword arguments.
16131669
16141670
Returns
16151671
-------
@@ -1683,6 +1739,7 @@ def get_field_measurements(
16831739
filter: str | None = None,
16841740
filter_lang: FILTER_LANG | None = None,
16851741
convert_type: bool = True,
1742+
**queryables: Any,
16861743
) -> tuple[pd.DataFrame, BaseMetadata]:
16871744
"""Field measurements are physically measured values collected during a
16881745
visit to the monitoring location. Field measurements consist of measurements
@@ -1805,6 +1862,13 @@ def get_field_measurements(
18051862
and the lexicographic-comparison pitfall.
18061863
convert_type : boolean, optional
18071864
If True, converts columns to appropriate types.
1865+
**queryables : string or iterable of strings, optional
1866+
Any other queryable property of this collection, passed through as a
1867+
server-side filter (e.g. ``state_name="Wisconsin"``,
1868+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
1869+
queryable properties; an unknown name is rejected by the service with a
1870+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
1871+
may be superseded by explicit per-property keyword arguments.
18081872
18091873
Returns
18101874
-------
@@ -1874,6 +1938,7 @@ def get_field_measurements_metadata(
18741938
filter: str | None = None,
18751939
filter_lang: FILTER_LANG | None = None,
18761940
convert_type: bool = True,
1941+
**queryables: Any,
18771942
) -> tuple[pd.DataFrame, BaseMetadata]:
18781943
"""Get field-measurement metadata: one row per (location, parameter) series.
18791944
@@ -1927,6 +1992,13 @@ def get_field_measurements_metadata(
19271992
and the lexicographic-comparison pitfall.
19281993
convert_type : boolean, optional
19291994
If True, converts columns to appropriate types.
1995+
**queryables : string or iterable of strings, optional
1996+
Any other queryable property of this collection, passed through as a
1997+
server-side filter (e.g. ``state_name="Wisconsin"``,
1998+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
1999+
queryable properties; an unknown name is rejected by the service with a
2000+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
2001+
may be superseded by explicit per-property keyword arguments.
19302002
19312003
Returns
19322004
-------
@@ -1999,6 +2071,7 @@ def get_peaks(
19992071
filter: str | None = None,
20002072
filter_lang: FILTER_LANG | None = None,
20012073
convert_type: bool = True,
2074+
**queryables: Any,
20022075
) -> tuple[pd.DataFrame, BaseMetadata]:
20032076
"""Get the annual peak streamflow / stage record for a monitoring location.
20042077
@@ -2057,6 +2130,13 @@ def get_peaks(
20572130
and the lexicographic-comparison pitfall.
20582131
convert_type : boolean, optional
20592132
If True, converts columns to appropriate types.
2133+
**queryables : string or iterable of strings, optional
2134+
Any other queryable property of this collection, passed through as a
2135+
server-side filter (e.g. ``state_name="Wisconsin"``,
2136+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
2137+
queryable properties; an unknown name is rejected by the service with a
2138+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
2139+
may be superseded by explicit per-property keyword arguments.
20602140
20612141
Returns
20622142
-------
@@ -2981,6 +3061,7 @@ def get_channel(
29813061
filter: str | None = None,
29823062
filter_lang: FILTER_LANG | None = None,
29833063
convert_type: bool = True,
3064+
**queryables: Any,
29843065
) -> tuple[pd.DataFrame, BaseMetadata]:
29853066
"""
29863067
Channel measurements taken as part of streamflow field measurements.
@@ -3110,6 +3191,13 @@ def get_channel(
31103191
and the lexicographic-comparison pitfall.
31113192
convert_type : boolean, optional
31123193
If True, converts columns to appropriate types.
3194+
**queryables : string or iterable of strings, optional
3195+
Any other queryable property of this collection, passed through as a
3196+
server-side filter (e.g. ``state_name="Wisconsin"``,
3197+
``site_type_code="ST"``). See :func:`get_queryables` for a collection's
3198+
queryable properties; an unknown name is rejected by the service with a
3199+
``DataRetrievalError`` (HTTP 400). This passthrough is provisional and
3200+
may be superseded by explicit per-property keyword arguments.
31133201
31143202
Returns
31153203
-------

dataretrieval/waterdata/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,18 @@ def _get_args(
166166
params such as ``water_year``, ``thresholds``, ``boundingBox``) so they
167167
keep their element types. See :func:`engine._get_args` for the full
168168
normalization contract.
169+
170+
A getter's ``**queryables`` passthrough kwargs are collected by ``locals()``
171+
under the ``queryables`` key; they are flattened in here, so an extra
172+
server-side filter such as ``state_name="Wisconsin"`` is normalized and sent
173+
exactly like a named param. See
174+
:func:`dataretrieval.waterdata.get_queryables` for each collection's
175+
filterable properties (the service rejects an unknown one with a 400).
169176
"""
177+
local_vars = dict(local_vars)
178+
queryables = local_vars.pop("queryables", None)
179+
if queryables:
180+
local_vars.update(queryables)
170181
return _engine_get_args(local_vars, exclude, no_normalize=_NO_NORMALIZE_PARAMS)
171182

172183

tests/waterdata_queryables_test.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import json
2727
import re
2828
from pathlib import Path
29+
from urllib.parse import parse_qs, urlsplit
2930

3031
import pytest
3132

@@ -96,6 +97,67 @@ def test_get_queryables_unknown_collection_raises(httpx_mock):
9697
waterdata.get_queryables("not-a-collection")
9798

9899

100+
# --- passthrough queryables (mocked) ---------------------------------------
101+
102+
_DAILY_ITEMS_RE = re.compile(
103+
r"^https://api\.waterdata\.usgs\.gov/ogcapi/v0/collections/daily/items"
104+
)
105+
_DAILY_SCHEMA_RE = re.compile(
106+
r"^https://api\.waterdata\.usgs\.gov/ogcapi/v0/collections/daily/schema$"
107+
)
108+
_EMPTY_FEATURES = {
109+
"type": "FeatureCollection",
110+
"features": [],
111+
"numberReturned": 0,
112+
"numberMatched": 0,
113+
"links": [],
114+
}
115+
116+
117+
def _mock_daily(httpx_mock):
118+
"""Mock the two endpoints a ``get_daily`` call touches: the items query and
119+
the schema fetch (used for output typing)."""
120+
httpx_mock.add_response(method="GET", url=_DAILY_SCHEMA_RE, json={"properties": {}})
121+
httpx_mock.add_response(method="GET", url=_DAILY_ITEMS_RE, json=_EMPTY_FEATURES)
122+
123+
124+
def _items_query(httpx_mock):
125+
"""Parsed query string of the ``/items`` request the getter sent."""
126+
req = next(r for r in httpx_mock.get_requests() if "/items" in str(r.url))
127+
return parse_qs(urlsplit(str(req.url)).query)
128+
129+
130+
def test_passthrough_queryables_sent_as_filters(httpx_mock):
131+
"""An OGC getter forwards queryables that aren't in its explicit signature
132+
(e.g. ``state_name``, ``site_type_code``) to the service as query filters,
133+
alongside the named params."""
134+
_mock_daily(httpx_mock)
135+
136+
waterdata.get_daily(
137+
monitoring_location_id="USGS-05427718",
138+
state_name="Wisconsin",
139+
site_type_code="ST",
140+
)
141+
142+
qs = _items_query(httpx_mock)
143+
assert qs["state_name"] == ["Wisconsin"]
144+
assert qs["site_type_code"] == ["ST"]
145+
assert qs["monitoring_location_id"] == ["USGS-05427718"]
146+
147+
148+
def test_passthrough_list_queryable_is_comma_joined(httpx_mock):
149+
"""A list-valued passthrough queryable is normalized and comma-joined like a
150+
named multi-value param."""
151+
_mock_daily(httpx_mock)
152+
153+
waterdata.get_daily(
154+
monitoring_location_id="USGS-05427718",
155+
site_type_code=["ST", "LK"],
156+
)
157+
158+
assert _items_query(httpx_mock)["site_type_code"] == ["ST,LK"]
159+
160+
99161
# --- live queryables monitor -----------------------------------------------
100162

101163

0 commit comments

Comments
 (0)