Skip to content

Commit 40ed9eb

Browse files
committed
Use GET with comma-separated values for multi-value waterdata queries
The OGC Water Data API supports comma-separated multi-value parameters for most collections, so list-valued kwargs (e.g. parameter_code= ["00060", "00010"]) can now be sent as a single GET request instead of a POST+CQL2 body. The monitoring-locations collection still rejects comma-separated values and so retains the POST+CQL2 path; a new _CQL2_REQUIRED_SERVICES frozenset is the single removal point for when the upstream API closes that gap. Date/time parameters are now formatted to ISO8601 once at the top of _construct_api_requests ahead of the routing decision, so both paths see a post-formatted string instead of the original list shape. Tests cover the GET multi-value path, the POST+CQL2 path (including the full CQL2 envelope shape), single-value scalars, numeric lists, and the two-element date-list interval case. Closes #210
1 parent c755f6b commit 40ed9eb

2 files changed

Lines changed: 111 additions & 28 deletions

File tree

dataretrieval/waterdata/utils.py

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,10 @@ def _switch_properties_id(properties: list[str] | None, id_name: str, service: s
153153
{"datetime", "last_modified", "begin", "begin_utc", "end", "end_utc", "time"}
154154
)
155155

156+
# Services that don't support comma-separated values for multi-value GET
157+
# parameters and require POST with CQL2 JSON instead.
158+
_CQL2_REQUIRED_SERVICES = frozenset({"monitoring-locations"})
159+
156160

157161
def _parse_datetime(value: str) -> datetime | None:
158162
"""Parse a single datetime string against the supported formats.
@@ -417,9 +421,11 @@ def _construct_api_requests(
417421
"""
418422
Constructs an HTTP request object for the specified water data API service.
419423
420-
Depending on the input parameters (whether there's lists of multiple
421-
argument values), the function determines whether to use a GET or POST
422-
request, formats parameters appropriately, and sets required headers.
424+
For most services, list parameters are comma-joined and sent as a single
425+
GET request (e.g. ``parameter_code=["00060","00010"]`` becomes
426+
``parameter_code=00060,00010`` in the URL). For services that do not
427+
support comma-separated values (currently only ``monitoring-locations``),
428+
a POST request with CQL2 JSON is used instead.
423429
424430
Parameters
425431
----------
@@ -445,37 +451,38 @@ def _construct_api_requests(
445451
Notes
446452
-----
447453
- Date/time parameters are automatically formatted to ISO8601.
448-
- If multiple values are provided for non-single parameters, a POST request
449-
is constructed.
450-
- The function sets appropriate headers for GET and POST requests.
451454
"""
452455
service_url = f"{OGC_API_URL}/collections/{service}/items"
453456

454-
# Identify which parameters should be included in the POST content body
455-
post_params = {
456-
k: v
457-
for k, v in kwargs.items()
458-
if k not in _DATE_RANGE_PARAMS and isinstance(v, (list, tuple)) and len(v) > 1
459-
}
457+
# Format date/time parameters to ISO8601 first — both routing paths need it.
458+
for key in _DATE_RANGE_PARAMS:
459+
if key in kwargs:
460+
kwargs[key] = _format_api_dates(
461+
kwargs[key],
462+
date=(service == "daily" and key != "last_modified"),
463+
)
460464

461-
# Everything else goes into the params dictionary for the URL
462-
params = {k: v for k, v in kwargs.items() if k not in post_params}
463-
# Set skipGeometry parameter (API expects camelCase)
464-
params["skipGeometry"] = skip_geometry
465+
if service in _CQL2_REQUIRED_SERVICES:
466+
# POST with CQL2 JSON: multi-value params go in the request body.
467+
# The date-range loop above has already collapsed any _DATE_RANGE_PARAMS
468+
# value to a string, so the list/tuple check below cannot match them.
469+
post_params = {
470+
k: v
471+
for k, v in kwargs.items()
472+
if isinstance(v, (list, tuple)) and len(v) > 1
473+
}
474+
params = {k: v for k, v in kwargs.items() if k not in post_params}
475+
else:
476+
# GET with comma-separated values: join list/tuple values into one string.
477+
post_params = {}
478+
params = {
479+
k: ",".join(str(x) for x in v) if isinstance(v, (list, tuple)) else v
480+
for k, v in kwargs.items()
481+
}
465482

466-
# If limit is none or greater than 50000, then set limit to max results. Otherwise,
467-
# use the limit
483+
params["skipGeometry"] = skip_geometry
468484
params["limit"] = 50000 if limit is None or limit > 50000 else limit
469485

470-
# Indicate if function needs to perform POST conversion
471-
POST = bool(post_params)
472-
473-
# Convert dates to ISO08601 format
474-
for i in _DATE_RANGE_PARAMS:
475-
if i in params:
476-
dates = service == "daily" and i != "last_modified"
477-
params[i] = _format_api_dates(params[i], date=dates)
478-
479486
# `len()` instead of truthiness: a numpy ndarray would raise on `if bbox:`.
480487
if bbox is not None and len(bbox) > 0:
481488
params["bbox"] = ",".join(map(str, bbox))
@@ -490,7 +497,7 @@ def _construct_api_requests(
490497

491498
headers = _default_headers()
492499

493-
if POST:
500+
if post_params:
494501
headers["Content-Type"] = "application/query-cql-json"
495502
request = requests.Request(
496503
method="POST",

tests/waterdata_test.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
import json
23
import sys
34
from unittest import mock
45

@@ -30,6 +31,7 @@
3031
from dataretrieval.waterdata.utils import (
3132
_check_monitoring_location_id,
3233
_check_profiles,
34+
_construct_api_requests,
3335
_normalize_str_iterable,
3436
)
3537

@@ -111,6 +113,80 @@ def test_check_profiles():
111113
_check_profiles(service="results", profile="foo")
112114

113115

116+
def test_construct_api_requests_multivalue_get():
117+
"""Multi-value params use GET with comma-separated values for daily service."""
118+
req = _construct_api_requests(
119+
"daily",
120+
monitoring_location_id=["USGS-05427718", "USGS-05427719"],
121+
parameter_code=["00060", "00065"],
122+
)
123+
assert req.method == "GET"
124+
assert "monitoring_location_id=USGS-05427718%2CUSGS-05427719" in req.url
125+
assert "parameter_code=00060%2C00065" in req.url
126+
127+
128+
def test_construct_api_requests_monitoring_locations_post():
129+
"""monitoring-locations uses POST+CQL2 for multi-value params (API limitation)."""
130+
req = _construct_api_requests(
131+
"monitoring-locations",
132+
hydrologic_unit_code=["010802050102", "010802050103"],
133+
)
134+
assert req.method == "POST"
135+
assert req.headers["Content-Type"] == "application/query-cql-json"
136+
137+
body = json.loads(req.body)
138+
# Top-level shape: AND over a list of per-param predicates.
139+
assert body["op"] == "and"
140+
assert isinstance(body["args"], list) and len(body["args"]) == 1
141+
142+
# The single predicate is an IN over hydrologic_unit_code with both values.
143+
predicate = body["args"][0]
144+
assert predicate["op"] == "in"
145+
assert predicate["args"][0] == {"property": "hydrologic_unit_code"}
146+
assert predicate["args"][1] == ["010802050102", "010802050103"]
147+
148+
149+
def test_construct_api_requests_single_value_stays_get():
150+
"""A length-1 list (or scalar) reaches the URL as a plain value, not a
151+
comma-separated form, so existing single-site callers see no change."""
152+
req = _construct_api_requests(
153+
"daily",
154+
monitoring_location_id="USGS-05427718",
155+
parameter_code="00060",
156+
)
157+
assert req.method == "GET"
158+
assert "monitoring_location_id=USGS-05427718" in req.url
159+
assert "%2C" not in req.url # no comma-encoded multi-value
160+
161+
162+
def test_construct_api_requests_numeric_list_joins_with_str():
163+
"""Numeric-list params (e.g. ``water_year=[2020, 2021]`` on get_peaks)
164+
must reach the URL as a comma-joined string, not crash on ``",".join``
165+
of ints. The generator-of-``str(x)`` exists exactly for this case."""
166+
req = _construct_api_requests(
167+
"peaks",
168+
monitoring_location_id="USGS-05427718",
169+
water_year=[2020, 2021],
170+
)
171+
assert req.method == "GET"
172+
assert "water_year=2020%2C2021" in req.url
173+
174+
175+
def test_construct_api_requests_two_element_date_list_becomes_interval():
176+
"""A two-element date list is interpreted as start/end of an OGC datetime
177+
interval (joined with '/'), NOT as two discrete dates. The OGC `datetime`
178+
parameter does not support "these N specific dates" — that would require
179+
a CQL filter. Verifying so this contract is locked in."""
180+
req = _construct_api_requests(
181+
"daily",
182+
monitoring_location_id="USGS-05427718",
183+
time=["2024-01-01", "2024-01-31"],
184+
)
185+
assert req.method == "GET"
186+
# `/` URL-encodes to %2F. Confirms _format_api_dates ran before the join.
187+
assert "time=2024-01-01%2F2024-01-31" in req.url
188+
189+
114190
def test_samples_results():
115191
"""Test results call for proper columns"""
116192
df, _ = get_samples(

0 commit comments

Comments
 (0)