Skip to content

Commit 466f5f6

Browse files
thodson-usgsclaude
andcommitted
Add waterdata.get_peaks for the annual peak-streamflow OGC collection
Wraps the new /ogcapi/v0/collections/peaks collection. Returns the annual peak record for a monitoring location — one row per (location, parameter, water year) — which is the standard input to flood-frequency analysis (log-Pearson Type III, etc.). The collection covers stage (parameter 00065) and discharge (00060); typical streamgages have a series for each.
Implementation reuses the existing get_ogc_data infrastructure:
- service = "peaks"
- output_id = "peak_id" (the API's `id` field is renamed for users, matching the project's other get_* functions)
R has no equivalent yet; the docstring was written from scratch following the project's existing get_* style. Two live tests cover the happy path (single-site, both parameters present) and a water-year filter.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 6df40f5 commit 466f5f6

4 files changed

Lines changed: 146 additions & 0 deletions

File tree

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
**05/06/2026:** Added `waterdata.get_peaks(...)` — wraps the new OGC `peaks` collection, returning the annual peak streamflow / stage record for a monitoring location (one row per water year, per parameter). Standard input to flood-frequency analysis. Supports calendar/water-year filters and the usual location/parameter/CQL knobs shared with the other OGC getters.
2+
13
**05/05/2026:** Added `waterdata.get_samples_summary(monitoringLocationIdentifier=...)` — wraps the Samples database `/summary/{id}` endpoint, returning per-characteristic result and activity counts plus first / most recent activity dates for a single monitoring location. Useful for taking inventory of available discrete-sample data before pulling observations with `get_samples`.
24

35
**05/01/2026:** The `nadp` module is now deprecated. Calling any of `get_annual_MDN_map`, `get_annual_NTN_map`, or `get_zip` will emit a `DeprecationWarning`. The module is scheduled for removal on or after **2026-11-01**. NADP is not a USGS data source; users should retrieve NADP data directly from https://nadp.slh.wisc.edu/.

dataretrieval/waterdata/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
get_latest_continuous,
2020
get_latest_daily,
2121
get_monitoring_locations,
22+
get_peaks,
2223
get_reference_table,
2324
get_samples,
2425
get_samples_summary,
@@ -50,6 +51,7 @@
5051
"get_latest_daily",
5152
"get_monitoring_locations",
5253
"get_nearest_continuous",
54+
"get_peaks",
5355
"get_reference_table",
5456
"get_samples",
5557
"get_samples_summary",

dataretrieval/waterdata/api.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1472,6 +1472,125 @@ def get_field_measurements(
14721472
return get_ogc_data(args, output_id, service)
14731473

14741474

1475+
def get_peaks(
    monitoring_location_id: str | list[str] | None = None,
    parameter_code: str | list[str] | None = None,
    time_series_id: str | list[str] | None = None,
    unit_of_measure: str | list[str] | None = None,
    time: str | list[str] | None = None,
    last_modified: str | list[str] | None = None,
    water_year: int | list[int] | None = None,
    year: int | list[int] | None = None,
    month: int | list[int] | None = None,
    day: int | list[int] | None = None,
    peak_since: str | list[str] | None = None,
    properties: str | list[str] | None = None,
    skip_geometry: bool | None = None,
    bbox: list[float] | None = None,
    limit: int | None = None,
    filter: str | None = None,
    filter_lang: FILTER_LANG | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Get the annual peak streamflow / stage record for monitoring locations.

    Peaks are the largest values observed at a site each water year and are
    the standard input to flood-frequency analysis (e.g. log-Pearson Type III
    fits). The endpoint returns one row per (monitoring location, parameter,
    water year), with the peak ``value`` and the ``time`` it occurred.

    The collection covers both stage (parameter ``"00065"``, ``ft``) and
    discharge (parameter ``"00060"``, ``ft^3/s``); a typical streamgage has a
    series for each. Reference docs:
    https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html#/peaks

    Every filter argument defaults to ``None``.

    Parameters
    ----------
    monitoring_location_id : string or list of strings, optional
        One or more monitoring-location identifiers, each in ``AGENCY-ID``
        form (e.g. ``"USGS-02238500"``).
    parameter_code : string or list of strings, optional
        5-digit parameter code. Most peaks records are ``"00060"`` (discharge)
        or ``"00065"`` (stage / gage height). Full list at
        https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
    time_series_id : string or list of strings, optional
        ID of the time series the peak belongs to.
    unit_of_measure : string or list of strings, optional
        Human-readable units (e.g. ``"ft^3/s"``, ``"ft"``).
    time : string or list of strings, optional
        Datetime, interval, or duration filter on the peak's date.
        See :func:`get_time_series_metadata` for the full grammar.
    last_modified : string or list of strings, optional
        Same datetime grammar as ``time``; filters on the database
        last-modified timestamp (useful for incremental ETL polling).
    water_year, year, month, day : int or list of ints, optional
        Calendar / water-year filters on the peak event. The water year ends
        September 30 (e.g. WY2024 = Oct 1, 2023 – Sep 30, 2024).
    peak_since : string or list of strings, optional
        Server-side filter for "first time the value has been exceeded since"
        flag (e.g. ``"1900"``, ``"site establishment"``).
    properties : string or list of strings, optional
        Subset of columns to return. Defaults to every available property.
    skip_geometry : boolean, optional
        Skip per-feature geometries; the returned object will be a plain
        ``DataFrame`` with no spatial information.
    bbox : list of numbers, optional
        Only features whose geometry intersects the bounding box are
        selected. Format: ``[xmin, ymin, xmax, ymax]`` in CRS 4326
        (longitude / latitude, west-south-east-north).
    limit : int, optional
        Page size; the maximum allowable value is 50000. Default
        (``None``) requests the maximum allowable limit.
    filter, filter_lang : optional
        Server-side CQL filter passed through as the OGC ``filter`` /
        ``filter-lang`` query parameters. See
        :mod:`dataretrieval.waterdata.filters` for syntax, auto-chunking,
        and the lexicographic-comparison pitfall.
    convert_type : boolean, optional
        If True (the default), converts columns to appropriate types.

    Returns
    -------
    df : ``pandas.DataFrame`` or ``geopandas.GeoDataFrame``
        Formatted data returned from the API query. The record identifier
        is exposed as the ``peak_id`` column.
    md : ``BaseMetadata``
        A custom metadata object pertaining to the query.

    Examples
    --------
    .. code::

        >>> # Full annual peak record at one site (both stage and discharge)
        >>> df, md = dataretrieval.waterdata.get_peaks(
        ...     monitoring_location_id="USGS-02238500"
        ... )

        >>> # Discharge peaks only
        >>> df, md = dataretrieval.waterdata.get_peaks(
        ...     monitoring_location_id="USGS-02238500",
        ...     parameter_code="00060",
        ... )

        >>> # Multi-site peaks for a parameter, narrowed to a water-year range
        >>> df, md = dataretrieval.waterdata.get_peaks(
        ...     monitoring_location_id=[
        ...         "USGS-07069000",
        ...         "USGS-07064000",
        ...         "USGS-07068000",
        ...     ],
        ...     parameter_code="00060",
        ...     water_year=[2020, 2021, 2022, 2023],
        ... )

    """
    # NOTE(review): ``locals()`` below snapshots every name bound so far,
    # including ``service`` and ``output_id``. Presumably ``_get_args``
    # filters those two out by name -- confirm before renaming, reordering,
    # or adding local variables here.
    service = "peaks"
    output_id = "peak_id"

    args = _get_args(locals())

    return get_ogc_data(args, output_id, service)
1592+
1593+
14751594
def get_reference_table(
14761595
collection: str,
14771596
limit: int | None = None,

tests/waterdata_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
get_latest_continuous,
1616
get_latest_daily,
1717
get_monitoring_locations,
18+
get_peaks,
1819
get_reference_table,
1920
get_samples,
2021
get_samples_summary,
@@ -335,6 +336,28 @@ def test_get_time_series_metadata():
335336
assert hasattr(md, "query_time")
336337

337338

339+
def test_get_peaks():
    """Live happy-path check of ``get_peaks`` for a single site.

    Requests every peak record at USGS-02238500 without geometry, then
    verifies the expected columns exist, only the requested site is returned,
    the parameter codes are limited to ``"00060"`` / ``"00065"``, and the
    metadata object exposes ``url`` and ``query_time``.
    """
    df, md = get_peaks(monitoring_location_id="USGS-02238500", skip_geometry=True)
    # Guard against a vacuous pass: on an empty frame the ``.all()`` and
    # ``issubset`` checks below would succeed without testing anything.
    assert not df.empty
    assert "peak_id" in df.columns
    assert "value" in df.columns
    assert "water_year" in df.columns
    assert (df["monitoring_location_id"] == "USGS-02238500").all()
    assert set(df["parameter_code"].unique()).issubset({"00060", "00065"})
    assert hasattr(md, "url")
    assert hasattr(md, "query_time")
348+
349+
350+
def test_get_peaks_water_year_filter():
    """Live check that ``get_peaks`` honours parameter and water-year filters.

    Requests ``"00060"`` peaks at USGS-02238500 for three water years and
    verifies that no other parameter code or water year appears in the result.
    """
    wanted_years = (2020, 2021, 2022)
    peaks, _ = get_peaks(
        monitoring_location_id="USGS-02238500",
        parameter_code="00060",
        water_year=list(wanted_years),
        skip_geometry=True,
    )

    # NOTE(review): both checks pass vacuously if the service returns no
    # rows; consider asserting a non-empty result once confirmed that this
    # site has peaks in these water years.
    only_requested_parameter = (peaks["parameter_code"] == "00060").all()
    assert only_requested_parameter

    returned_years = set(peaks["water_year"].unique())
    assert returned_years.issubset(set(wanted_years))
359+
360+
338361
def test_get_reference_table():
339362
df, md = get_reference_table("agency-codes")
340363
assert "agency_code" in df.columns

0 commit comments

Comments
 (0)