DOI-USGS
diff --git a/NEWS.md b/NEWS.md
Lines changed: 2 additions & 0 deletions
diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
Lines changed: 14 additions & 57 deletions
diff --git a/dataretrieval/rdb.py b/dataretrieval/rdb.py
Lines changed: 90 additions & 0 deletions
diff --git a/dataretrieval/waterdata/__init__.py b/dataretrieval/waterdata/__init__.py
Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
+**05/06/2026:** Added `waterdata.get_ratings(...)` — wraps the new Water Data STAC catalog (`api.waterdata.usgs.gov/stac/v0/search`) for USGS stage-discharge rating curves. Returns parsed `exsa` / `base` / `corr` rating tables as a dict of DataFrames keyed by feature ID, or just the list of available STAC features when `download_and_parse=False`. Mirrors R's `read_waterdata_ratings`.
+
 **05/06/2026:** Added `waterdata.get_field_measurements_metadata(...)` — wraps the OGC `field-measurements-metadata` collection. Returns one row per (location, parameter) field-measurement series describing its period of record, units, etc., without the underlying observations. Discrete-measurement analogue to `get_time_series_metadata`. Mirrors R's `read_waterdata_field_meta`.
 
 **05/05/2026:** Added `waterdata.get_combined_metadata(...)` — wraps the Water Data API's `combined-metadata` collection, which joins the monitoring-locations catalog with the time-series-metadata catalog and returns one row per (location, parameter, statistic) inventory entry. This is the most flexible "what data is available" endpoint in the API: any location attribute (state, HUC, site type, drainage area, well-construction depth, …) can be combined with any time-series attribute (parameter code, statistic, data type, period of record, …) in a single query. Mirrors R's `read_waterdata_combined_meta`.
 
@@ -7,12 +7,12 @@
 from __future__ import annotations
 
 import warnings
-from io import StringIO
 from json import JSONDecodeError
 
 import pandas as pd
 import requests
 
+from dataretrieval.rdb import read_rdb
 from dataretrieval.utils import BaseMetadata
 
 from .utils import query
@@ -44,6 +44,14 @@
 # NAD83
 _CRS = "EPSG:4269"
 
+_NWIS_RDB_DTYPES = {
+    "site_no": str,
+    "dec_long_va": float,
+    "dec_lat_va": float,
+    "parm_cd": str,
+    "parameter_cd": str,
+}
+
 
 def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame:
     """Parse a JSON NWIS response, raising a helpful error on HTML responses."""
@@ -1018,64 +1026,13 @@ def _read_json(json):
 
 
 def _read_rdb(rdb):
-    """
-    Convert NWIS rdb table into a ``pandas.dataframe``.
-
-    Parameters
-    ----------
-    rdb: string
-        A string representation of an rdb table
-
-    Returns
-    -------
-    df: ``pandas.dataframe``
-        A formatted pandas data frame
+    """Parse an NWIS RDB response and apply NWIS-specific post-processing.
 
+    Thin wrapper around :func:`dataretrieval.rdb.read_rdb` that adds the
+    NWIS column-dtype hints and runs :func:`format_response` (datetime
+    index, multi-site MultiIndex, optional GeoDataFrame).
     """
-    if "<html>" in rdb.lower() or "<!doctype html>" in rdb.lower():
-        raise ValueError(
-            "Received HTML response instead of RDB. This often indicates "
-            "that the service has been moved or is currently unavailable."
-        )
-
-    count = 0
-    lines = rdb.splitlines()
-
-    for line in lines:
-        # ignore comment lines
-        if line.startswith("#"):
-            count = count + 1
-
-        else:
-            break
-
-    if count >= len(lines):
-        # All lines are comments — the service returned no data rows (e.g.
-        # "No sites found matching all criteria").  This is a legitimate empty
-        # result, so return an empty DataFrame rather than raising.
-        return pd.DataFrame()
-
-    fields = lines[count].split("\t")
-    fields = [field.replace(",", "").strip() for field in fields if field.strip()]
-    dtypes = {
-        "site_no": str,
-        "dec_long_va": float,
-        "dec_lat_va": float,
-        "parm_cd": str,
-        "parameter_cd": str,
-    }
-
-    df = pd.read_csv(
-        StringIO(rdb),
-        delimiter="\t",
-        skiprows=count + 2,
-        names=fields,
-        na_values="NaN",
-        dtype=dtypes,
-    )
-
-    df = format_response(df)
-    return df
+    return format_response(read_rdb(rdb, dtypes=_NWIS_RDB_DTYPES))
 
 
 def _check_sites_value_types(sites):
 
@@ -0,0 +1,90 @@
+"""Parser for the USGS RDB tab-separated text format.
+
+RDB (Relational DataBase) is the text format used by NWIS web services
+and by the Water Data STAC catalog's rating-curve assets. Every RDB
+file has the same shape:
+
+- One or more ``#``-prefixed comment lines carrying provenance metadata
+  (data source, retrieval timestamp, station name, parameter codes, etc.).
+- A tab-separated header row naming each column.
+- A second tab-separated row giving column format specs (e.g. ``5s 15s``);
+  it is informational only and skipped during parsing.
+- Tab-separated data rows.
+
+This module exposes the parsing primitives that both ``dataretrieval.nwis``
+and ``dataretrieval.waterdata.ratings`` use. Callers layer their own
+post-processing (NWIS-specific datetime indexing, ratings-specific
+``df.attrs`` provenance, etc.) on top of the raw frame.
+"""
+
+from __future__ import annotations
+
+from io import StringIO
+
+import pandas as pd
+
+
+def read_rdb(text: str, dtypes: dict[str, type] | None = None) -> pd.DataFrame:
+    """Parse an RDB text response into a ``pandas.DataFrame``.
+
+    Parameters
+    ----------
+    text : str
+        The RDB text response from a USGS web service.
+    dtypes : dict[str, type] or None, optional
+        Column-name to dtype hints, forwarded to ``pandas.read_csv`` as
+        ``dtype``. Unknown column names are silently ignored, so callers
+        may safely pass hints for all columns they might be interested in.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The parsed data rows. Text with no line other than ``#`` comment
+        lines (e.g. a "no sites found" response, or empty text) returns an
+        empty DataFrame rather than raising.
+
+    Raises
+    ------
+    ValueError
+        If the response body looks like HTML, which usually means the
+        service has been moved, is degraded, or returned an error page.
+    """
+    # NOTE(review): "<html lang=...>" without a doctype slips past this check.
+    if "<html>" in text.lower() or "<!doctype html>" in text.lower():
+        raise ValueError(
+            "Received HTML response instead of RDB. This often indicates "
+            "that the service has been moved or is currently unavailable."
+        )
+
+    lines = text.splitlines()
+    header_idx = next(  # first line not starting with "#"; len(lines) if none
+        (i for i, line in enumerate(lines) if not line.startswith("#")),
+        len(lines),
+    )
+    if header_idx == len(lines):
+        # No non-comment line (including empty text) — a legitimate empty result.
+        return pd.DataFrame()
+
+    fields = [f.replace(",", "").strip() for f in lines[header_idx].split("\t")]
+    fields = [f for f in fields if f]  # drop blank header cells
+
+    return pd.read_csv(
+        StringIO(text),
+        delimiter="\t",
+        skiprows=header_idx + 2,  # +1 for header, +1 for the format-spec row
+        names=fields,  # header line is skipped, so names are supplied explicitly
+        na_values="NaN",
+        dtype=dtypes,
+    )
+
+
+def extract_rdb_comment(text: str) -> list[str]:
+    """Return the ``#``-prefixed comment lines of an RDB text, in original order.
+
+    Entries keep their leading ``#`` and whitespace, matching what R's
+    ``dataRetrieval`` returns from ``comment(df)``. Every ``#`` line is kept,
+    wherever it occurs in *text*. These lines carry provenance metadata lost
+    during parsing — data source, retrieval timestamp, parameter codes,
+    rating id and last-shifted timestamp for ratings, etc.
+    """
+    return [line for line in text.splitlines() if line.startswith("#")]
@@ -30,6 +30,7 @@
 )
 from .filters import FILTER_LANG
 from .nearest import get_nearest_continuous
+from .ratings import get_ratings
 from .types import (
     CODE_SERVICES,
     PROFILE_LOOKUP,
@@ -54,6 +55,7 @@
     "get_latest_daily",
     "get_monitoring_locations",
     "get_nearest_continuous",
+    "get_ratings",
     "get_reference_table",
     "get_samples",
     "get_samples_summary",
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	+05/06/2026: Added `waterdata.get_ratings(...)` — wraps the new Water Data STAC catalog (`api.waterdata.usgs.gov/stac/v0/search`) for USGS stage-discharge rating curves. Returns parsed `exsa` / `base` / `corr` rating tables as a dict of DataFrames keyed by feature ID, or just the list of available STAC features when `download_and_parse=False`. Mirrors R's `read_waterdata_ratings`.
	`2`	`+`
`1`	`3`	05/06/2026: Added `waterdata.get_field_measurements_metadata(...)` — wraps the OGC `field-measurements-metadata` collection. Returns one row per (location, parameter) field-measurement series describing its period of record, units, etc., without the underlying observations. Discrete-measurement analogue to `get_time_series_metadata`. Mirrors R's `read_waterdata_field_meta`.
`2`	`4`
`3`	`5`	05/05/2026: Added `waterdata.get_combined_metadata(...)` — wraps the Water Data API's `combined-metadata` collection, which joins the monitoring-locations catalog with the time-series-metadata catalog and returns one row per (location, parameter, statistic) inventory entry. This is the most flexible "what data is available" endpoint in the API: any location attribute (state, HUC, site type, drainage area, well-construction depth, …) can be combined with any time-series attribute (parameter code, statistic, data type, period of record, …) in a single query. Mirrors R's `read_waterdata_combined_meta`.