DOI-USGS
diff --git a/NEWS.md b/NEWS.md
Lines changed: 4 additions & 0 deletions
diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
Lines changed: 13 additions & 5 deletions
diff --git a/dataretrieval/utils.py b/dataretrieval/utils.py
Lines changed: 102 additions & 0 deletions
@@ -1,5 +1,9 @@
 **05/14/2026:** Fixed two latent bugs in the paginated `waterdata` request loop (`_walk_pages` and `get_stats_data`). Previously, when `requests.Session.request(...)` itself raised mid-pagination (network error, timeout), the except block called `_error_body()` on the *prior page's* response, so the logged "error" described the wrong request and could itself crash on non-JSON bodies. Separately, no status-code check was performed on subsequent paginated responses, so a 5xx body that didn't include `numberReturned` was silently treated as an empty page — pagination quietly stopped and the user got truncated data with no error logged. The loop now status-checks each page like the initial request and reports the actual exception. The "best-effort" behavior (return whatever pages were collected) is preserved.
 
+**05/07/2026:** Bumped the declared minimum Python version from **3.8** to **3.9** (`pyproject.toml`'s `requires-python` and the ruff target). This brings the manifest in line with what was already being tested — CI's matrix has long covered only 3.9, 3.13, and 3.14, the `waterdata` test module already skipped itself on Python < 3.10, and several modules already use stdlib modules that were only added in 3.9 (e.g. `zoneinfo`). Users on 3.8 will no longer be able to install the package; please upgrade.
+
+**05/07/2026:** `waterdata.get_samples()` and `wqp.get_results()` now append a derived `<prefix>DateTime` UTC column for every Date/Time/TimeZone triplet in the response (e.g. `Activity_StartDate` + `Activity_StartTime` + `Activity_StartTimeZone` → `Activity_StartDateTime`). Both the WQX3 (`<X>Date`/`<X>Time`/`<X>TimeZone`) and legacy WQP (`<X>Date`/`<X>Time/Time`/`<X>Time/TimeZoneCode`) shapes are recognized; abbreviations like EST/EDT/CST/PST resolve to a UTC `Timestamp`, unknown codes resolve to `NaT`, and the original triplet columns are preserved. Returned rows are also now sorted by `Activity_StartDateTime` (or the legacy `ActivityStartDateTime`) — the underlying APIs return rows in an unstable order. Mirrors R's `create_dateTime` and end-of-pipeline sort. Closes #266.
+
 **05/06/2026:** Each remaining active function in `dataretrieval.nwis` now emits a per-function `DeprecationWarning` naming the `waterdata` replacement to migrate to (visible the first time users call each getter). The `nwis` module is scheduled for removal on or after **2027-05-06**.
 
 **05/06/2026:** Added `waterdata.get_ratings(...)` — wraps the new Water Data STAC catalog (`api.waterdata.usgs.gov/stac/v0/search`) for USGS stage-discharge rating curves. Returns parsed `exsa` / `base` / `corr` rating tables as a dict of DataFrames keyed by feature ID, or just the list of available STAC features when `download_and_parse=False`. Mirrors R's `read_waterdata_ratings`.
 
@@ -291,10 +291,13 @@ def get_discharge_peaks(
 
 
 def get_gwlevels(**kwargs):
-    """Defunct: use ``waterdata.get_field_measurements()``."""
+    """Defunct: use ``waterdata.get_continuous()``, ``waterdata.get_daily()``,
+    or ``waterdata.get_field_measurements()``.
+
+    Kept only as a stub that points callers at the replacements. Any
+    keyword arguments are accepted and ignored.
+
+    Raises
+    ------
+    NameError
+        Always, with a message naming the ``waterdata`` replacements.
+    """
     raise NameError(
-        "`nwis.get_gwlevels` has been replaced "
-        "with `waterdata.get_field_measurements()`."
+        "`nwis.get_gwlevels` has been replaced. Use "
+        "`waterdata.get_continuous()` for continuous (typically 15-minute) "
+        "values, `waterdata.get_daily()` for daily values, or "
+        "`waterdata.get_field_measurements()` for discrete/manual readings."
     )
 
 
@@ -885,7 +888,8 @@ def get_record(
         - 'site' : site description
         - 'measurements' : (defunct) use `waterdata.get_field_measurements`
         - 'peaks': discharge peaks
-        - 'gwlevels': (defunct) use `waterdata.get_field_measurements`
+        - 'gwlevels': (defunct) use `waterdata.get_continuous`,
+          `waterdata.get_daily`, or `waterdata.get_field_measurements`
         - 'pmcodes': (defunct) use `get_reference_table`
         - 'water_use': (defunct) no replacement available
         - 'ratings': get rating table
@@ -933,7 +937,11 @@ def get_record(
 
     defunct_replacements = {
         "measurements": "`waterdata.get_field_measurements`",
-        "gwlevels": "`waterdata.get_field_measurements`",
+        "gwlevels": (
+            "`waterdata.get_continuous` (continuous), "
+            "`waterdata.get_daily`, or `waterdata.get_field_measurements` "
+            "(discrete)"
+        ),
         "pmcodes": "`waterdata.get_reference_table`",
         "water_use": "no replacement available",
     }
 
@@ -94,6 +94,108 @@ def format_datetime(df, date_field, time_field, tz_field):
     return df
 
 
+# (time-suffix, tz-suffix) pairs that follow a "<prefix>Date" column.
+# Each suffix is appended to the prefix that remains after stripping the
+# trailing "Date" from a column name. Pairs are tried in the order listed
+# and the first pair whose two columns both exist is the one used, so the
+# order of the entries is significant.
+_TIME_TZ_SUFFIXES = (
+    # WQX3 / Samples, e.g.
+    #   Activity_StartDate / Activity_StartTime / Activity_StartTimeZone
+    ("Time", "TimeZone"),
+    # Legacy WQP (slash-separated), e.g.
+    #   ActivityStartDate / ActivityStartTime/Time / ActivityStartTime/TimeZoneCode
+    ("Time/Time", "Time/TimeZoneCode"),
+)
+
+
+def _build_utc_datetime(
+    date_series: pd.Series, time_series: pd.Series, tz_series: pd.Series
+) -> pd.Series:
+    """Build a UTC datetime Series from separate date, time and tz columns.
+
+    Each timezone code is looked up in the module-level ``tz`` mapping. The
+    date, the time and the looked-up value are then joined with single
+    spaces and parsed as ``%Y-%m-%d %H:%M:%S %z``. Anything that does not
+    parse is coerced to ``NaT``; that covers unknown timezone codes and rows
+    missing any of the three values. The input Series are not modified.
+
+    Parameters
+    ----------
+    date_series : ``pandas.Series``
+        Dates, expected to render as ``YYYY-MM-DD`` text.
+    time_series : ``pandas.Series``
+        Times, expected to render as ``HH:MM:SS`` text.
+    tz_series : ``pandas.Series``
+        Timezone codes used as keys into ``tz``.
+        NOTE(review): the ``%z`` directive implies ``tz`` maps each code to
+        a UTC-offset string -- confirm against where ``tz`` is defined.
+
+    Returns
+    -------
+    ``pandas.Series``
+        Timezone-aware (UTC) datetimes, ``NaT`` where parsing failed.
+    """
+    # The tz lookup runs first; every piece is then cast to the nullable
+    # "string" dtype so a missing piece makes the whole joined value missing.
+    pieces = [
+        series.astype("string")
+        for series in (date_series, time_series, tz_series.map(tz))
+    ]
+    stamp_text = pieces[0] + " " + pieces[1] + " " + pieces[2]
+    return pd.to_datetime(
+        stamp_text, utc=True, errors="coerce", format="%Y-%m-%d %H:%M:%S %z"
+    )
+
+
+def _attach_datetime_columns(df: pd.DataFrame) -> pd.DataFrame:
+    """Add ``<prefix>DateTime`` UTC columns for any Date/Time/TimeZone triplets
+    and sort the frame by the activity-start datetime.
+
+    Detects two naming patterns that appear in USGS Samples and Water Quality
+    Portal CSV responses:
+
+    * **WQX3** — ``<prefix>Date``, ``<prefix>Time``, ``<prefix>TimeZone``
+    * **Legacy WQP** — ``<prefix>Date``, ``<prefix>Time/Time``,
+      ``<prefix>Time/TimeZoneCode``
+
+    For every triplet present, a new ``<prefix>DateTime`` column is appended
+    holding a UTC ``Timestamp`` (offsets resolved via
+    :data:`dataretrieval.codes.tz`). The original Date/Time/TimeZone columns
+    are left intact, and an existing ``<prefix>DateTime`` column is never
+    overwritten. Column labels that are not strings are ignored.
+
+    Rows are sorted (and the index reset) by the canonical activity-start
+    datetime when present — ``Activity_StartDateTime`` (WQX3) or
+    ``ActivityStartDateTime`` (legacy WQP) — falling back to the first
+    detected ``*Date`` column. The sort is stable, so rows that share a sort
+    value keep the relative order they arrived in. Mirrors R
+    ``dataRetrieval``'s end-of-pipeline sort in ``importWQP.R``.
+
+    Parameters
+    ----------
+    df : ``pandas.DataFrame``
+        DataFrame returned from a Samples or WQP CSV endpoint.
+
+    Returns
+    -------
+    df : ``pandas.DataFrame``
+        A new DataFrame with derivable ``<prefix>DateTime`` columns appended
+        and rows sorted by the activity-start datetime. When no column is
+        appended and no sort key is found, the input object itself is
+        returned unchanged. The input frame is never mutated.
+    """
+    columns = set(df.columns)
+    new_columns = {}
+    first_date_col = None
+    for col in df.columns:
+        # Labels are strings for CSV-derived frames, but guard anyway so a
+        # frame with non-string labels is skipped instead of raising.
+        if not isinstance(col, str) or not col.endswith("Date"):
+            continue
+        if first_date_col is None:
+            first_date_col = col
+        prefix = col.removesuffix("Date")
+        target = prefix + "DateTime"
+        # Never clobber a DateTime column that already exists or was just
+        # derived from an earlier duplicate label.
+        if target in columns or target in new_columns:
+            continue
+        for time_suffix, tz_suffix in _TIME_TZ_SUFFIXES:
+            time_col = prefix + time_suffix
+            tz_col = prefix + tz_suffix
+            if time_col in columns and tz_col in columns:
+                new_columns[target] = _build_utc_datetime(
+                    df[col], df[time_col], df[tz_col]
+                )
+                # First matching naming pattern wins.
+                break
+    if new_columns:
+        # Concat in one shot — per-column assignment on a wide CSV-derived
+        # frame triggers pandas' fragmentation PerformanceWarning.
+        df = pd.concat([df, pd.DataFrame(new_columns, index=df.index)], axis=1)
+    if "Activity_StartDateTime" in df.columns:
+        sort_key = "Activity_StartDateTime"
+    elif "ActivityStartDateTime" in df.columns:
+        sort_key = "ActivityStartDateTime"
+    else:
+        sort_key = first_date_col
+    if sort_key is not None:
+        # pandas defaults to quicksort, which is not stable. Many result rows
+        # share one activity start time, so use mergesort to keep tied rows
+        # in their incoming order.
+        df = df.sort_values(by=sort_key, ignore_index=True, kind="mergesort")
+    return df
+
+
 class BaseMetadata:
     """Base class for metadata.