|
| 1 | +"""National Ground-Water Monitoring Network (NGWMN) getters. |
| 2 | +
|
| 3 | +The NGWMN exposes its data through a dedicated OGC API |
| 4 | +(``https://api.waterdata.usgs.gov/ngwmn/ogcapi``) with five collections: |
| 5 | +``sites``, ``waterLevelObs``, ``lithologyObs``, ``constructionObs``, and |
| 6 | +``providers``. Each getter below delegates to the shared OGC engine |
| 7 | +(:func:`~dataretrieval.ogc.engine.get_ogc_data`) with |
| 8 | +``base_url=NGWMN_OGC_API_URL``, so multi-value chunking, pagination, |
| 9 | +retry/resume, and result shaping all behave exactly as they do for the main |
| 10 | +Water Data getters. |
| 11 | +
|
| 12 | +Unlike the main Water Data collections, NGWMN aggregates monitoring locations |
| 13 | +from many agencies, so ``monitoring_location_id`` values use other agency |
| 14 | +prefixes besides ``USGS-`` (e.g. ``MBMG-702934``, ``AKDNR-535134236016630``). |
| 15 | +
|
| 16 | +See https://api.waterdata.usgs.gov/ngwmn/ogcapi for the API reference. |
| 17 | +""" |
| 18 | + |
| 19 | +from __future__ import annotations |
| 20 | + |
| 21 | +from collections.abc import Iterable |
| 22 | + |
| 23 | +import pandas as pd |
| 24 | + |
| 25 | +from dataretrieval.ogc.engine import BASE_URL, _get_args, get_ogc_data |
| 26 | +from dataretrieval.utils import BaseMetadata |
| 27 | + |
# Root of the National Ground-Water Monitoring Network OGC API. NGWMN is
# served from its own separate, unversioned base, derived here from the
# ``BASE_URL`` imported from the shared OGC engine.
NGWMN_OGC_API_URL = f"{BASE_URL}/ngwmn/ogcapi"

# The NGWMN OGC API exposes the feature id under the generic ``id`` column
# (there is no service-specific id name as there is for the main collections).
# Every getter in this module passes this value as ``output_id``.
_NGWMN_OUTPUT_ID = "id"
| 35 | + |
| 36 | + |
def get_sites(
    monitoring_location_id: str | Iterable[str] | None = None,
    agency_code: str | Iterable[str] | None = None,
    monitoring_location_number: str | Iterable[str] | None = None,
    altitude: str | Iterable[str] | None = None,
    national_aquifer_code: str | Iterable[str] | None = None,
    national_aquifer_description: str | Iterable[str] | None = None,
    country_code: str | Iterable[str] | None = None,
    country_name: str | Iterable[str] | None = None,
    state_name: str | Iterable[str] | None = None,
    county_name: str | Iterable[str] | None = None,
    aquifer_name: str | Iterable[str] | None = None,
    site_type: str | Iterable[str] | None = None,
    aquifer_type_code: str | Iterable[str] | None = None,
    qw_sys_name: str | Iterable[str] | None = None,
    qw_sn_flag: str | Iterable[str] | None = None,
    qw_baseline_flag: str | Iterable[str] | None = None,
    qw_well_chars: str | Iterable[str] | None = None,
    qw_well_type: str | Iterable[str] | None = None,
    qw_well_purpose: str | Iterable[str] | None = None,
    wl_sys_name: str | Iterable[str] | None = None,
    wl_sn_flag: str | Iterable[str] | None = None,
    wl_baseline_flag: str | Iterable[str] | None = None,
    wl_well_chars: str | Iterable[str] | None = None,
    wl_well_type: str | Iterable[str] | None = None,
    wl_well_purpose: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    skip_geometry: bool | None = None,
    bbox: list[float] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve metadata for NGWMN monitoring locations (sites).

    Queries the ``sites`` collection of the NGWMN OGC API. A site record
    describes one monitoring location: its identifier, responsible agency,
    location, and aquifer, plus whether it takes part in the network's
    water-quality (``qw_*``) and water-level (``wl_*``) sub-networks.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) written as ``AGENCY-ID``, for
        example ``"USGS-423114090161101"`` or ``"MBMG-702934"``.
    agency_code : str or iterable of str, optional
        Code of the agency managing the site.
    monitoring_location_number : str or iterable of str, optional
        Site number as assigned by the agency.
    altitude : str or iterable of str, optional
        Altitude of the land surface at the site.
    national_aquifer_code, national_aquifer_description : str or iterable, optional
        Code / description of the national aquifer.
    country_code, country_name : str or iterable, optional
        Filters on country.
    state_name, county_name : str or iterable, optional
        Filters on state / county.
    aquifer_name, site_type, aquifer_type_code : str or iterable, optional
        Name of the aquifer, type of site, and aquifer-type code.
    qw_sys_name, qw_sn_flag, qw_baseline_flag : str or iterable, optional
        Membership flags for the water-quality sub-network.
    qw_well_chars, qw_well_type, qw_well_purpose : str or iterable, optional
        Well characteristics, type, and purpose within the water-quality
        sub-network.
    wl_sys_name, wl_sn_flag, wl_baseline_flag : str or iterable, optional
        Membership flags for the water-level sub-network.
    wl_well_chars, wl_well_type, wl_well_purpose : str or iterable, optional
        Well characteristics, type, and purpose within the water-level
        sub-network.
    properties : str or iterable of str, optional
        Columns to include in the result. The default, ``None``, returns
        every column.
    skip_geometry : bool, optional
        Pass ``True`` to drop the geometry column. The default, ``None``,
        keeps the server default (geometry included).
    bbox : list of float, optional
        Spatial filter given as ``[minx, miny, maxx, maxy]`` in CRS 4326.
    limit : int, optional
        Page size for each request; ``next`` links are still followed until
        the result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        One row of site metadata per monitoring location.
    BaseMetadata
        Metadata object carrying the request URL and query time.

    Examples
    --------
    .. code::

        >>> # Every NGWMN site in Wisconsin
        >>> df, md = dataretrieval.ngwmn.get_sites(state_name="Wisconsin")

        >>> # A chosen set of sites, without geometry
        >>> df, md = dataretrieval.ngwmn.get_sites(
        ...     monitoring_location_id=["USGS-423114090161101", "MBMG-702934"],
        ...     skip_geometry=True,
        ... )
    """
    service = "sites"
    # locals() is read right after ``service`` is bound, so the mapping holds
    # exactly the call parameters plus ``service``; do not bind other locals
    # above this line.
    query_args = _get_args(locals())
    return get_ogc_data(
        query_args,
        service,
        base_url=NGWMN_OGC_API_URL,
        output_id=_NGWMN_OUTPUT_ID,
    )
| 140 | + |
| 141 | + |
def get_water_level(
    monitoring_location_id: str | Iterable[str] | None = None,
    monitoring_location_obs_number: str | Iterable[str] | None = None,
    sample_time: str | Iterable[str] | None = None,
    data_provided_by: str | Iterable[str] | None = None,
    water_depth_below_land_surface_ft: str | Iterable[str] | None = None,
    water_level_above_site_datum_ft: str | Iterable[str] | None = None,
    monitoring_location_vertical_datum: str | Iterable[str] | None = None,
    water_level_above_navd88_ft: str | Iterable[str] | None = None,
    datetime: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN water-level observations.

    Queries the ``waterLevelObs`` collection of the NGWMN OGC API.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) written as ``AGENCY-ID``.
    monitoring_location_obs_number : str or iterable of str, optional
        Observation number within a site; narrows a site's observations.
    sample_time : str or iterable of str, optional
        Sample-time value(s) to match exactly. To filter on a time *range*,
        pass ``datetime`` instead.
    data_provided_by : str or iterable of str, optional
        Organization that supplied the observation.
    water_depth_below_land_surface_ft : str or iterable, optional
        Filter on depth to water, in feet below land surface.
    water_level_above_site_datum_ft : str or iterable, optional
        Filter on water level, in feet above the site datum.
    monitoring_location_vertical_datum : str or iterable of str, optional
        Vertical datum the water level is reported against.
    water_level_above_navd88_ft : str or iterable, optional
        Filter on water level, in feet above NAVD 88.
    datetime : str or iterable of str, optional
        Temporal filter: either one instant or a two-element
        ``[start, end]`` range of ISO-8601 dates/datetimes, where ``".."``
        marks an open end.
    properties : str or iterable of str, optional
        Columns to include in the result. The default, ``None``, returns
        every column.
    limit : int, optional
        Page size for each request; ``next`` links are still followed until
        the result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per water-level measurement.
    BaseMetadata
        Metadata object carrying the request URL and query time.

    Examples
    --------
    .. code::

        >>> site = "USGS-272838082142201"
        >>> df, md = dataretrieval.ngwmn.get_water_level(
        ...     monitoring_location_id=site
        ... )

        >>> # Limit the result to a date range
        >>> df, md = dataretrieval.ngwmn.get_water_level(
        ...     monitoring_location_id=site, datetime=["2022-01-01", "2024-01-01"]
        ... )

        >>> # Several sites from different agencies
        >>> df, md = dataretrieval.ngwmn.get_water_level(
        ...     monitoring_location_id=["USGS-272838082142201", "MBMG-702934"]
        ... )
    """
    service = "waterLevelObs"
    # locals() is read right after ``service`` is bound, so the mapping holds
    # exactly the call parameters plus ``service``; do not bind other locals
    # above this line.
    query_args = _get_args(locals())
    return get_ogc_data(
        query_args,
        service,
        base_url=NGWMN_OGC_API_URL,
        output_id=_NGWMN_OUTPUT_ID,
    )
| 218 | + |
| 219 | + |
def get_lithology(
    monitoring_location_id: str | Iterable[str] | None = None,
    monitoring_location_obs_number: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN lithology observations.

    Queries the ``lithologyObs`` collection of the NGWMN OGC API. A lithology
    record describes the geologic materials logged at a monitoring location,
    including depth intervals and controlled lithology concepts.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) written as ``AGENCY-ID``.
    monitoring_location_obs_number : str or iterable of str, optional
        Observation number within a site; narrows a site's records.
    properties : str or iterable of str, optional
        Columns to include in the result. The default, ``None``, returns
        every column.
    limit : int, optional
        Page size for each request; ``next`` links are still followed until
        the result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per logged lithology interval.
    BaseMetadata
        Metadata object carrying the request URL and query time.

    Examples
    --------
    .. code::

        >>> df, md = dataretrieval.ngwmn.get_lithology(
        ...     monitoring_location_id="AKDNR-535134236016630"
        ... )
    """
    service = "lithologyObs"
    # locals() is read right after ``service`` is bound, so the mapping holds
    # exactly the call parameters plus ``service``; do not bind other locals
    # above this line.
    query_args = _get_args(locals())
    return get_ogc_data(
        query_args,
        service,
        base_url=NGWMN_OGC_API_URL,
        output_id=_NGWMN_OUTPUT_ID,
    )
| 265 | + |
| 266 | + |
def get_well_construction(
    monitoring_location_id: str | Iterable[str] | None = None,
    monitoring_location_obs_number: str | Iterable[str] | None = None,
    material: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN well-construction observations.

    Queries the ``constructionObs`` collection of the NGWMN OGC API. A
    construction record describes part of a well's physical build-out
    (casing, screens, and similar elements) along with its depth interval,
    material, and diameter.

    Parameters
    ----------
    monitoring_location_id : str or iterable of str, optional
        Agency-qualified site identifier(s) written as ``AGENCY-ID``.
    monitoring_location_obs_number : str or iterable of str, optional
        Observation number within a site; narrows a site's records.
    material : str or iterable of str, optional
        Filter on construction material.
    properties : str or iterable of str, optional
        Columns to include in the result. The default, ``None``, returns
        every column.
    limit : int, optional
        Page size for each request; ``next`` links are still followed until
        the result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per well-construction element.
    BaseMetadata
        Metadata object carrying the request URL and query time.

    Examples
    --------
    .. code::

        >>> df, md = dataretrieval.ngwmn.get_well_construction(
        ...     monitoring_location_id="USGS-272838082142201"
        ... )
    """
    service = "constructionObs"
    # locals() is read right after ``service`` is bound, so the mapping holds
    # exactly the call parameters plus ``service``; do not bind other locals
    # above this line.
    query_args = _get_args(locals())
    return get_ogc_data(
        query_args,
        service,
        base_url=NGWMN_OGC_API_URL,
        output_id=_NGWMN_OUTPUT_ID,
    )
| 316 | + |
| 317 | + |
def get_providers(
    state: str | Iterable[str] | None = None,
    agency_code: str | Iterable[str] | None = None,
    organization_type: str | Iterable[str] | None = None,
    properties: str | Iterable[str] | None = None,
    limit: int | None = None,
    convert_type: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Retrieve NGWMN data-provider records.

    Queries the ``providers`` collection of the NGWMN OGC API. Providers are
    the organizations that contribute data to the network.

    Parameters
    ----------
    state : str, optional
        Two-letter state/territory code such as ``"WI"``. As with the main
        Water Data getters, a multi-value *enum* filter like ``state`` is
        sent comma-joined, and this collection treats the joined value as a
        single literal. Query one state per call, or concatenate the
        per-state results.
    agency_code : str or iterable of str, optional
        Agency code of the provider.
    organization_type : str or iterable of str, optional
        Organization type of the provider, e.g. ``"NWIS"``.
    properties : str or iterable of str, optional
        Columns to include in the result. The default, ``None``, returns
        every column.
    limit : int, optional
        Page size for each request; ``next`` links are still followed until
        the result is complete.
    convert_type : bool, optional
        Coerce column dtypes when ``True`` (the default).

    Returns
    -------
    pandas.DataFrame
        One row per provider.
    BaseMetadata
        Metadata object carrying the request URL and query time.

    Examples
    --------
    .. code::

        >>> df, md = dataretrieval.ngwmn.get_providers(state="WI")

        >>> df, md = dataretrieval.ngwmn.get_providers(
        ...     organization_type="NWIS", state="WI"
        ... )
    """
    service = "providers"
    # locals() is read right after ``service`` is bound, so the mapping holds
    # exactly the call parameters plus ``service``; do not bind other locals
    # above this line.
    query_args = _get_args(locals())
    return get_ogc_data(
        query_args,
        service,
        base_url=NGWMN_OGC_API_URL,
        output_id=_NGWMN_OUTPUT_ID,
    )
0 commit comments