|
10 | 10 | import logging |
11 | 11 | from io import StringIO |
12 | 12 | from typing import get_args |
| 13 | +from urllib.parse import quote |
13 | 14 |
|
14 | 15 | import pandas as pd |
15 | 16 | import requests |
@@ -1800,6 +1801,77 @@ def get_samples( |
1800 | 1801 | return df, BaseMetadata(response) |
1801 | 1802 |
|
1802 | 1803 |
|
def get_samples_summary(
    monitoringLocationIdentifier: str,
    ssl_check: bool = True,
) -> tuple[pd.DataFrame, BaseMetadata]:
    """Get a summary of discrete water-quality samples at a single monitoring location.

    Wraps the Samples database summary service described at
    https://api.waterdata.usgs.gov/samples-data/docs. The service returns one
    row per (characteristic group, characteristic, user-supplied characteristic)
    combination with result and activity counts and the first / most recent
    activity dates — useful for taking inventory of what discrete-sample data
    exists at a site before pulling the underlying observations with
    :func:`get_samples`.

    The summary service is single-site only: it accepts exactly one monitoring
    location per request.

    Parameters
    ----------
    monitoringLocationIdentifier : string
        A monitoring location identifier has two parts, separated by a dash
        (``-``): the agency code and the location number. Examples:
        ``"USGS-040851385"``, ``"AZ014-320821110580701"``,
        ``"CAX01-15304600"``. Bare location numbers without an agency prefix
        are accepted by the service but return an empty result, so a prefix
        is effectively required.
    ssl_check : bool, optional
        Check the SSL certificate. Default is True.

    Returns
    -------
    df : ``pandas.DataFrame``
        The CSV payload returned by the service, parsed into a DataFrame.
        If the response body contains no CSV content at all, an empty
        DataFrame is returned instead of raising a parser error.
    md : ``BaseMetadata``
        Metadata object built from the HTTP response of the query.

    Raises
    ------
    TypeError
        If ``monitoringLocationIdentifier`` is not a string (for example a
        list of sites).
    ValueError
        If ``monitoringLocationIdentifier`` is empty or only whitespace.
    requests.HTTPError
        If the service answers with an HTTP error status.

    Examples
    --------
    .. code::

        >>> # What discrete-sample data is available at this site?
        >>> df, md = dataretrieval.waterdata.get_samples_summary(
        ...     monitoringLocationIdentifier="USGS-04074950"
        ... )

    """
    if not isinstance(monitoringLocationIdentifier, str):
        raise TypeError(
            "monitoringLocationIdentifier must be a string; the Samples "
            "summary service accepts exactly one monitoring location per "
            f"request, got {type(monitoringLocationIdentifier).__name__}."
        )
    # A blank identifier would leave the URL without a site segment, so fail
    # early with a clear message instead of sending a meaningless request.
    if not monitoringLocationIdentifier.strip():
        raise ValueError(
            "monitoringLocationIdentifier must be a non-empty string such as "
            '"USGS-04074950".'
        )

    # safe="" percent-encodes every reserved character, "/" included, so the
    # identifier always stays a single URL path segment.
    site_segment = quote(monitoringLocationIdentifier, safe="")
    url = f"{SAMPLES_URL}/summary/{site_segment}"
    params = {"mimeType": "text/csv"}

    # Prepare the URL up front only so the full request (with query string)
    # can be logged before it is sent.
    req = PreparedRequest()
    req.prepare_url(url, params=params)
    logger.info("Request: %s", req.url)

    response = requests.get(
        url, params=params, verify=ssl_check, headers=_default_headers()
    )

    response.raise_for_status()

    try:
        df = pd.read_csv(StringIO(response.text), delimiter=",")
    except pd.errors.EmptyDataError:
        # read_csv raises when the body has no header row at all; treat a
        # successful but empty response as "no data" rather than an error.
        df = pd.DataFrame()

    return df, BaseMetadata(response)
| 1873 | + |
| 1874 | + |
1803 | 1875 | def get_stats_por( |
1804 | 1876 | approval_status: str | None = None, |
1805 | 1877 | computation_type: str | list[str] | None = None, |
|
0 commit comments