|
19 | 19 | PROFILE_LOOKUP, |
20 | 20 | PROFILES, |
21 | 21 | SERVICES, |
| 22 | + STATISTICS_SERVICES, |
| 23 | +) |
| 24 | +from dataretrieval.waterdata.utils import ( |
| 25 | + SAMPLES_URL, |
| 26 | + get_ogc_data, |
| 27 | + get_stats_data |
22 | 28 | ) |
23 | | -from dataretrieval.waterdata.utils import SAMPLES_URL, get_ogc_data |
24 | 29 |
|
25 | 30 | # Set up logger for this module |
26 | 31 | logger = logging.getLogger(__name__) |
@@ -1641,6 +1646,109 @@ def get_samples( |
1641 | 1646 |
|
1642 | 1647 | return df, BaseMetadata(response) |
1643 | 1648 |
|
def get_statistics(
    service: STATISTICS_SERVICES = "observationNormals",
    approval_status: Optional[str] = None,
    computation_type: Optional[str] = None,
    country_code: Optional[str] = None,
    state_code: Optional[str] = None,
    county_code: Optional[str] = None,
    start_date: Optional[Union[str, datetime]] = None,
    end_date: Optional[Union[str, datetime]] = None,
    monitoring_location_id: Optional[str] = None,
    page_size: int = 1000,
    parent_timeseries_id: Optional[str] = None,
    site_type_code: Optional[str] = None,
    site_type_name: Optional[str] = None,
    parameter_code: Optional[str] = None,
) -> Tuple[pd.DataFrame, BaseMetadata]:
    """Get water data statistics from the USGS Water Data API.

    This service provides endpoints for access to computations on the
    historical record regarding water conditions, including minimum, maximum,
    mean, median, and percentiles for day of year, month, month-year, and
    water/calendar years. For more information regarding the calculation of
    statistics and other details, please visit the Statistics documentation
    page: https://waterdata.usgs.gov/statistics-documentation/.

    Note: This API is under active beta development and subject to
    change. Improved handling of significant figures will be
    addressed in a future release.

    Parameters
    ----------
    service: string, One of the following options: "observationNormals"
        or "observationIntervals". "observationNormals" returns
        day-of-year and month-of-year statistics matching your query,
        while "observationIntervals" returns monthly and annual statistics
        matching your query.
    approval_status: string, optional
        Whether to include approved and/or provisional observations.
        At this time, only approved observations are returned.
    computation_type: string, optional
        Desired statistical computation method. Available values are:
        arithmetic_mean, maximum, median, minimum, percentile.
    country_code: string, optional
        Country query parameter. API defaults to "US".
    state_code: string, optional
        State query parameter. Takes the format "US:XX", where XX is
        the two-digit state code. API defaults to "US:42" (Pennsylvania).
    county_code: string, optional
        County query parameter. Takes the format "US:XX:YYY", where XX is
        the two-digit state code and YYY is the three-digit county code.
        API defaults to "US:42:103" (Pennsylvania, Pike County).
    start_date: string or datetime, optional
        Start date for the query. Its format depends upon the service:
        for "observationNormals", it is in the month-day format (MM-DD),
        for "observationIntervals", it is in the year-month-day format
        (YYYY-MM-DD).
    end_date: string or datetime, optional
        End date for the query. Its format depends upon the service:
        for "observationNormals", it is in the month-day format (MM-DD),
        for "observationIntervals", it is in the year-month-day format
        (YYYY-MM-DD).
    monitoring_location_id : string or list of strings, optional
        A unique identifier representing a single monitoring location. This
        corresponds to the id field in the monitoring-locations endpoint.
        Monitoring location IDs are created by combining the agency code of the
        agency responsible for the monitoring location (e.g. USGS) with the ID
        number of the monitoring location (e.g. 02238500), separated by a hyphen
        (e.g. USGS-02238500).
    page_size : int, optional
        The number of results to return per page, where one result represents a
        monitoring location. The default is 1000.
    parent_timeseries_id: string, optional
        The parent_timeseries_id returns statistics tied to a particular
        database entry.
    site_type_code: string, optional
        Site type code query parameter. You can see a list of valid site type codes here:
        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
        Example: "GW" (Groundwater site)
    site_type_name: string, optional
        Site type name query parameter. You can see a list of valid site type names here:
        https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
        Example: "Well"
    parameter_code : string or list of strings, optional
        Parameter codes are 5-digit codes used to identify the constituent
        measured and the units of measure. A complete list of parameter codes
        and associated groupings can be found at
        https://help.waterdata.usgs.gov/codes-and-parameters/parameters.

    Returns
    -------
    Tuple[pd.DataFrame, BaseMetadata]
        The value returned by ``get_stats_data`` for the requested service
        and query arguments.

    Raises
    ------
    ValueError
        If ``service`` is not one of the options in ``STATISTICS_SERVICES``.
    """
    # Snapshot the call arguments before any other local is created, so
    # helper variables defined below can never leak into the query parameters.
    call_args = dict(locals())

    valid_services = get_args(STATISTICS_SERVICES)
    if service not in valid_services:
        raise ValueError(
            f"Invalid service: '{service}'. Valid options are: {valid_services}."
        )

    # "service" selects the endpoint and is passed separately; arguments left
    # as None are omitted from the query.
    params = {
        name: value
        for name, value in call_args.items()
        if name != "service" and value is not None
    }

    return get_stats_data(
        args=params,
        service=service,
    )
| 1751 | + |
1644 | 1752 |
|
1645 | 1753 | def _check_profiles( |
1646 | 1754 | service: SERVICES, |
|
0 commit comments