1919 PROFILE_LOOKUP ,
2020 PROFILES ,
2121 SERVICES ,
22- STATISTICS_SERVICES ,
2322)
2423from dataretrieval .waterdata .utils import (
2524 SAMPLES_URL ,
@@ -1646,8 +1645,7 @@ def get_samples(
16461645
16471646 return df , BaseMetadata (response )
16481647
1649- def get_statistics (
1650- service : STATISTICS_SERVICES = "observationNormals" ,
1648+ def get_por_stats (
16511649 approval_status : Optional [str ] = None ,
16521650 computation_type : Optional [Union [str , list [str ]]] = None ,
16531651 country_code : Optional [Union [str , list [str ]]] = None ,
@@ -1661,6 +1659,7 @@ def get_statistics(
16611659 site_type_code : Optional [Union [str , list [str ]]] = None ,
16621660 site_type_name : Optional [Union [str , list [str ]]] = None ,
16631661 parameter_code : Optional [Union [str , list [str ]]] = None ,
1662+ expand_percentiles : bool = True
16641663 ) -> Tuple [pd .DataFrame , BaseMetadata ]:
16651664 """Get water data statistics from the USGS Water Data API.
16661665 This service provides endpoints for access to computations on the
@@ -1697,15 +1696,9 @@ def get_statistics(
16971696 the two-digit state code and YYY is the three-digit county code.
16981697 API defaults to "US:42:103" (Pennsylvania, Pike County).
16991698 start_date: string or datetime, optional
1700- Start date for the query. Its format depends upon the service:
1701- for "observationNormals", it is in the month-day format (MM-DD),
1702- for "observationIntervals", it is in the year-month-day format
1703- (YYYY-MM-DD).
1699+ Start day for the query in the month-day format (MM-DD).
17041700 end_date: string or datetime, optional
1705- End date for the query. Its format depends upon the service:
1706- for "observationNormals", it is in the month-day format (MM-DD),
1707- for "observationIntervals", it is in the year-month-day format
1708- (YYYY-MM-DD).
1701+ End day for the query in the month-day format (MM-DD).
17091702 monitoring_location_id : string or list of strings, optional
17101703 A unique identifier representing a single monitoring location. This
17111704 corresponds to the id field in the monitoring-locations endpoint.
@@ -1731,22 +1724,129 @@ def get_statistics(
17311724 measured and the units of measure. A complete list of parameter codes
17321725 and associated groupings can be found at
17331726 https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
1727+ expand_percentiles : boolean
1728+ Percentile data for a given day of year or month of year by default
1729+ are returned from the service as lists of string values and percentile
1730+ thresholds in the "values" and "percentiles" columns, respectively.
1731+ When `expand_percentiles` is set to True (default), each value and
1732+ percentile threshold specific to a computation id are returned as
1733+ individual rows in the dataframe. Missing percentile values expressed
1734+ as 'nan' in the list of string values are removed from the dataframe
1735+ to save space.
17341736 """
1735- valid_services = get_args (STATISTICS_SERVICES )
1736- if service not in valid_services :
1737- raise ValueError (
1738- f"Invalid service: '{ service } '. Valid options are: { valid_services } ."
1737+ params = {
1738+ k : v
1739+ for k , v in locals ().items ()
1740+ if k not in ["expand_percentiles" ] and v is not None
1741+ }
1742+
1743+ return get_stats_data (
1744+ args = params ,
1745+ service = "observationNormals" ,
1746+ expand_percentiles = expand_percentiles
17391747 )
1748+
1749+ def get_date_range_stats (
1750+ approval_status : Optional [str ] = None ,
1751+ computation_type : Optional [Union [str , list [str ]]] = None ,
1752+ country_code : Optional [Union [str , list [str ]]] = None ,
1753+ state_code : Optional [Union [str , list [str ]]] = None ,
1754+ county_code : Optional [Union [str , list [str ]]] = None ,
1755+ start_date : Optional [str ] = None ,
1756+ end_date : Optional [str ] = None ,
1757+ monitoring_location_id : Optional [Union [str , list [str ]]] = None ,
1758+ page_size : int = 1000 ,
1759+ parent_timeseries_id : Optional [Union [str , list [str ]]] = None ,
1760+ site_type_code : Optional [Union [str , list [str ]]] = None ,
1761+ site_type_name : Optional [Union [str , list [str ]]] = None ,
1762+ parameter_code : Optional [Union [str , list [str ]]] = None ,
1763+ expand_percentiles : bool = True
1764+ ) -> Tuple [pd .DataFrame , BaseMetadata ]:
1765+ """Get water data statistics from the USGS Water Data API.
1766+ This service provides endpoints for access to computations on the
1767+ historical record regarding water conditions, including minimum, maximum,
1768+ mean, median, and percentiles for day of year, month, month-year, and
1769+ water/calendar years. For more information regarding the calculation of
1770+ statistics and other details, please visit the Statistics documentation
1771+ page: https://waterdata.usgs.gov/statistics-documentation/.
17401772
1773+ Note: This API is under active beta development and subject to
1774+ change. Improved handling of significant figures will be
1775+ addressed in a future release.
1776+
1777+ Parameters
1778+ ----------
1779+ service: not an argument of this function. The service is fixed to
1780+ "observationIntervals", which returns monthly and annual
1781+ statistics matching your query. For day-of-year and
1782+ month-of-year statistics (the "observationNormals" service),
1783+ use `get_por_stats` instead.
1784+ approval_status: string, optional
1785+ Whether to include approved and/or provisional observations.
1786+ At this time, only approved observations are returned.
1787+ computation_type: string, optional
1788+ Desired statistical computation method. Available values are:
1789+ arithmetic_mean, maximum, median, minimum, percentile.
1790+ country_code: string, optional
1791+ Country query parameter. API defaults to "US".
1792+ state_code: string, optional
1793+ State query parameter. Takes the format "US:XX", where XX is
1794+ the two-digit state code. API defaults to "US:42" (Pennsylvania).
1795+ county_code: string, optional
1796+ County query parameter. Takes the format "US:XX:YYY", where XX is
1797+ the two-digit state code and YYY is the three-digit county code.
1798+ API defaults to "US:42:103" (Pennsylvania, Pike County).
1799+ start_date: string or datetime, optional
1800+ Start date for the query in the year-month-day format
1801+ (YYYY-MM-DD).
1802+ end_date: string or datetime, optional
1803+ End date for the query in the year-month-day format
1804+ (YYYY-MM-DD).
1805+ monitoring_location_id : string or list of strings, optional
1806+ A unique identifier representing a single monitoring location. This
1807+ corresponds to the id field in the monitoring-locations endpoint.
1808+ Monitoring location IDs are created by combining the agency code of the
1809+ agency responsible for the monitoring location (e.g. USGS) with the ID
1810+ number of the monitoring location (e.g. 02238500), separated by a hyphen
1811+ (e.g. USGS-02238500).
1812+ page_size : int, optional
1813+ The number of results to return per page, where one result represents a
1814+ monitoring location. The default is 1000.
1815+ parent_timeseries_id: string, optional
1816+ The parent_timeseries_id returns statistics tied to a particular database entry.
1817+ site_type_code: string, optional
1818+ Site type code query parameter. You can see a list of valid site type codes here:
1819+ https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
1820+ Example: "GW" (Groundwater site)
1821+ site_type_name: string, optional
1822+ Site type name query parameter. You can see a list of valid site type names here:
1823+ https://api.waterdata.usgs.gov/ogcapi/v0/collections/site-types/items.
1824+ Example: "Well"
1825+ parameter_code : string or list of strings, optional
1826+ Parameter codes are 5-digit codes used to identify the constituent
1827+ measured and the units of measure. A complete list of parameter codes
1828+ and associated groupings can be found at
1829+ https://help.waterdata.usgs.gov/codes-and-parameters/parameters.
1830+ expand_percentiles : boolean
1831+ Percentile data for a given month or year by default
1832+ are returned from the service as lists of string values and percentile
1833+ thresholds in the "values" and "percentiles" columns, respectively.
1834+ When `expand_percentiles` is set to True (default), each value and
1835+ percentile threshold specific to a computation id are returned as
1836+ individual rows in the dataframe. Missing percentile values expressed
1837+ as 'nan' in the list of string values are removed from the dataframe
1838+ to save space.
1839+ """
17411840 params = {
17421841 k : v
17431842 for k , v in locals ().items ()
1744- if k not in ["service" , "valid_services " ] and v is not None
1843+ if k not in ["expand_percentiles " ] and v is not None
17451844 }
17461845
17471846 return get_stats_data (
17481847 args = params ,
1749- service = service
1848+ service = "observationIntervals" ,
1849+ expand_percentiles = expand_percentiles
17501850 )
17511851
17521852
0 commit comments