|
28 | 28 | METADATA_COLLECTIONS, |
29 | 29 | PROFILES, |
30 | 30 | SERVICES, |
| 31 | + WATERDATA_SERVICES, |
31 | 32 | ) |
32 | 33 | from dataretrieval.waterdata.utils import ( |
| 34 | + _OUTPUT_ID_BY_SERVICE, |
| 35 | + GEOPANDAS, |
33 | 36 | SAMPLES_URL, |
| 37 | + _as_str_list, |
34 | 38 | _check_profiles, |
| 39 | + _construct_cql_request, |
35 | 40 | _default_headers, |
| 41 | + _finalize_ogc, |
36 | 42 | _get_args, |
37 | 43 | _raise_for_non_200, |
| 44 | + _run_sync, |
| 45 | + _switch_properties_id, |
| 46 | + _walk_pages, |
38 | 47 | get_ogc_data, |
39 | 48 | get_stats_data, |
40 | 49 | ) |
@@ -252,12 +261,11 @@ def get_daily( |
252 | 261 | ... ) |
253 | 262 | """ |
254 | 263 | service = "daily" |
255 | | - output_id = "daily_id" |
256 | 264 |
|
257 | 265 | # Build argument dictionary, omitting None values |
258 | 266 | args = _get_args(locals()) |
259 | 267 |
|
260 | | - return get_ogc_data(args, output_id, service) |
| 268 | + return get_ogc_data(args, service) |
261 | 269 |
|
262 | 270 |
|
263 | 271 | def get_continuous( |
@@ -440,12 +448,11 @@ def get_continuous( |
440 | 448 | ... ) |
441 | 449 | """ |
442 | 450 | service = "continuous" |
443 | | - output_id = "continuous_id" |
444 | 451 |
|
445 | 452 | # Build argument dictionary, omitting None values |
446 | 453 | args = _get_args(locals()) |
447 | 454 |
|
448 | | - return get_ogc_data(args, output_id, service) |
| 455 | + return get_ogc_data(args, service) |
449 | 456 |
|
450 | 457 |
|
451 | 458 | def get_monitoring_locations( |
@@ -738,12 +745,11 @@ def get_monitoring_locations( |
738 | 745 | ... ) |
739 | 746 | """ |
740 | 747 | service = "monitoring-locations" |
741 | | - output_id = "monitoring_location_id" |
742 | 748 |
|
743 | 749 | # Build argument dictionary, omitting None values |
744 | 750 | args = _get_args(locals()) |
745 | 751 |
|
746 | | - return get_ogc_data(args, output_id, service) |
| 752 | + return get_ogc_data(args, service) |
747 | 753 |
|
748 | 754 |
|
749 | 755 | def get_time_series_metadata( |
@@ -961,12 +967,11 @@ def get_time_series_metadata( |
961 | 967 | ... ) |
962 | 968 | """ |
963 | 969 | service = "time-series-metadata" |
964 | | - output_id = "time_series_id" |
965 | 970 |
|
966 | 971 | # Build argument dictionary, omitting None values |
967 | 972 | args = _get_args(locals()) |
968 | 973 |
|
969 | | - return get_ogc_data(args, output_id, service) |
| 974 | + return get_ogc_data(args, service) |
970 | 975 |
|
971 | 976 |
|
972 | 977 | def get_combined_metadata( |
@@ -1194,11 +1199,10 @@ def get_combined_metadata( |
1194 | 1199 |
|
1195 | 1200 | """ |
1196 | 1201 | service = "combined-metadata" |
1197 | | - output_id = "combined_meta_id" |
1198 | 1202 |
|
1199 | 1203 | args = _get_args(locals()) |
1200 | 1204 |
|
1201 | | - return get_ogc_data(args, output_id, service) |
| 1205 | + return get_ogc_data(args, service) |
1202 | 1206 |
|
1203 | 1207 |
|
1204 | 1208 | def get_latest_continuous( |
@@ -1388,12 +1392,11 @@ def get_latest_continuous( |
1388 | 1392 | ... ) |
1389 | 1393 | """ |
1390 | 1394 | service = "latest-continuous" |
1391 | | - output_id = "latest_continuous_id" |
1392 | 1395 |
|
1393 | 1396 | # Build argument dictionary, omitting None values |
1394 | 1397 | args = _get_args(locals()) |
1395 | 1398 |
|
1396 | | - return get_ogc_data(args, output_id, service) |
| 1399 | + return get_ogc_data(args, service) |
1397 | 1400 |
|
1398 | 1401 |
|
1399 | 1402 | def get_latest_daily( |
@@ -1584,12 +1587,11 @@ def get_latest_daily( |
1584 | 1587 | ... ) |
1585 | 1588 | """ |
1586 | 1589 | service = "latest-daily" |
1587 | | - output_id = "latest_daily_id" |
1588 | 1590 |
|
1589 | 1591 | # Build argument dictionary, omitting None values |
1590 | 1592 | args = _get_args(locals()) |
1591 | 1593 |
|
1592 | | - return get_ogc_data(args, output_id, service) |
| 1594 | + return get_ogc_data(args, service) |
1593 | 1595 |
|
1594 | 1596 |
|
1595 | 1597 | def get_field_measurements( |
@@ -1774,12 +1776,11 @@ def get_field_measurements( |
1774 | 1776 | ... ) |
1775 | 1777 | """ |
1776 | 1778 | service = "field-measurements" |
1777 | | - output_id = "field_measurement_id" |
1778 | 1779 |
|
1779 | 1780 | # Build argument dictionary, omitting None values |
1780 | 1781 | args = _get_args(locals()) |
1781 | 1782 |
|
1782 | | - return get_ogc_data(args, output_id, service) |
| 1783 | + return get_ogc_data(args, service) |
1783 | 1784 |
|
1784 | 1785 |
|
1785 | 1786 | def get_field_measurements_metadata( |
@@ -1892,11 +1893,10 @@ def get_field_measurements_metadata( |
1892 | 1893 |
|
1893 | 1894 | """ |
1894 | 1895 | service = "field-measurements-metadata" |
1895 | | - output_id = "field_series_id" |
1896 | 1896 |
|
1897 | 1897 | args = _get_args(locals()) |
1898 | 1898 |
|
1899 | | - return get_ogc_data(args, output_id, service) |
| 1899 | + return get_ogc_data(args, service) |
1900 | 1900 |
|
1901 | 1901 |
|
1902 | 1902 | def get_peaks( |
@@ -2012,11 +2012,10 @@ def get_peaks( |
2012 | 2012 |
|
2013 | 2013 | """ |
2014 | 2014 | service = "peaks" |
2015 | | - output_id = "peak_id" |
2016 | 2015 |
|
2017 | 2016 | args = _get_args(locals()) |
2018 | 2017 |
|
2019 | | - return get_ogc_data(args, output_id, service) |
| 2018 | + return get_ogc_data(args, service) |
2020 | 2019 |
|
2021 | 2020 |
|
2022 | 2021 | def get_reference_table( |
@@ -2846,8 +2845,148 @@ def get_channel( |
2846 | 2845 | ... ) |
2847 | 2846 | """ |
2848 | 2847 | service = "channel-measurements" |
2849 | | - output_id = "channel_measurements_id" |
2850 | 2848 |
|
2851 | 2849 | args = _get_args(locals()) |
2852 | 2850 |
|
2853 | | - return get_ogc_data(args, output_id, service) |
| 2851 | + return get_ogc_data(args, service) |
| 2852 | + |
| 2853 | + |
| 2854 | +def get_cql( |
| 2855 | + service: WATERDATA_SERVICES, |
| 2856 | + cql: str | dict, |
| 2857 | + *, |
| 2858 | + properties: str | Iterable[str] | None = None, |
| 2859 | + bbox: list[float] | None = None, |
| 2860 | + limit: int | None = None, |
| 2861 | + skip_geometry: bool | None = None, |
| 2862 | + convert_type: bool = True, |
| 2863 | +) -> tuple[pd.DataFrame, BaseMetadata]: |
| 2864 | + """Query a Water Data OGC API collection with an arbitrary CQL2 filter. |
| 2865 | +
|
| 2866 | + Sends ``cql`` as a CQL2 filter against ``service`` and returns the matching |
| 2867 | + features, shaped like the typed getters (``get_daily``, ``get_continuous``, |
| 2868 | + …): the wire ``id`` renamed to the service's id column, columns ordered and |
| 2869 | + sorted, and dtypes coerced. Use it when you need a predicate the typed |
| 2870 | + getters can't express — a top-level ``or``, ``like`` with ``%`` wildcards, |
| 2871 | + comparison operators, nested boolean trees, or a geometry predicate beyond a |
| 2872 | + bounding box; prefer a typed getter when one covers the query. |
| 2873 | +
|
| 2874 | + The request is a single POST with the ``cql`` body sent verbatim. Because |
| 2875 | + there are no multi-value arguments to chunk, a query whose URL or body |
| 2876 | + would exceed the server's size cap is not split automatically; narrow |
| 2877 | + the query instead. |
| 2878 | +
|
| 2879 | + The CQL2 grammar is documented at |
| 2880 | + https://api.waterdata.usgs.gov/docs/ogcapi/complex-queries/. |
| 2881 | +
|
| 2882 | + Parameters |
| 2883 | + ---------- |
| 2884 | + service : str |
| 2885 | + OGC collection name. Must be one of |
| 2886 | + :data:`dataretrieval.waterdata.types.WATERDATA_SERVICES` |
| 2887 | + (e.g. ``"daily"``, ``"monitoring-locations"``). |
| 2888 | + cql : str or dict |
| 2889 | + CQL2 query. A ``dict`` is JSON-serialized for transport; a ``str`` is |
| 2890 | + sent through unchanged. The query goes into the HTTP POST body with |
| 2891 | + ``Content-Type: application/query-cql-json``. |
| 2892 | + properties : str or iterable of str, optional |
| 2893 | + Server-side property whitelist (passed as ``properties=`` on the URL). |
| 2894 | + Reduces payload size. ``"id"`` resolves to the service's ``output_id`` |
| 2895 | + (e.g. ``daily_id``) the same way it does in the typed getters. |
| 2896 | + bbox : list of float, optional |
| 2897 | + Bounding box ``[xmin, ymin, xmax, ymax]`` in CRS 4326. Combines with the |
| 2898 | + CQL filter as an additional spatial predicate. |
| 2899 | + limit : int, optional |
| 2900 | + Page size, clamped server-side to 50,000. |
| 2901 | + skip_geometry : bool, optional |
| 2902 | + If True, the server omits geometry from each feature |
| 2903 | + (``skipGeometry=true``). |
| 2904 | + convert_type : bool, default True |
| 2905 | + Coerce date/datetime/numeric columns to typed dtypes after the |
| 2906 | + DataFrame is built. |
| 2907 | +
|
| 2908 | + Returns |
| 2909 | + ------- |
| 2910 | + df : pandas.DataFrame or geopandas.GeoDataFrame |
| 2911 | + Result of the query. GeoDataFrame when ``geopandas`` is installed and |
| 2912 | + geometry is present. |
| 2913 | + md : :class:`dataretrieval.utils.BaseMetadata` |
| 2914 | + Request metadata (URL, query time, response headers). |
| 2915 | +
|
| 2916 | + Examples |
| 2917 | + -------- |
| 2918 | + .. code:: |
| 2919 | +
|
| 2920 | + >>> # Daily values for two parameter codes at two sites |
| 2921 | + >>> # (compound AND-of-INs). |
| 2922 | + >>> from dataretrieval import waterdata |
| 2923 | + >>> cql = { |
| 2924 | + ... "op": "and", |
| 2925 | + ... "args": [ |
| 2926 | + ... { |
| 2927 | + ... "op": "in", |
| 2928 | + ... "args": [ |
| 2929 | + ... {"property": "parameter_code"}, |
| 2930 | + ... ["00060", "00065"], |
| 2931 | + ... ], |
| 2932 | + ... }, |
| 2933 | + ... { |
| 2934 | + ... "op": "in", |
| 2935 | + ... "args": [ |
| 2936 | + ... {"property": "monitoring_location_id"}, |
| 2937 | + ... ["USGS-07367300", "USGS-03277200"], |
| 2938 | + ... ], |
| 2939 | + ... }, |
| 2940 | + ... ], |
| 2941 | + ... } |
| 2942 | + >>> df, md = waterdata.get_cql(service="daily", cql=cql) |
| 2943 | +
|
| 2944 | + >>> # Monitoring locations whose HUC starts with "02070010" |
| 2945 | + >>> # (LIKE with the CQL2 ``%`` wildcard). |
| 2946 | + >>> df, md = waterdata.get_cql( |
| 2947 | + ... service="monitoring-locations", |
| 2948 | + ... cql='{"op": "like", "args": [' |
| 2949 | + ... '{"property": "hydrologic_unit_code"},' |
| 2950 | + ... ' "02070010%"]}', |
| 2951 | + ... ) |
| 2952 | + """ |
| 2953 | + if service not in _OUTPUT_ID_BY_SERVICE: |
| 2954 | + raise ValueError( |
| 2955 | + f"Unknown service {service!r}. Valid services: " |
| 2956 | + f"{sorted(_OUTPUT_ID_BY_SERVICE)}." |
| 2957 | + ) |
| 2958 | + output_id = _OUTPUT_ID_BY_SERVICE[service] |
| 2959 | + |
| 2960 | + # ``dict`` is the pythonic input — serialize on the way out. ``str`` is sent |
| 2961 | + # verbatim so callers who already have a CQL2 doc (e.g. imported from a |
| 2962 | + # config file) don't need to re-parse it. |
| 2963 | + body = json.dumps(cql, separators=(",", ":")) if isinstance(cql, dict) else cql |
| 2964 | + |
| 2965 | + properties_list = _as_str_list(properties, "properties") |
| 2966 | + |
| 2967 | + # Translate user-facing names (``daily_id``/``id``) to the wire ``id`` the |
| 2968 | + # OGC API expects, matching the typed getters. |
| 2969 | + wire_properties = _switch_properties_id(properties_list, output_id, service) |
| 2970 | + |
| 2971 | + req = _construct_cql_request( |
| 2972 | + service, |
| 2973 | + body, |
| 2974 | + properties=wire_properties, |
| 2975 | + bbox=bbox, |
| 2976 | + limit=limit, |
| 2977 | + skip_geometry=skip_geometry, |
| 2978 | + ) |
| 2979 | + |
| 2980 | + async def _run() -> tuple[pd.DataFrame, httpx.Response]: |
| 2981 | + return await _walk_pages(geopd=GEOPANDAS, req=req) |
| 2982 | + |
| 2983 | + df, response = _run_sync(_run, service=service) |
| 2984 | + |
| 2985 | + return _finalize_ogc( |
| 2986 | + df, |
| 2987 | + response, |
| 2988 | + properties=properties_list, |
| 2989 | + output_id=output_id, |
| 2990 | + convert_type=convert_type, |
| 2991 | + service=service, |
| 2992 | + ) |
0 commit comments