|
11 | 11 | from pyproj import CRS, Transformer |
12 | 12 |
|
13 | 13 | from .index import GeometryIndex |
| 14 | +from .zonal import _zonal_stats_iterative, _zonal_stats_rasterize |
14 | 15 |
|
15 | 16 |
|
16 | 17 | @xr.register_dataarray_accessor("xvec") |
@@ -918,6 +919,119 @@ def to_geodataframe( |
918 | 919 | ) |
919 | 920 | return df |
920 | 921 |
|
| 922 | + def zonal_stats( |
| 923 | + self, |
| 924 | + polygons: Sequence[shapely.Geometry], |
| 925 | + x_coords: Hashable, |
| 926 | + y_coords: Hashable, |
| 927 | + stats: str = "mean", |
| 928 | + name: Hashable = "geometry", |
| 929 | + index: bool = None, |
| 930 | + method: str = "rasterize", |
| 931 | + all_touched: bool = False, |
| 932 | + n_jobs: int = -1, |
| 933 | + **kwargs, |
| 934 | + ): |
| 935 | + """Extract the values from a dataset indexed by a set of geometries |
| 936 | +
|
| 937 | + The CRS of the raster and that of polygons need to be equal. |
| 938 | + Xvec does not verify their equality. |
| 939 | +
|
| 940 | + Parameters |
| 941 | + ---------- |
| 942 | + polygons : Sequence[shapely.Geometry] |
| 943 | + An arrray-like (1-D) of shapely geometries, like a numpy array or |
| 944 | + :class:`geopandas.GeoSeries`. |
| 945 | + x_coords : Hashable |
| 946 | + name of the coordinates containing ``x`` coordinates (i.e. the first value |
| 947 | + in the coordinate pair encoding the vertex of the polygon) |
| 948 | + y_coords : Hashable |
| 949 | + name of the coordinates containing ``y`` coordinates (i.e. the second value |
| 950 | + in the coordinate pair encoding the vertex of the polygon) |
| 951 | + stats : string |
| 952 | + Spatial aggregation statistic method, by default "mean". It supports the |
| 953 | + following statistcs: ['mean', 'median', 'min', 'max', 'sum'] |
| 954 | + name : Hashable, optional |
| 955 | + Name of the dimension that will hold the ``polygons``, by default "geometry" |
| 956 | + index : bool, optional |
| 957 | + If `polygons` is a GeoSeries, ``index=True`` will attach its index as another |
| 958 | + coordinate to the geometry dimension in the resulting object. If |
| 959 | + ``index=None``, the index will be stored if the `polygons.index` is a named |
| 960 | + or non-default index. If ``index=False``, it will never be stored. This is |
| 961 | + useful as an attribute link between the resulting array and the GeoPandas |
| 962 | + object from which the polygons are sourced. |
| 963 | + method : str, optional |
| 964 | + The method of data extraction. The default is ``"rasterize"``, which uses |
| 965 | + :func:`rasterio.features.rasterize` and is faster, but can lead to loss |
| 966 | + of information in case of small polygons. Other option is ``"iterate"``, which |
| 967 | + iterates over polygons and uses :func:`rasterio.features.geometry_mask`. |
| 968 | + all_touched : bool, optional |
| 969 | + If True, all pixels touched by geometries will be considered. If False, only |
| 970 | + pixels whose center is within the polygon or that are selected by |
| 971 | + Bresenham’s line algorithm will be considered. |
| 972 | + n_jobs : int, optional |
| 973 | + Number of parallel threads to use. It is recommended to set this to the |
| 974 | + number of physical cores of the CPU. ``-1`` uses all available cores. Applies |
| 975 | + only if ``method="iterate"``. |
| 976 | + **kwargs : optional |
| 977 | + Keyword arguments to be passed to the aggregation function |
| 978 | + (e.g., ``Dataset.mean(**kwargs)``). |
| 979 | +
|
| 980 | + Returns |
| 981 | + ------- |
| 982 | + Dataset |
| 983 | + A subset of the original object with N-1 dimensions indexed by |
| 984 | + the the GeometryIndex. |
| 985 | +
|
| 986 | + """ |
| 987 | + # TODO: allow multiple stats at the same time (concat along a new axis), |
| 988 | + # TODO: possibly as a list of tuples to include names? |
| 989 | + # TODO: allow callable in stat (via .reduce()) |
| 990 | + if method == "rasterize": |
| 991 | + result = _zonal_stats_rasterize( |
| 992 | + self, |
| 993 | + polygons=polygons, |
| 994 | + x_coords=x_coords, |
| 995 | + y_coords=y_coords, |
| 996 | + stats=stats, |
| 997 | + name=name, |
| 998 | + all_touched=all_touched, |
| 999 | + **kwargs, |
| 1000 | + ) |
| 1001 | + elif method == "iterate": |
| 1002 | + result = _zonal_stats_iterative( |
| 1003 | + self, |
| 1004 | + polygons=polygons, |
| 1005 | + x_coords=x_coords, |
| 1006 | + y_coords=y_coords, |
| 1007 | + stats=stats, |
| 1008 | + name=name, |
| 1009 | + all_touched=all_touched, |
| 1010 | + n_jobs=n_jobs, |
| 1011 | + **kwargs, |
| 1012 | + ) |
| 1013 | + else: |
| 1014 | + raise ValueError( |
| 1015 | + f"method '{method}' is not supported. Allowed options are 'rasterize' " |
| 1016 | + "and 'iterate'." |
| 1017 | + ) |
| 1018 | + |
| 1019 | + # save the index as a data variable |
| 1020 | + if isinstance(polygons, pd.Series): |
| 1021 | + if index is None: |
| 1022 | + if polygons.index.name is not None or not polygons.index.equals( |
| 1023 | + pd.RangeIndex(0, len(polygons)) |
| 1024 | + ): |
| 1025 | + index = True |
| 1026 | + if index: |
| 1027 | + index_name = polygons.index.name if polygons.index.name else "index" |
| 1028 | + result = result.assign_coords({index_name: (name, polygons.index)}) |
| 1029 | + |
| 1030 | + # standardize the shape - each method comes with a different one |
| 1031 | + return result.transpose( |
| 1032 | + name, *tuple(d for d in self._obj.dims if d not in [x_coords, y_coords]) |
| 1033 | + ) |
| 1034 | + |
921 | 1035 | def extract_points( |
922 | 1036 | self, |
923 | 1037 | points: Sequence[shapely.Geometry], |
@@ -965,7 +1079,7 @@ def extract_points( |
965 | 1079 | ``index=None``, the index will be stored if the `points.index` is a named |
966 | 1080 | or non-default index. If ``index=False``, it will never be stored. This is |
967 | 1081 | useful as an attribute link between the resulting array and the GeoPandas |
968 | | - object from which the points are sourced from. |
| 1082 | + object from which the points are sourced. |
969 | 1083 |
|
970 | 1084 | Returns |
971 | 1085 | ------- |
|
0 commit comments