Skip to content

Commit 0ef76c8

Browse files
authored
Fixes #392: document and test 3D time-series zonal stats via Dataset (#856)
Add docstring example and tests showing how to compute zonal statistics on 3D time-series DataArrays by converting to a Dataset with `.to_dataset(dim='time')` and passing to `stats()`.
1 parent 356f73d commit 0ef76c8

File tree

2 files changed

+179
-0
lines changed

2 files changed

+179
-0
lines changed

xrspatial/tests/test_zonal.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,169 @@ def test_zonal_stats_inputs_unmodified(backend, data_zones, data_values_2d, resu
619619
assert_input_data_unmodified(data_values_2d, copied_data_values_2d)
620620

621621

622+
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
623+
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
624+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
625+
def test_stats_3d_timeseries_via_dataset(backend):
626+
"""Convert a 3D time-series DataArray to a Dataset and verify per-timestep stats."""
627+
if 'dask' in backend and not dask_array_available():
628+
pytest.skip("Requires Dask")
629+
630+
zones_data = np.array([[0, 0, 1, 1, 2, 2, 3, 3],
631+
[0, 0, 1, 1, 2, 2, 3, 3],
632+
[0, 0, 1, 1, 2, np.nan, 3, 3]])
633+
values_data = np.asarray([
634+
[0, 0, 1, 1, 2, 2, 3, np.inf],
635+
[0, 0, 1, 1, 2, np.nan, 3, 0],
636+
[np.inf, 0, 1, 1, 2, 2, 3, 3]
637+
])
638+
639+
# Stack original (t0) and doubled (t1) into a 3D DataArray
640+
values_3d = xr.DataArray(
641+
np.stack([values_data, values_data * 2], axis=0),
642+
dims=['time', 'y', 'x'],
643+
coords={'time': ['t0', 't1']},
644+
)
645+
646+
if 'dask' in backend:
647+
zones = xr.DataArray(da.from_array(zones_data, chunks=(3, 4)), dims=['y', 'x'])
648+
values_3d = values_3d.chunk({'y': 3, 'x': 4})
649+
else:
650+
zones = xr.DataArray(zones_data, dims=['y', 'x'])
651+
652+
ds = values_3d.to_dataset(dim='time')
653+
df_result = stats(zones=zones, values=ds)
654+
655+
if 'dask' in backend:
656+
# dask doesn't support majority stat
657+
expected = {
658+
'zone': [0, 1, 2, 3],
659+
't0_mean': [0, 1, 2, 2.4],
660+
't0_max': [0, 1, 2, 3],
661+
't0_min': [0, 1, 2, 0],
662+
't0_sum': [0, 6, 8, 12],
663+
't0_std': [0, 0, 0, 1.2],
664+
't0_var': [0, 0, 0, 1.44],
665+
't0_count': [5, 6, 4, 5],
666+
't1_mean': [0, 2, 4, 4.8],
667+
't1_max': [0, 2, 4, 6],
668+
't1_min': [0, 2, 4, 0],
669+
't1_sum': [0, 12, 16, 24],
670+
't1_std': [0, 0, 0, 2.4],
671+
't1_var': [0, 0, 0, 5.76],
672+
't1_count': [5, 6, 4, 5],
673+
}
674+
else:
675+
expected = {
676+
'zone': [0, 1, 2, 3],
677+
't0_mean': [0, 1, 2, 2.4],
678+
't0_max': [0, 1, 2, 3],
679+
't0_min': [0, 1, 2, 0],
680+
't0_sum': [0, 6, 8, 12],
681+
't0_std': [0, 0, 0, 1.2],
682+
't0_var': [0, 0, 0, 1.44],
683+
't0_count': [5, 6, 4, 5],
684+
't0_majority': [0, 1, 2, 3],
685+
't1_mean': [0, 2, 4, 4.8],
686+
't1_max': [0, 2, 4, 6],
687+
't1_min': [0, 2, 4, 0],
688+
't1_sum': [0, 12, 16, 24],
689+
't1_std': [0, 0, 0, 2.4],
690+
't1_var': [0, 0, 0, 5.76],
691+
't1_count': [5, 6, 4, 5],
692+
't1_majority': [0, 2, 4, 6],
693+
}
694+
695+
check_results(backend, df_result, expected)
696+
697+
698+
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
699+
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
700+
@pytest.mark.parametrize("backend", ['numpy'])
701+
def test_stats_3d_timeseries_via_dataset_zone_ids(backend):
702+
"""Zone filtering works with Dataset from 3D time-series DataArray."""
703+
zones_data = np.array([[0, 0, 1, 1, 2, 2, 3, 3],
704+
[0, 0, 1, 1, 2, 2, 3, 3],
705+
[0, 0, 1, 1, 2, np.nan, 3, 3]])
706+
values_data = np.asarray([
707+
[0, 0, 1, 1, 2, 2, 3, np.inf],
708+
[0, 0, 1, 1, 2, np.nan, 3, 0],
709+
[np.inf, 0, 1, 1, 2, 2, 3, 3]
710+
])
711+
712+
values_3d = xr.DataArray(
713+
np.stack([values_data, values_data * 2], axis=0),
714+
dims=['time', 'y', 'x'],
715+
coords={'time': ['t0', 't1']},
716+
)
717+
zones = xr.DataArray(zones_data, dims=['y', 'x'])
718+
ds = values_3d.to_dataset(dim='time')
719+
720+
df_result = stats(zones=zones, values=ds, zone_ids=[0, 3])
721+
722+
expected = {
723+
'zone': [0, 3],
724+
't0_mean': [0, 2.4],
725+
't0_max': [0, 3],
726+
't0_min': [0, 0],
727+
't0_sum': [0, 12],
728+
't0_std': [0, 1.2],
729+
't0_var': [0, 1.44],
730+
't0_count': [5, 5],
731+
't0_majority': [0, 3],
732+
't1_mean': [0, 4.8],
733+
't1_max': [0, 6],
734+
't1_min': [0, 0],
735+
't1_sum': [0, 24],
736+
't1_std': [0, 2.4],
737+
't1_var': [0, 5.76],
738+
't1_count': [5, 5],
739+
't1_majority': [0, 6],
740+
}
741+
742+
check_results(backend, df_result, expected)
743+
744+
745+
@pytest.mark.parametrize("backend", ['numpy'])
746+
def test_stats_3d_timeseries_via_dataset_custom_stats(backend):
747+
"""Custom stats_funcs work with Dataset from 3D time-series DataArray."""
748+
zones_data = np.array([[0, 0, 1, 1, 2, 2, 3, 3],
749+
[0, 0, 1, 1, 2, 2, 3, 3],
750+
[0, 0, 1, 1, 2, np.nan, 3, 3]])
751+
values_data = np.asarray([
752+
[0, 0, 1, 1, 2, 2, 3, np.inf],
753+
[0, 0, 1, 1, 2, np.nan, 3, 0],
754+
[np.inf, 0, 1, 1, 2, 2, 3, 3]
755+
])
756+
757+
values_3d = xr.DataArray(
758+
np.stack([values_data, values_data * 2], axis=0),
759+
dims=['time', 'y', 'x'],
760+
coords={'time': ['t0', 't1']},
761+
)
762+
zones = xr.DataArray(zones_data, dims=['y', 'x'])
763+
ds = values_3d.to_dataset(dim='time')
764+
765+
custom_stats = {
766+
'double_sum': _double_sum,
767+
'range': _range,
768+
}
769+
df_result = stats(
770+
zones=zones, values=ds, stats_funcs=custom_stats,
771+
zone_ids=[1, 2], nodata_values=0,
772+
)
773+
774+
expected = {
775+
'zone': [1, 2],
776+
't0_double_sum': [12, 16],
777+
't0_range': [0, 0],
778+
't1_double_sum': [24, 32],
779+
't1_range': [0, 0],
780+
}
781+
782+
check_results(backend, df_result, expected)
783+
784+
622785
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy'])
623786
def test_count_crosstab_2d(backend, data_zones, data_values_2d, result_count_crosstab_2d):
624787
# copy input data to verify they're unchanged after running the function

xrspatial/zonal.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,8 @@ def stats(
464464
When a Dataset is passed, stats are computed for each variable
465465
and columns are prefixed with the variable name
466466
(e.g. ``elevation_mean``).
467+
For 3D time-series DataArrays, convert to a Dataset first using
468+
``.to_dataset(dim='time')`` and pass the resulting Dataset.
467469
468470
zone_ids : list of ints, or floats
469471
List of zones to be included in calculation. If no zone_ids provided,
@@ -571,6 +573,20 @@ def stats(
571573
1 10 27.0 49 5 675 14.21267 202.0 25
572574
2 20 72.0 94 50 1800 14.21267 202.0 25
573575
3 30 77.0 99 55 1925 14.21267 202.0 25
576+
577+
stats() works with 3D time-series DataArrays via Dataset conversion
578+
579+
.. sourcecode:: python
580+
581+
>>> # Convert a 3D time-series DataArray to a Dataset,
582+
>>> # then pass to stats() to get per-timestep statistics.
583+
>>> values_3d = xr.DataArray(
584+
... np.random.rand(2, 10, 10),
585+
... dims=['time', 'dim_0', 'dim_1'],
586+
... coords={'time': [2020, 2021]})
587+
>>> ds = values_3d.to_dataset(dim='time')
588+
>>> stats_df = stats(zones=zones, values=ds)
589+
>>> # Columns: zone, 2020_mean, 2020_max, ..., 2021_mean, 2021_max, ...
574590
"""
575591

576592
# Dataset support: run stats per variable and merge into one DataFrame

0 commit comments

Comments
 (0)