Skip to content

Commit 271bf56

Browse files
committed
Fixes #902: add dask+cupy backend for zonal.stats(), add edge-case tests
Add _stats_dask_cupy() that converts dask+cupy blocks to numpy via map_blocks(x.get()) then delegates to the existing _stats_dask_numpy pipeline. Wire it into the ArrayTypeFunctionMapping dispatcher. Add five new edge-case test groups (18 test cases across backends): - all-NaN zone: documents per-backend empty-zone behavior - single-cell zones: std/var must be 0, not NaN - negative zone IDs: exercises sort-and-stride with negatives - nodata wipes zone: all finite values match nodata_values - zone in subset of blocks: zone present in only some dask chunks
1 parent 39dedec commit 271bf56

File tree

2 files changed

+232
-8
lines changed

2 files changed

+232
-8
lines changed

xrspatial/tests/test_zonal.py

Lines changed: 213 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -404,10 +404,10 @@ def check_results(
404404
)
405405

406406

407-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
407+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
408408
def test_default_stats(backend, data_zones, data_values_2d, result_default_stats,
409409
result_default_stats_no_majority):
410-
if backend == 'cupy' and not has_cuda_and_cupy():
410+
if 'cupy' in backend and not has_cuda_and_cupy():
411411
pytest.skip("Requires CUDA and CuPy")
412412

413413
if 'dask' in backend and not dask_array_available():
@@ -449,10 +449,10 @@ def test_default_stats_dataarray(
449449

450450
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
451451
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
452-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
452+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
453453
def test_zone_ids_stats(backend, data_zones, data_values_2d, result_zone_ids_stats,
454454
result_zone_ids_stats_no_majority):
455-
if backend == 'cupy' and not has_cuda_and_cupy():
455+
if 'cupy' in backend and not has_cuda_and_cupy():
456456
pytest.skip("Requires CUDA and CuPy")
457457

458458
if 'dask' in backend and not dask_array_available():
@@ -642,7 +642,215 @@ def test_zonal_stats_against_qgis(elevation_raster_no_nans, raster, qgis_zonal_s
642642
check_results('numpy', xrspatial_df_result, qgis_zonal_stats, atol=1e-5)
643643

644644

645-
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy'])
645+
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
646+
@pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
647+
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
648+
@pytest.mark.filterwarnings("ignore:Degrees of freedom:RuntimeWarning")
649+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
650+
def test_stats_all_nan_zone(backend):
651+
"""Zone where every value is NaN should not crash.
652+
653+
Backend quirks: numpy keeps the empty zone with all-NaN stats; the dask
654+
path uses nansum for count/sum so those become 0; cupy drops the empty
655+
zone from the result entirely.
656+
"""
657+
if 'cupy' in backend and not has_cuda_and_cupy():
658+
pytest.skip("Requires CUDA and CuPy")
659+
if 'dask' in backend and not dask_array_available():
660+
pytest.skip("Requires Dask")
661+
662+
zones_data = np.array([[1, 1],
663+
[2, 2]])
664+
values_data = np.array([[np.nan, np.nan], # zone 1: all NaN
665+
[5.0, 7.0]]) # zone 2: normal
666+
667+
zones = create_test_raster(zones_data, backend, chunks=(2, 2))
668+
values = create_test_raster(values_data, backend, chunks=(2, 2))
669+
670+
funcs = ['mean', 'max', 'min', 'sum', 'count']
671+
df_result = stats(zones=zones, values=values, stats_funcs=funcs)
672+
673+
if 'cupy' in backend and 'dask' not in backend:
674+
# cupy drops zones with no valid values
675+
expected = {
676+
'zone': [2],
677+
'mean': [6.0],
678+
'max': [7.0],
679+
'min': [5.0],
680+
'sum': [12.0],
681+
'count': [2],
682+
}
683+
elif 'dask' in backend:
684+
# dask uses nansum reduction, so count/sum of all-NaN become 0
685+
expected = {
686+
'zone': [1, 2],
687+
'mean': [np.nan, 6.0],
688+
'max': [np.nan, 7.0],
689+
'min': [np.nan, 5.0],
690+
'sum': [0.0, 12.0],
691+
'count': [0, 2],
692+
}
693+
else:
694+
# numpy keeps empty zone with NaN for every stat
695+
expected = {
696+
'zone': [1, 2],
697+
'mean': [np.nan, 6.0],
698+
'max': [np.nan, 7.0],
699+
'min': [np.nan, 5.0],
700+
'sum': [np.nan, 12.0],
701+
'count': [np.nan, 2],
702+
}
703+
check_results(backend, df_result, expected)
704+
705+
706+
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
707+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
708+
def test_stats_single_cell_zones(backend):
709+
"""Each zone has exactly one cell — std and var must be 0, not NaN."""
710+
if 'cupy' in backend and not has_cuda_and_cupy():
711+
pytest.skip("Requires CUDA and CuPy")
712+
if 'dask' in backend and not dask_array_available():
713+
pytest.skip("Requires Dask")
714+
715+
zones_data = np.array([[1, 2, 3]])
716+
values_data = np.array([[10.0, 20.0, 30.0]])
717+
718+
zones = create_test_raster(zones_data, backend, chunks=(1, 3))
719+
values = create_test_raster(values_data, backend, chunks=(1, 3))
720+
721+
funcs = ['mean', 'max', 'min', 'std', 'var', 'count']
722+
df_result = stats(zones=zones, values=values, stats_funcs=funcs)
723+
724+
expected = {
725+
'zone': [1, 2, 3],
726+
'mean': [10.0, 20.0, 30.0],
727+
'max': [10.0, 20.0, 30.0],
728+
'min': [10.0, 20.0, 30.0],
729+
'std': [0.0, 0.0, 0.0],
730+
'var': [0.0, 0.0, 0.0],
731+
'count': [1, 1, 1],
732+
}
733+
check_results(backend, df_result, expected)
734+
735+
736+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
737+
def test_stats_negative_zone_ids(backend):
738+
"""Negative integers are valid zone IDs."""
739+
if 'cupy' in backend and not has_cuda_and_cupy():
740+
pytest.skip("Requires CUDA and CuPy")
741+
if 'dask' in backend and not dask_array_available():
742+
pytest.skip("Requires Dask")
743+
744+
zones_data = np.array([[-1, -1, 2, 2],
745+
[-1, -1, 2, 2]])
746+
values_data = np.array([[1.0, 3.0, 10.0, 20.0],
747+
[5.0, 7.0, 30.0, 40.0]])
748+
749+
zones = create_test_raster(zones_data, backend, chunks=(2, 2))
750+
values = create_test_raster(values_data, backend, chunks=(2, 2))
751+
752+
funcs = ['mean', 'max', 'min', 'sum', 'count']
753+
df_result = stats(zones=zones, values=values, stats_funcs=funcs)
754+
755+
expected = {
756+
'zone': [-1, 2],
757+
'mean': [4.0, 25.0],
758+
'max': [7.0, 40.0],
759+
'min': [1.0, 10.0],
760+
'sum': [16.0, 100.0],
761+
'count': [4, 4],
762+
}
763+
check_results(backend, df_result, expected)
764+
765+
766+
@pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
767+
@pytest.mark.filterwarnings("ignore:Mean of empty slice:RuntimeWarning")
768+
@pytest.mark.filterwarnings("ignore:invalid value encountered in divide:RuntimeWarning")
769+
@pytest.mark.filterwarnings("ignore:Degrees of freedom:RuntimeWarning")
770+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
771+
def test_stats_nodata_wipes_zone(backend):
772+
"""When nodata_values filters out every finite value in a zone, stats are NaN.
773+
774+
Same per-backend quirks as test_stats_all_nan_zone.
775+
"""
776+
if 'cupy' in backend and not has_cuda_and_cupy():
777+
pytest.skip("Requires CUDA and CuPy")
778+
if 'dask' in backend and not dask_array_available():
779+
pytest.skip("Requires Dask")
780+
781+
zones_data = np.array([[1, 1],
782+
[2, 2]])
783+
values_data = np.array([[5.0, 5.0], # zone 1: all values equal to nodata
784+
[3.0, 7.0]]) # zone 2: normal
785+
786+
zones = create_test_raster(zones_data, backend, chunks=(2, 2))
787+
values = create_test_raster(values_data, backend, chunks=(2, 2))
788+
789+
funcs = ['mean', 'max', 'min', 'sum', 'count']
790+
df_result = stats(zones=zones, values=values, stats_funcs=funcs, nodata_values=5)
791+
792+
if 'cupy' in backend and 'dask' not in backend:
793+
expected = {
794+
'zone': [2],
795+
'mean': [5.0],
796+
'max': [7.0],
797+
'min': [3.0],
798+
'sum': [10.0],
799+
'count': [2],
800+
}
801+
elif 'dask' in backend:
802+
expected = {
803+
'zone': [1, 2],
804+
'mean': [np.nan, 5.0],
805+
'max': [np.nan, 7.0],
806+
'min': [np.nan, 3.0],
807+
'sum': [0.0, 10.0],
808+
'count': [0, 2],
809+
}
810+
else:
811+
expected = {
812+
'zone': [1, 2],
813+
'mean': [np.nan, 5.0],
814+
'max': [np.nan, 7.0],
815+
'min': [np.nan, 3.0],
816+
'sum': [np.nan, 10.0],
817+
'count': [np.nan, 2],
818+
}
819+
check_results(backend, df_result, expected)
820+
821+
822+
@pytest.mark.skipif(not dask_array_available(), reason="Requires Dask")
823+
@pytest.mark.parametrize("backend", ['dask+numpy', 'dask+cupy'])
824+
def test_stats_zone_in_subset_of_blocks(backend):
825+
"""A zone present in only some dask blocks must still be reduced correctly."""
826+
if 'cupy' in backend and not has_cuda_and_cupy():
827+
pytest.skip("Requires CUDA and CuPy")
828+
829+
# 2x6 grid, chunked into two 2x3 blocks.
830+
# Zone 1 only in left block, zone 3 only in right block, zone 2 spans both.
831+
zones_data = np.array([[1, 1, 2, 2, 3, 3],
832+
[1, 1, 2, 2, 3, 3]], dtype=float)
833+
values_data = np.array([[2.0, 4.0, 10.0, 20.0, 100.0, 200.0],
834+
[6.0, 8.0, 30.0, 40.0, 300.0, 400.0]])
835+
836+
zones = create_test_raster(zones_data, backend, chunks=(2, 3))
837+
values = create_test_raster(values_data, backend, chunks=(2, 3))
838+
839+
funcs = ['mean', 'max', 'min', 'sum', 'count']
840+
df_result = stats(zones=zones, values=values, stats_funcs=funcs)
841+
842+
expected = {
843+
'zone': [1, 2, 3],
844+
'mean': [5.0, 25.0, 250.0],
845+
'max': [8.0, 40.0, 400.0],
846+
'min': [2.0, 10.0, 100.0],
847+
'sum': [20.0, 100.0, 1000.0],
848+
'count': [4, 4, 4],
849+
}
850+
check_results(backend, df_result, expected)
851+
852+
853+
@pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
646854
def test_zonal_stats_inputs_unmodified(backend, data_zones, data_values_2d, result_default_stats):
647855
if backend == 'cupy' and not has_cuda_and_cupy():
648856
pytest.skip("Requires CUDA and CuPy")

xrspatial/zonal.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,24 @@ def _stats_cupy(
454454
return stats_df
455455

456456

457+
def _stats_dask_cupy(
458+
zones,
459+
values,
460+
zone_ids,
461+
stats_funcs,
462+
nodata_values,
463+
):
464+
zones_cpu = zones.map_blocks(
465+
lambda x: x.get(), dtype=zones.dtype, meta=np.array(()),
466+
)
467+
values_cpu = values.map_blocks(
468+
lambda x: x.get(), dtype=values.dtype, meta=np.array(()),
469+
)
470+
return _stats_dask_numpy(
471+
zones_cpu, values_cpu, zone_ids, stats_funcs, nodata_values,
472+
)
473+
474+
457475
def stats(
458476
zones: xr.DataArray,
459477
values: xr.DataArray,
@@ -684,9 +702,7 @@ def stats(
684702
numpy_func=lambda *args: _stats_numpy(*args, return_type=return_type),
685703
dask_func=_stats_dask_numpy,
686704
cupy_func=_stats_cupy,
687-
dask_cupy_func=lambda *args: not_implemented_func(
688-
*args, messages='stats() does not support dask with cupy backed DataArray' # noqa
689-
),
705+
dask_cupy_func=_stats_dask_cupy,
690706
)
691707
result = mapper(values)(
692708
zones.data, values.data, zone_ids, stats_funcs_dict, nodata_values,

0 commit comments

Comments
 (0)