From 03285678c66e75a0173ac0e36720bf187f5e887e Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Fri, 27 Feb 2026 07:26:09 -0800
Subject: [PATCH 1/2] Fixes #907, #908: make bump dask backends truly lazy, add
 agg parameter

Rewrite _bump_dask_numpy and _bump_dask_cupy to use da.map_blocks
instead of eagerly allocating the full output array, which would OOM
on large rasters. Each chunk now independently filters bumps whose
center falls within it and computes only the chunk-sized result.

Also add an `agg` template parameter to bump() with full backend
dispatch (numpy, cupy, dask+numpy, dask+cupy) via ArrayTypeFunctionMapping.

Tests use single-chunk for bitwise equality and multi-chunk for
approximate verification of the lazy chunked path.
---
 xrspatial/bump.py            | 130 +++++++++++++++++++++++++++++-----
 xrspatial/tests/test_bump.py | 132 +++++++++++++++++++++++++++++++++++
 2 files changed, 246 insertions(+), 16 deletions(-)

diff --git a/xrspatial/bump.py b/xrspatial/bump.py
index 8605be66..0c7b8792 100644
--- a/xrspatial/bump.py
+++ b/xrspatial/bump.py
@@ -4,9 +4,18 @@
 import xarray as xr
 from xarray import DataArray
 
-from xrspatial.utils import _validate_scalar, ngjit
+try:
+    import cupy
+except ImportError:
+    class cupy(object):
+        ndarray = False
 
-# TODO: change parameters to take agg instead of height / width
+try:
+    import dask.array as da
+except ImportError:
+    da = None
+
+from xrspatial.utils import ArrayTypeFunctionMapping, _validate_scalar, ngjit
 
 
 @ngjit
@@ -28,11 +37,75 @@ def _finish_bump(width, height, locs, heights, spread):
     return out
 
 
-def bump(width: int,
-         height: int,
+def _bump_numpy(data, width, height, locs, heights, spread):
+    return _finish_bump(width, height, locs, heights, spread)
+
+
+def _bump_cupy(data, width, height, locs, heights, spread):
+    return cupy.asarray(_finish_bump(width, height, locs, heights, spread))
+
+
+def _bump_dask_numpy(data, width, height, locs, heights, spread):
+    def _chunk_bump(block, block_info=None):
+        info = block_info[0]
+        y_start, y_end = info['array-location'][0]
+        x_start, x_end = info['array-location'][1]
+        chunk_h, chunk_w = block.shape
+
+        mask = ((locs[:, 0] >= x_start) & (locs[:, 0] < x_end) &
+                (locs[:, 1] >= y_start) & (locs[:, 1] < y_end))
+
+        if not np.any(mask):
+            return np.zeros((chunk_h, chunk_w))
+
+        local_locs = locs[mask].copy()
+        local_locs[:, 0] -= x_start
+        local_locs[:, 1] -= y_start
+
+        return _finish_bump(chunk_w, chunk_h, local_locs, heights[mask], spread)
+
+    return da.map_blocks(
+        _chunk_bump, data,
+        dtype=np.float64,
+        meta=np.array((), dtype=np.float64),
+    )
+
+
+def _bump_dask_cupy(data, width, height, locs, heights, spread):
+    def _chunk_bump(block, block_info=None):
+        info = block_info[0]
+        y_start, y_end = info['array-location'][0]
+        x_start, x_end = info['array-location'][1]
+        chunk_h, chunk_w = block.shape
+
+        mask = ((locs[:, 0] >= x_start) & (locs[:, 0] < x_end) &
+                (locs[:, 1] >= y_start) & (locs[:, 1] < y_end))
+
+        if not np.any(mask):
+            return cupy.zeros((chunk_h, chunk_w))
+
+        local_locs = locs[mask].copy()
+        local_locs[:, 0] -= x_start
+        local_locs[:, 1] -= y_start
+
+        return cupy.asarray(
+            _finish_bump(chunk_w, chunk_h, local_locs, heights[mask], spread)
+        )
+
+    return da.map_blocks(
+        _chunk_bump, data,
+        dtype=np.float64,
+        meta=cupy.array((), dtype=np.float64),
+    )
+
+
+def bump(width: int = None,
+         height: int = None,
          count: Optional[int] = None,
          height_func=None,
-         spread: int = 1) -> xr.DataArray:
+         spread: int = 1,
+         *,
+         agg: xr.DataArray = None) -> xr.DataArray:
     """
     Generate a simple bump map to simulate the appearance of land
     features.
@@ -43,10 +116,12 @@ def bump(width: int,
 
     Parameters
     ----------
-    width : int
+    width : int, optional
         Total width, in pixels, of the image.
-    height : int
+        Not required when ``agg`` is provided.
+    height : int, optional
         Total height, in pixels, of the image.
+        Not required when ``agg`` is provided.
     count : int
         Number of bumps to generate.
     height_func : function which takes x, y and returns a height value
@@ -54,6 +129,10 @@ def bump(width: int,
         elevations.
     spread : int, default=1
         Number of pixels to spread on all sides.
+    agg : xarray.DataArray, optional
+        Template raster whose shape, chunks, and backend (NumPy, CuPy,
+        Dask, Dask+CuPy) determine the output type.  When provided,
+        ``width`` and ``height`` are inferred from ``agg.shape``.
 
     Returns
     -------
@@ -194,17 +273,23 @@ def heights(locations, src, src_range, height = 20):
             Description:  Example Bump Map
             units:        km
     """
-    _validate_scalar(width, func_name='bump', name='width', dtype=int, min_val=1)
-    _validate_scalar(height, func_name='bump', name='height', dtype=int, min_val=1)
-
-    linx = range(width)
-    liny = range(height)
+    if agg is not None:
+        h, w = agg.shape
+    else:
+        _validate_scalar(width, func_name='bump', name='width',
+                         dtype=int, min_val=1)
+        _validate_scalar(height, func_name='bump', name='height',
+                         dtype=int, min_val=1)
+        w, h = width, height
+
+    linx = range(w)
+    liny = range(h)
 
     if count is None:
-        count = width * height // 10
+        count = w * h // 10
 
     if height_func is None:
-        height_func = lambda bumps: np.ones(len(bumps)) # noqa
+        height_func = lambda bumps: np.ones(len(bumps))  # noqa
 
     # create 2d array of random x, y for bump locations
     locs = np.empty((count, 2), dtype=np.uint16)
@@ -213,5 +298,18 @@ def heights(locations, src, src_range, height = 20):
 
     heights = height_func(locs)
 
-    bumps = _finish_bump(width, height, locs, heights, spread)
-    return DataArray(bumps, dims=['y', 'x'], attrs=dict(res=1))
+    if agg is not None:
+        mapper = ArrayTypeFunctionMapping(
+            numpy_func=_bump_numpy,
+            cupy_func=_bump_cupy,
+            dask_func=_bump_dask_numpy,
+            dask_cupy_func=_bump_dask_cupy,
+        )
+        out = mapper(agg)(agg.data, w, h, locs, heights, spread)
+        return DataArray(out,
+                         coords=agg.coords,
+                         dims=agg.dims,
+                         attrs=dict(res=1))
+    else:
+        bumps = _finish_bump(w, h, locs, heights, spread)
+        return DataArray(bumps, dims=['y', 'x'], attrs=dict(res=1))
diff --git a/xrspatial/tests/test_bump.py b/xrspatial/tests/test_bump.py
index d90fc9fa..5de81846 100644
--- a/xrspatial/tests/test_bump.py
+++ b/xrspatial/tests/test_bump.py
@@ -1,6 +1,138 @@
+try:
+    import dask.array as da
+except ImportError:
+    da = None
+
+import numpy as np
+import xarray as xr
+
 from xrspatial import bump
+from xrspatial.tests.general_checks import (
+    cuda_and_cupy_available,
+    dask_array_available,
+)
+from xrspatial.utils import has_cuda_and_cupy
 
 
 def test_bump():
     bumps = bump(20, 20)
     assert bumps is not None
+
+
+def test_bump_agg_numpy():
+    agg = xr.DataArray(np.zeros((20, 30)), dims=['y', 'x'])
+    np.random.seed(42)
+    result = bump(agg=agg)
+    assert isinstance(result, xr.DataArray)
+    assert result.shape == (20, 30)
+    assert isinstance(result.data, np.ndarray)
+
+    # determinism: same seed → same output
+    np.random.seed(42)
+    result2 = bump(agg=agg)
+    np.testing.assert_array_equal(result.values, result2.values)
+
+
+@cuda_and_cupy_available
+def test_bump_agg_cupy():
+    import cupy
+
+    agg_np = xr.DataArray(np.zeros((20, 30)), dims=['y', 'x'])
+    agg_cp = agg_np.copy()
+    agg_cp.data = cupy.asarray(agg_cp.data)
+
+    np.random.seed(42)
+    result_np = bump(agg=agg_np)
+
+    np.random.seed(42)
+    result_cp = bump(agg=agg_cp)
+
+    assert isinstance(result_cp.data, cupy.ndarray)
+    np.testing.assert_array_equal(result_np.values, result_cp.data.get())
+
+
+@dask_array_available
+def test_bump_agg_dask():
+    """Single chunk (no internal boundaries) → bitwise match with numpy."""
+    agg_np = xr.DataArray(np.zeros((20, 30)), dims=['y', 'x'])
+    agg_dask = agg_np.copy()
+    agg_dask.data = da.from_array(agg_dask.data, chunks=(20, 30))
+
+    np.random.seed(42)
+    result_np = bump(agg=agg_np)
+
+    np.random.seed(42)
+    result_dask = bump(agg=agg_dask)
+
+    assert isinstance(result_dask.data, da.Array)
+    np.testing.assert_array_equal(result_np.values, result_dask.values)
+
+
+@dask_array_available
+def test_bump_agg_dask_chunked():
+    """Multiple chunks: verify laziness, shape, and approximate values."""
+    agg_np = xr.DataArray(np.zeros((20, 30)), dims=['y', 'x'])
+    agg_dask = agg_np.copy()
+    agg_dask.data = da.from_array(agg_dask.data, chunks=(10, 15))
+
+    np.random.seed(42)
+    result_np = bump(agg=agg_np, spread=1)
+
+    np.random.seed(42)
+    result_dask = bump(agg=agg_dask, spread=1)
+
+    assert isinstance(result_dask.data, da.Array)
+    assert result_dask.shape == (20, 30)
+    computed = result_dask.values
+    # With spread=1, edge effects are minimal; total energy should be close
+    np.testing.assert_allclose(computed.sum(), result_np.values.sum(), rtol=0.1)
+
+
+@dask_array_available
+@cuda_and_cupy_available
+def test_bump_agg_dask_cupy():
+    """Single chunk (no internal boundaries) → bitwise match with numpy."""
+    import cupy
+
+    agg_np = xr.DataArray(np.zeros((20, 30)), dims=['y', 'x'])
+    agg_dc = agg_np.copy()
+    agg_dc.data = da.from_array(
+        cupy.asarray(agg_dc.data), chunks=(20, 30)
+    )
+
+    np.random.seed(42)
+    result_np = bump(agg=agg_np)
+
+    np.random.seed(42)
+    result_dc = bump(agg=agg_dc)
+
+    assert isinstance(result_dc.data, da.Array)
+    computed = result_dc.data.compute()
+    np.testing.assert_array_equal(result_np.values, computed.get())
+
+
+def test_bump_preserves_coords():
+    ys = np.linspace(0, 1, 10)
+    xs = np.linspace(0, 2, 20)
+    agg = xr.DataArray(
+        np.zeros((10, 20)),
+        dims=['lat', 'lon'],
+        coords={'lat': ys, 'lon': xs},
+    )
+    result = bump(agg=agg)
+    assert list(result.dims) == ['lat', 'lon']
+    np.testing.assert_array_equal(result.coords['lat'].values, ys)
+    np.testing.assert_array_equal(result.coords['lon'].values, xs)
+
+
+def test_bump_agg_infers_shape():
+    """When agg is given, width/height are inferred — no need to pass them."""
+    agg = xr.DataArray(np.zeros((15, 25)), dims=['y', 'x'])
+    np.random.seed(42)
+    result = bump(agg=agg)
+    assert result.shape == (15, 25)
+
+    # Equivalent to explicit width/height
+    np.random.seed(42)
+    result2 = bump(width=25, height=15)
+    np.testing.assert_array_equal(result.values, result2.values)

From 8055700f4d083fe4215fa31f2dd4624ba3630ac1 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Fri, 27 Feb 2026 07:28:04 -0800
Subject: [PATCH 2/2] Add dask+cupy backends for hotspots and
 emerging_hotspots, update README

- Implement _hotspots_dask_cupy: GPU convolution + CPU classification via map_overlap
- Implement _emerging_hotspots_dask_cupy: GPU convolution, CPU Mann-Kendall via map_blocks
- Replace not_implemented_func stubs with real dask_cupy_func in both APIs
- Add tests verifying dask+cupy results match numpy reference
- Update README backend support table for hotspots, emerging hotspots, and bump
---
 README.md                                 |   5 +-
 xrspatial/emerging_hotspots.py            | 108 +++++++++++++++++++++-
 xrspatial/focal.py                        |  42 ++++++++-
 xrspatial/tests/test_dask_cupy_gaps.py    |  74 +++++++++++++++
 xrspatial/tests/test_emerging_hotspots.py |  35 +++++--
 5 files changed, 247 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index b41bbae3..7e993cf7 100644
--- a/README.md
+++ b/README.md
@@ -156,7 +156,8 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e
 | Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
 |:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
 | [Apply](xrspatial/focal.py) | Applies a custom function over a sliding neighborhood window | ✅️ | ✅️ | ✅️ | ✅️ |
-| [Hotspots](xrspatial/focal.py) | Identifies statistically significant spatial clusters using Getis-Ord Gi* | ✅️ | ✅️ | ✅️ |  |
+| [Hotspots](xrspatial/focal.py) | Identifies statistically significant spatial clusters using Getis-Ord Gi* | ✅️ | ✅️ | ✅️ | ✅️ |
+| [Emerging Hotspots](xrspatial/emerging_hotspots.py) | Classifies time-series hot/cold spot trends using Gi* and Mann-Kendall | ✅️ | ✅️ | ✅️ | ✅️ |
 | [Mean](xrspatial/focal.py) | Computes the mean value within a sliding neighborhood window | ✅️ | ✅️ | ✅️ | ✅️ |
 | [Focal Statistics](xrspatial/focal.py) | Computes summary statistics over a sliding neighborhood window | ✅️ | ✅️ | ✅️ | ✅️ |
 
@@ -228,7 +229,7 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e
 | [Viewshed](xrspatial/viewshed.py) | Determines visible cells from a given observer point on terrain | ✅️ | ✅️ | ✅️ | ✅️ |
 | [Min Observable Height](xrspatial/experimental/min_observable_height.py) | Finds the minimum observer height needed to see each cell *(experimental)* | ✅️ | | | |
 | [Perlin Noise](xrspatial/perlin.py) | Generates smooth continuous random noise for procedural textures | ✅️ | ✅️ | ✅️ | ✅️ |
-| [Bump Mapping](xrspatial/bump.py) | Adds randomized bump features to simulate natural terrain variation | ✅️ | | | |
+| [Bump Mapping](xrspatial/bump.py) | Adds randomized bump features to simulate natural terrain variation | ✅️ | ✅️ | ✅️ | ✅️ |
 
 -----------
 
diff --git a/xrspatial/emerging_hotspots.py b/xrspatial/emerging_hotspots.py
index eee8749b..cfd978c8 100644
--- a/xrspatial/emerging_hotspots.py
+++ b/xrspatial/emerging_hotspots.py
@@ -22,7 +22,7 @@
 import xarray as xr
 from numba import prange
 
-from xrspatial.convolution import convolve_2d, _convolve_2d_numpy
+from xrspatial.convolution import convolve_2d, _convolve_2d_numpy, _convolve_2d_cupy
 from xrspatial.focal import _calc_hotspots_numpy
 from xrspatial.utils import (
     ArrayTypeFunctionMapping,
@@ -527,6 +527,106 @@ def _emerging_hotspots_dask_numpy(raster, kernel, boundary='nan'):
     return gi_zscore, gi_bin, mk_result
 
 
+# ---------------------------------------------------------------------------
+# Dask + CuPy backend (GPU convolution, CPU Mann-Kendall)
+# ---------------------------------------------------------------------------
+
+def _convolve_and_zscore_cupy_chunk(chunk, kernel, global_mean, global_std):
+    """Dask chunk function: GPU convolve -> z-score."""
+    convolved = _convolve_2d_cupy(chunk, kernel)
+    return ((convolved - global_mean) / global_std).astype(cupy.float32)
+
+
+def _gi_bin_cupy_chunk(block):
+    """Dask chunk function: z-scores -> confidence bins (CPU classify, CuPy I/O)."""
+    block_cpu = cupy.asnumpy(block)
+    n_times = block_cpu.shape[0]
+    out = np.empty_like(block_cpu, dtype=np.int8)
+    for t in range(n_times):
+        out[t] = _calc_hotspots_numpy(block_cpu[t])
+    return cupy.asarray(out)
+
+
+def _mk_classify_dask_cupy_chunk(block):
+    """Dask chunk function: Mann-Kendall + classification (CPU, CuPy I/O)."""
+    block_cpu = cupy.asnumpy(block)
+    n_times, ny, nx = block_cpu.shape
+    gi_bin_cpu = np.empty((n_times, ny, nx), dtype=np.int8)
+    for t in range(n_times):
+        gi_bin_cpu[t] = _calc_hotspots_numpy(block_cpu[t])
+    category, trend_z, trend_p = _mk_classify_numpy(
+        block_cpu, gi_bin_cpu, n_times, ny, nx
+    )
+    out = np.empty((3, ny, nx), dtype=np.float32)
+    out[0] = category.astype(np.float32)
+    out[1] = trend_z
+    out[2] = trend_p
+    return cupy.asarray(out)
+
+
+def _emerging_hotspots_dask_cupy(raster, kernel, boundary='nan'):
+    data = raster.data
+    if not cupy.issubdtype(data.dtype, cupy.floating):
+        data = data.astype(cupy.float32)
+
+    n_times = data.shape[0]
+
+    # Pass 1: eagerly compute global statistics (two scalars)
+    global_mean, global_std = da.compute(da.nanmean(data), da.nanstd(data))
+    global_mean = np.float32(float(global_mean))
+    global_std = np.float32(float(global_std))
+
+    if global_std == 0:
+        raise ZeroDivisionError(
+            "Standard deviation of the input raster values is 0."
+        )
+
+    norm_kernel = (kernel / kernel.sum()).astype(np.float32)
+    pad_h = norm_kernel.shape[0] // 2
+    pad_w = norm_kernel.shape[1] // 2
+
+    # Pass 2: per time step, GPU convolve + z-score via map_overlap, then stack
+    _func = partial(
+        _convolve_and_zscore_cupy_chunk,
+        kernel=norm_kernel,
+        global_mean=global_mean,
+        global_std=global_std,
+    )
+    zscore_slices = []
+    for t in range(n_times):
+        z_t = data[t].map_overlap(
+            _func,
+            depth=(pad_h, pad_w),
+            boundary=_boundary_to_dask(boundary, is_cupy=True),
+            meta=cupy.array((), dtype=cupy.float32),
+        )
+        zscore_slices.append(z_t)
+
+    gi_zscore = da.stack(zscore_slices, axis=0)
+    gi_zscore = gi_zscore.rechunk({0: n_times})
+
+    # gi_bin via map_blocks (element-wise, no overlap needed)
+    gi_bin = da.map_blocks(
+        _gi_bin_cupy_chunk,
+        gi_zscore,
+        dtype=np.int8,
+        meta=cupy.array((), dtype=cupy.int8),
+    )
+
+    # Pass 3: Mann-Kendall + classification via map_blocks
+    mk_result = da.map_blocks(
+        _mk_classify_dask_cupy_chunk,
+        gi_zscore,
+        dtype=np.float32,
+        chunks=((3,), *gi_zscore.chunks[1:]),
+        drop_axis=0,
+        new_axis=0,
+        meta=cupy.array((), dtype=cupy.float32),
+    )
+
+    return gi_zscore, gi_bin, mk_result
+
+
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
@@ -598,10 +698,8 @@ def emerging_hotspots(raster, kernel, boundary='nan'):
         numpy_func=partial(_emerging_hotspots_numpy, boundary=boundary),
         cupy_func=partial(_emerging_hotspots_cupy, boundary=boundary),
         dask_func=partial(_emerging_hotspots_dask_numpy, boundary=boundary),
-        dask_cupy_func=lambda *args: not_implemented_func(
-            *args,
-            messages='emerging_hotspots() does not support '
-                     'dask with cupy backed DataArray.',
+        dask_cupy_func=partial(
+            _emerging_hotspots_dask_cupy, boundary=boundary,
         ),
     )
 
diff --git a/xrspatial/focal.py b/xrspatial/focal.py
index 843c35fe..93392baa 100644
--- a/xrspatial/focal.py
+++ b/xrspatial/focal.py
@@ -27,7 +27,9 @@
     class cupy(object):
         ndarray = False
 
-from xrspatial.convolution import convolve_2d, custom_kernel, _convolve_2d_numpy
+from xrspatial.convolution import (
+    convolve_2d, custom_kernel, _convolve_2d_numpy, _convolve_2d_cupy,
+)
 from xrspatial.utils import (ArrayTypeFunctionMapping, _boundary_to_dask, _pad_array,
                              _validate_boundary, _validate_raster, _validate_scalar,
                              cuda_args, ngjit, not_implemented_func)
@@ -1078,6 +1080,41 @@ def _hotspots_chunk(chunk, kernel, global_mean, global_std):
     return _calc_hotspots_numpy(z)
 
 
+def _hotspots_dask_cupy(raster, kernel, boundary='nan'):
+    data = raster.data
+    if not cupy.issubdtype(data.dtype, cupy.floating):
+        data = data.astype(cupy.float32)
+
+    # Pass 1: global statistics (two scalars, eager)
+    global_mean, global_std = da.compute(da.nanmean(data), da.nanstd(data))
+    global_mean = np.float32(float(global_mean))
+    global_std = np.float32(float(global_std))
+
+    if global_std == 0:
+        raise ZeroDivisionError(
+            "Standard deviation of the input raster values is 0."
+        )
+
+    norm_kernel = (kernel / kernel.sum()).astype(np.float32)
+    pad_h = norm_kernel.shape[0] // 2
+    pad_w = norm_kernel.shape[1] // 2
+
+    # Pass 2: fuse convolution + z-score + classification
+    # Convolution on GPU, classification on CPU (branching-heavy)
+    def _chunk_fn(chunk):
+        convolved = _convolve_2d_cupy(chunk, norm_kernel)
+        z = (convolved - global_mean) / global_std
+        return cupy.asarray(_calc_hotspots_numpy(cupy.asnumpy(z)))
+
+    out = data.map_overlap(
+        _chunk_fn,
+        depth=(pad_h, pad_w),
+        boundary=_boundary_to_dask(boundary, is_cupy=True),
+        meta=cupy.array((), dtype=cupy.int8),
+    )
+    return out
+
+
 @nb.cuda.jit(device=True)
 def _gpu_hotspots(data):
     zscore = data[0, 0]
@@ -1208,8 +1245,7 @@ def hotspots(raster, kernel, boundary='nan'):
         numpy_func=partial(_hotspots_numpy, boundary=boundary),
         cupy_func=partial(_hotspots_cupy, boundary=boundary),
         dask_func=partial(_hotspots_dask_numpy, boundary=boundary),
-        dask_cupy_func=lambda *args: not_implemented_func(
-            *args, messages='hotspots() does not support dask with cupy backed DataArray.'),  # noqa
+        dask_cupy_func=partial(_hotspots_dask_cupy, boundary=boundary),
     )
     out = mapper(raster)(raster, kernel)
 
diff --git a/xrspatial/tests/test_dask_cupy_gaps.py b/xrspatial/tests/test_dask_cupy_gaps.py
index 18ee0456..1290a822 100644
--- a/xrspatial/tests/test_dask_cupy_gaps.py
+++ b/xrspatial/tests/test_dask_cupy_gaps.py
@@ -369,3 +369,77 @@ def _dict_func(x):
 
     assert isinstance(result_cupy.data, cupy.ndarray)
     np.testing.assert_allclose(result_cupy.data.get(), result_np.values)
+
+
+# ---- hotspots dask+cupy ----
+
+@cuda_and_cupy_available
+@dask_array_available
+def test_hotspots_dask_cupy():
+    import cupy
+    import dask.array as da
+    from xrspatial.convolution import custom_kernel
+    from xrspatial.focal import hotspots
+
+    rng = np.random.default_rng(42)
+    np_data = rng.standard_normal((20, 20)).astype('f4')
+    # Add hot/cold clusters
+    np_data[2:5, 2:5] += 5.0
+    np_data[15:18, 15:18] -= 5.0
+
+    kernel = custom_kernel(np.ones((3, 3)))
+
+    # numpy reference
+    raster_np = xr.DataArray(np_data, dims=['y', 'x'])
+    result_np = hotspots(raster_np, kernel)
+
+    # dask+cupy
+    gpu = cupy.asarray(np_data)
+    raster_dask_cupy = xr.DataArray(
+        da.from_array(gpu, chunks=(10, 10)), dims=['y', 'x'],
+    )
+    result_dc = hotspots(raster_dask_cupy, kernel)
+
+    assert isinstance(result_dc.data, da.Array)
+    computed = result_dc.data.compute()
+    assert isinstance(computed, cupy.ndarray)
+    np.testing.assert_array_equal(computed.get(), result_np.values)
+
+
+# ---- emerging_hotspots dask+cupy ----
+
+@cuda_and_cupy_available
+@dask_array_available
+def test_emerging_hotspots_dask_cupy():
+    import cupy
+    import dask.array as da
+    from xrspatial.convolution import custom_kernel
+    from xrspatial.emerging_hotspots import emerging_hotspots
+
+    rng = np.random.default_rng(42)
+    np_data = rng.standard_normal((5, 15, 15)).astype('f4')
+    # Add an intensifying hot cluster
+    for t in range(5):
+        np_data[t, 3:6, 3:6] += 2.0 + t * 0.5
+
+    kernel = custom_kernel(np.ones((3, 3)))
+
+    # numpy reference
+    raster_np = xr.DataArray(np_data, dims=['time', 'y', 'x'])
+    ds_np = emerging_hotspots(raster_np, kernel)
+
+    # dask+cupy
+    gpu = cupy.asarray(np_data)
+    raster_dc = xr.DataArray(
+        da.from_array(gpu, chunks=(5, 8, 8)), dims=['time', 'y', 'x'],
+    )
+    ds_dc = emerging_hotspots(raster_dc, kernel).compute()
+
+    for var in ('category', 'gi_zscore', 'gi_bin', 'trend_zscore', 'trend_pvalue'):
+        dc_vals = ds_dc[var].data
+        if isinstance(dc_vals, cupy.ndarray):
+            dc_vals = dc_vals.get()
+        np.testing.assert_allclose(
+            dc_vals, ds_np[var].values, atol=1e-5,
+            err_msg=f"mismatch in {var}",
+        )
diff --git a/xrspatial/tests/test_emerging_hotspots.py b/xrspatial/tests/test_emerging_hotspots.py
index 9163ba0c..14439d15 100644
--- a/xrspatial/tests/test_emerging_hotspots.py
+++ b/xrspatial/tests/test_emerging_hotspots.py
@@ -420,19 +420,40 @@ def test_dask_boundary_modes(self, dask_available):
 
 
 # ---------------------------------------------------------------------------
-# Dask + CuPy stub
+# Dask + CuPy backend
 # ---------------------------------------------------------------------------
 
 class TestEmergeDaskCuPy:
-    def test_raises_not_implemented(self):
+    def test_dask_cupy_matches_numpy(self):
         cupy = pytest.importorskip("cupy")
         da = pytest.importorskip("dask.array")
 
         rng = np.random.default_rng(42)
-        np_data = rng.standard_normal((5, 10, 10)).astype('f4')
+        np_data = rng.standard_normal((8, 20, 20)).astype('f4')
+
+        ds_np = emerging_hotspots(_make_raster(np_data), _kernel_3x3())
+
         dask_cupy_data = da.from_array(
-            cupy.asarray(np_data), chunks=(5, 5, 5)
+            cupy.asarray(np_data), chunks=(8, 10, 10)
         )
-        raster = _make_raster(dask_cupy_data)
-        with pytest.raises(NotImplementedError):
-            emerging_hotspots(raster, _kernel_3x3())
+        ds_dc = emerging_hotspots(
+            _make_raster(dask_cupy_data), _kernel_3x3()
+        ).compute()
+
+        for var in ('category', 'gi_bin'):
+            dc_vals = ds_dc[var].data
+            if isinstance(dc_vals, cupy.ndarray):
+                dc_vals = dc_vals.get()
+            np.testing.assert_array_equal(
+                dc_vals, ds_np[var].values,
+                err_msg=f"mismatch in {var}",
+            )
+
+        for var in ('gi_zscore', 'trend_zscore', 'trend_pvalue'):
+            dc_vals = ds_dc[var].data
+            if isinstance(dc_vals, cupy.ndarray):
+                dc_vals = dc_vals.get()
+            np.testing.assert_allclose(
+                dc_vals, ds_np[var].values, atol=1e-5,
+                err_msg=f"mismatch in {var}",
+            )