Skip to content

Commit 5f6c332

Browse files
committed
Fixes #906: add dask+cupy backends for perlin, terrain, crosstab
Add missing dask+cupy backends for perlin() and generate_terrain() using per-chunk GPU kernel launches via da.map_blocks with block_info coordinate mapping. Add cupy and dask+cupy backends for crosstab() by converting to numpy/dask+numpy and delegating. Fix _find_cats() to handle pure cupy arrays without wrapping in @delayed.
1 parent f5ce56a commit 5f6c332

4 files changed

Lines changed: 289 additions & 13 deletions

File tree

xrspatial/perlin.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,37 @@ def _perlin_cupy(data: cupy.ndarray,
192192
return data
193193

194194

195+
def _perlin_dask_cupy(data: da.Array,
196+
freq: tuple,
197+
seed: int) -> da.Array:
198+
np.random.seed(seed)
199+
p = cupy.asarray(np.random.permutation(2**20))
200+
p = cupy.append(p, p)
201+
202+
height, width = data.shape
203+
204+
def _chunk_perlin(block, block_info=None):
205+
info = block_info[0]
206+
y_start, y_end = info['array-location'][0]
207+
x_start, x_end = info['array-location'][1]
208+
x0 = freq[0] * x_start / width
209+
x1 = freq[0] * x_end / width
210+
y0 = freq[1] * y_start / height
211+
y1 = freq[1] * y_end / height
212+
213+
out = cupy.empty(block.shape, dtype=cupy.float32)
214+
griddim, blockdim = cuda_args(block.shape)
215+
_perlin_gpu[griddim, blockdim](p, x0, x1, y0, y1, 1.0, out)
216+
return out
217+
218+
data = da.map_blocks(_chunk_perlin, data, dtype=cupy.float32,
219+
meta=cupy.array((), dtype=cupy.float32))
220+
221+
min_val, max_val = dask.compute(da.min(data), da.max(data))
222+
data = (data - min_val) / (max_val - min_val)
223+
return data
224+
225+
195226
def perlin(agg: xr.DataArray,
196227
freq: tuple = (1, 1),
197228
seed: int = 5,
@@ -246,9 +277,7 @@ def perlin(agg: xr.DataArray,
246277
numpy_func=_perlin_numpy,
247278
cupy_func=_perlin_cupy,
248279
dask_func=_perlin_dask_numpy,
249-
dask_cupy_func=lambda *args: not_implemented_func(
250-
*args, messages='perlin() does not support dask with cupy backed DataArray', # noqa
251-
)
280+
dask_cupy_func=_perlin_dask_cupy
252281
)
253282
out = mapper(agg)(agg.data, freq, seed)
254283
result = xr.DataArray(out,

xrspatial/terrain.py

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,55 @@ def _terrain_gpu(height_map, seed, x_range=(0, 1), y_range=(0, 1)):
162162
return height_map
163163

164164

165+
def _terrain_dask_cupy(data: da.Array,
166+
seed: int,
167+
x_range_scaled: tuple,
168+
y_range_scaled: tuple,
169+
zfactor: int) -> da.Array:
170+
data = data * 0
171+
height, width = data.shape
172+
173+
NOISE_LAYERS = ((1 / 2**i, (2**i, 2**i)) for i in range(16))
174+
175+
nrange = np.arange(2**20, dtype=np.int32)
176+
177+
for i, (m, (xfreq, yfreq)) in enumerate(NOISE_LAYERS):
178+
np.random.seed(seed + i)
179+
p = cupy.asarray(np.random.permutation(nrange))
180+
p = cupy.append(p, p)
181+
182+
def _chunk_noise(block, block_info=None, _p=p, _m=m,
183+
_xr=x_range_scaled, _yr=y_range_scaled,
184+
_xf=xfreq, _yf=yfreq):
185+
info = block_info[0]
186+
y_start, y_end = info['array-location'][0]
187+
x_start, x_end = info['array-location'][1]
188+
x0 = _xr[0] + (_xr[1] - _xr[0]) * x_start / width
189+
x1 = _xr[0] + (_xr[1] - _xr[0]) * x_end / width
190+
y0 = _yr[0] + (_yr[1] - _yr[0]) * y_start / height
191+
y1 = _yr[0] + (_yr[1] - _yr[0]) * y_end / height
192+
193+
out = cupy.empty(block.shape, dtype=cupy.float32)
194+
griddim, blockdim = cuda_args(block.shape)
195+
_perlin_gpu[griddim, blockdim](
196+
_p, x0 * _xf, x1 * _xf, y0 * _yf, y1 * _yf, _m, out
197+
)
198+
return out
199+
200+
noise = da.map_blocks(_chunk_noise, data, dtype=cupy.float32,
201+
meta=cupy.array((), dtype=cupy.float32))
202+
data = data + noise
203+
204+
data /= (1.00 + 0.50 + 0.25 + 0.13 + 0.06 + 0.03)
205+
data = data ** 3
206+
207+
min_val, max_val = dask.compute(da.min(data), da.max(data))
208+
data = (data - min_val) / (max_val - min_val)
209+
data = da.where(data < 0.3, 0, data)
210+
data *= zfactor
211+
return data
212+
213+
165214
def _terrain_cupy(data: cupy.ndarray,
166215
seed: int,
167216
x_range_scaled: tuple,
@@ -263,9 +312,7 @@ def generate_terrain(agg: xr.DataArray,
263312
numpy_func=_terrain_numpy,
264313
cupy_func=_terrain_cupy,
265314
dask_func=_terrain_dask_numpy,
266-
dask_cupy_func=lambda *args: not_implemented_func(
267-
*args, messages='generate_terrain() does not support dask with cupy backed DataArray' # noqa
268-
)
315+
dask_cupy_func=_terrain_dask_cupy
269316
)
270317
out = mapper(agg)(agg.data, seed, x_range_scaled, y_range_scaled, zfactor)
271318
canvas = ds.Canvas(
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
"""Tests for dask+cupy backends: perlin, terrain, crosstab."""
2+
3+
import numpy as np
4+
import xarray as xr
5+
6+
from xrspatial import generate_terrain, perlin
7+
from xrspatial.tests.general_checks import cuda_and_cupy_available, dask_array_available
8+
from xrspatial.utils import has_cuda_and_cupy
9+
10+
11+
def _make_raster(shape=(50, 50), backend='numpy', chunks=(10, 10)):
12+
data = np.zeros(shape, dtype=np.float32)
13+
raster = xr.DataArray(data, dims=['y', 'x'])
14+
if has_cuda_and_cupy() and 'cupy' in backend:
15+
import cupy
16+
raster.data = cupy.asarray(raster.data)
17+
if 'dask' in backend:
18+
import dask.array as da
19+
raster.data = da.from_array(raster.data, chunks=chunks)
20+
return raster
21+
22+
23+
# ---- perlin dask+cupy ----
24+
25+
@cuda_and_cupy_available
26+
@dask_array_available
27+
def test_perlin_dask_cupy():
28+
import cupy
29+
import dask.array as da
30+
31+
cupy_raster = _make_raster(backend='cupy')
32+
result_cupy = perlin(cupy_raster)
33+
34+
dask_cupy_raster = _make_raster(backend='dask+cupy')
35+
result_dask_cupy = perlin(dask_cupy_raster)
36+
37+
assert isinstance(result_dask_cupy.data, da.Array)
38+
assert result_dask_cupy.shape == cupy_raster.shape
39+
40+
computed = result_dask_cupy.data.compute()
41+
assert isinstance(computed, cupy.ndarray)
42+
43+
vals = computed.get()
44+
assert vals.min() >= 0.0
45+
assert vals.max() <= 1.0
46+
47+
np.testing.assert_allclose(
48+
result_cupy.data.get(), vals, rtol=1e-4, atol=1e-4,
49+
)
50+
51+
52+
# ---- terrain dask+cupy ----
53+
54+
@cuda_and_cupy_available
55+
@dask_array_available
56+
def test_terrain_dask_cupy():
57+
import cupy
58+
import dask.array as da
59+
60+
cupy_raster = _make_raster(backend='cupy')
61+
terrain_cupy = generate_terrain(cupy_raster)
62+
63+
dask_cupy_raster = _make_raster(backend='dask+cupy')
64+
terrain_dask_cupy = generate_terrain(dask_cupy_raster)
65+
66+
assert isinstance(terrain_dask_cupy.data, da.Array)
67+
assert terrain_dask_cupy.shape == cupy_raster.shape
68+
69+
computed = terrain_dask_cupy.data.compute()
70+
assert isinstance(computed, cupy.ndarray)
71+
72+
vals = computed.get()
73+
assert vals.min() >= 0.0
74+
75+
np.testing.assert_allclose(
76+
terrain_cupy.data.get(), vals, rtol=1e-4, atol=1e-4,
77+
)
78+
79+
80+
# ---- crosstab cupy ----
81+
82+
@cuda_and_cupy_available
83+
def test_crosstab_cupy():
84+
import cupy
85+
from xrspatial.zonal import crosstab
86+
87+
zones_np = np.array([
88+
[1, 1, 2, 2],
89+
[1, 1, 2, 2],
90+
[3, 3, 4, 4],
91+
[3, 3, 4, 4],
92+
], dtype=np.float64)
93+
values_np = np.array([
94+
[10, 10, 20, 20],
95+
[10, 20, 20, 30],
96+
[30, 30, 10, 10],
97+
[30, 10, 20, 20],
98+
], dtype=np.float64)
99+
100+
zones_xr = xr.DataArray(zones_np, dims=['y', 'x'])
101+
values_xr = xr.DataArray(values_np, dims=['y', 'x'])
102+
df_numpy = crosstab(zones_xr, values_xr)
103+
104+
zones_cupy = xr.DataArray(cupy.asarray(zones_np), dims=['y', 'x'])
105+
values_cupy = xr.DataArray(cupy.asarray(values_np), dims=['y', 'x'])
106+
df_cupy = crosstab(zones_cupy, values_cupy)
107+
108+
# Both should be pandas DataFrames with identical content
109+
assert list(df_numpy.columns) == list(df_cupy.columns)
110+
np.testing.assert_array_equal(df_numpy.values, df_cupy.values)
111+
112+
113+
# ---- crosstab dask+cupy ----
114+
115+
@cuda_and_cupy_available
116+
@dask_array_available
117+
def test_crosstab_dask_cupy():
118+
import cupy
119+
import dask.array as da
120+
from xrspatial.zonal import crosstab
121+
122+
zones_np = np.array([
123+
[1, 1, 2, 2],
124+
[1, 1, 2, 2],
125+
[3, 3, 4, 4],
126+
[3, 3, 4, 4],
127+
], dtype=np.float64)
128+
values_np = np.array([
129+
[10, 10, 20, 20],
130+
[10, 20, 20, 30],
131+
[30, 30, 10, 10],
132+
[30, 10, 20, 20],
133+
], dtype=np.float64)
134+
135+
zones_xr = xr.DataArray(zones_np, dims=['y', 'x'])
136+
values_xr = xr.DataArray(values_np, dims=['y', 'x'])
137+
df_numpy = crosstab(zones_xr, values_xr)
138+
139+
zones_gpu = cupy.asarray(zones_np)
140+
values_gpu = cupy.asarray(values_np)
141+
zones_dask = xr.DataArray(
142+
da.from_array(zones_gpu, chunks=(2, 2)), dims=['y', 'x']
143+
)
144+
values_dask = xr.DataArray(
145+
da.from_array(values_gpu, chunks=(2, 2)), dims=['y', 'x']
146+
)
147+
df_dask_cupy = crosstab(zones_dask, values_dask)
148+
149+
# dask case returns dask DataFrame
150+
df_computed = df_dask_cupy.compute()
151+
# sort both by zone for stable comparison
152+
df_numpy_sorted = df_numpy.sort_values('zone').reset_index(drop=True)
153+
df_computed_sorted = df_computed.sort_values('zone').reset_index(drop=True)
154+
155+
assert list(df_numpy_sorted.columns) == list(df_computed_sorted.columns)
156+
np.testing.assert_array_equal(
157+
df_numpy_sorted.values, df_computed_sorted.values,
158+
)

xrspatial/zonal.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -721,7 +721,7 @@ def _find_cats(values, cat_ids, nodata_values):
721721
if cat_ids is None:
722722
cat_ids = unique_cats
723723
else:
724-
if isinstance(values.data, np.ndarray):
724+
if isinstance(values.data, np.ndarray) or is_cupy_array(values.data):
725725
# remove cats that do not exist in `values` raster
726726
cat_ids = [c for c in cat_ids if c in unique_cats]
727727
else:
@@ -960,6 +960,52 @@ def _crosstab_dask_numpy(
960960
return dd.from_delayed(crosstab_df)
961961

962962

963+
def _crosstab_cupy(
964+
zones: np.ndarray,
965+
values: np.ndarray,
966+
zone_ids,
967+
unique_cats,
968+
cat_ids,
969+
nodata_values,
970+
agg: str,
971+
):
972+
# unique_cats / cat_ids may be cupy arrays from _find_cats
973+
if is_cupy_array(unique_cats):
974+
unique_cats = cupy.asnumpy(unique_cats)
975+
if is_cupy_array(cat_ids):
976+
cat_ids = cupy.asnumpy(cat_ids)
977+
return _crosstab_numpy(
978+
cupy.asnumpy(zones), cupy.asnumpy(values),
979+
zone_ids, unique_cats, cat_ids, nodata_values, agg,
980+
)
981+
982+
983+
def _crosstab_dask_cupy(
984+
zones,
985+
values,
986+
zone_ids,
987+
unique_cats,
988+
cat_ids,
989+
nodata_values,
990+
agg: str,
991+
):
992+
zones_cpu = zones.map_blocks(
993+
lambda x: x.get(), dtype=zones.dtype, meta=np.array(()),
994+
)
995+
values_cpu = values.map_blocks(
996+
lambda x: x.get(), dtype=values.dtype, meta=np.array(()),
997+
)
998+
# unique_cats / cat_ids may be cupy arrays from _find_cats
999+
if is_cupy_array(unique_cats):
1000+
unique_cats = cupy.asnumpy(unique_cats)
1001+
if is_cupy_array(cat_ids):
1002+
cat_ids = cupy.asnumpy(cat_ids)
1003+
return _crosstab_dask_numpy(
1004+
zones_cpu, values_cpu, zone_ids, unique_cats, cat_ids,
1005+
nodata_values, agg,
1006+
)
1007+
1008+
9631009
def crosstab(
9641010
zones: xr.DataArray,
9651011
values: xr.DataArray,
@@ -1161,12 +1207,8 @@ def crosstab(
11611207
mapper = ArrayTypeFunctionMapping(
11621208
numpy_func=_crosstab_numpy,
11631209
dask_func=_crosstab_dask_numpy,
1164-
cupy_func=lambda *args: not_implemented_func(
1165-
*args, messages='crosstab() does not support cupy backed DataArray'
1166-
),
1167-
dask_cupy_func=lambda *args: not_implemented_func(
1168-
*args, messages='crosstab() does not support dask with cupy backed DataArray' # noqa
1169-
),
1210+
cupy_func=_crosstab_cupy,
1211+
dask_cupy_func=_crosstab_dask_cupy,
11701212
)
11711213
crosstab_df = mapper(values)(
11721214
zones.data, values.data,

0 commit comments

Comments
 (0)