|
37 | 37 | import numpy as np |
38 | 38 | from typing_extensions import Self, override |
39 | 39 |
|
| 40 | +from pytools import memoize_method |
| 41 | + |
40 | 42 | from arraycontext.container.traversal import ( |
41 | 43 | rec_map_array_container, |
42 | 44 | rec_map_container, |
|
62 | 64 | if TYPE_CHECKING: |
63 | 65 | from collections.abc import Callable, Mapping |
64 | 66 |
|
65 | | - from numpy.typing import NDArray |
| 67 | + from numpy.typing import DTypeLike, NDArray |
66 | 68 |
|
67 | 69 | import loopy as lp |
68 | 70 | import pyopencl as cl |
@@ -263,12 +265,64 @@ def to_numpy(self, array: Array) -> np.ndarray: |
263 | 265 | def to_numpy(self, array: ContainerOrScalarT) -> ContainerOrScalarT: |
264 | 266 | ... |
265 | 267 |
|
@memoize_method
def _get_to_numpy_contiguous_copy_kernel(
    self, dtype: DTypeLike, ndim: int
) -> lp.TranslationUnit:
    """Build a loopy program that copies ``inp`` into ``out`` entry by entry.

    ``inp`` is declared with one symbolic stride per axis (``s0``, ``s1``,
    ...), and the loop bounds are the per-axis extents (``n0``, ``n1``,
    ...). Both families of names are declared as ``int64`` value
    arguments. The result is memoized per ``(dtype, ndim)`` pair by
    ``memoize_method``.

    :arg dtype: element type declared for both ``inp`` and ``out``.
    :arg ndim: number of axes of the arrays to be copied.
    :returns: the program, named ``to_numpy_contiguous_copy_{ndim}d``.
    """
    import loopy as lp

    from arraycontext import make_loopy_program

    # One loop index, one extent and one stride name per array axis.
    axes = range(ndim)
    index_names = [f"i{axis}" for axis in axes]
    extent_names = [f"n{axis}" for axis in axes]
    stride_names = [f"s{axis}" for axis in axes]

    subscript = ", ".join(index_names)
    bounds = " and ".join(
        f"0 <= {index} < {extent}"
        for index, extent in zip(index_names, extent_names, strict=True)
    )
    # Resulting text looks like "{ [i0, i1] : 0 <= i0 < n0 and 0 <= i1 < n1 }".
    domain = f"{{ [{subscript}] : {bounds} }}"

    return make_loopy_program(
        [domain],
        [f"out[{subscript}] = inp[{subscript}]"],
        kernel_data=[
            lp.GlobalArg("out", dtype=dtype, shape=lp.auto),
            lp.GlobalArg(
                "inp",
                dtype=dtype,
                strides=tuple(stride_names),
                shape=lp.auto,
            ),
            # A comma-separated name declares several value arguments at once.
            lp.ValueArg(",".join(stride_names), dtype=np.int64),
            lp.ValueArg(",".join(extent_names), dtype=np.int64),
        ],
        name=f"to_numpy_contiguous_copy_{ndim}d",
    )
| 307 | + |
266 | 308 | @override |
267 | 309 | def to_numpy(self, |
268 | 310 | array: ArrayOrContainerOrScalar |
269 | 311 | ) -> NumpyOrContainerOrScalar: |
270 | 312 | def _to_numpy(ary): |
271 | | - return ary.get(queue=self.queue) |
| 313 | + if ary.flags.forc: |
| 314 | + # pyopencl supports host transfers only for contiguous arrays. |
| 315 | + return ary.get(queue=self.queue) |
| 316 | + |
| 317 | + result = self.call_loopy( |
| 318 | + self._get_to_numpy_contiguous_copy_kernel(ary.dtype, ary.ndim), |
| 319 | + inp=ary, |
| 320 | + **{ |
| 321 | + f"s{i}": stride // ary.dtype.itemsize |
| 322 | + for i, stride in enumerate(ary.strides) |
| 323 | + }, |
| 324 | + )["out"] |
| 325 | + return result.get(queue=self.queue) |
272 | 326 |
|
273 | 327 | return with_array_context( |
274 | 328 | self._rec_map_container(_to_numpy, array), |
|
0 commit comments