Skip to content

Commit e7b455a

Browse files
alexfikl authored and inducer committed
toys: only send cl arrays to kernels
1 parent 9b32441 commit e7b455a

1 file changed

Lines changed: 59 additions & 37 deletions

File tree

sumpy/toys.py

Lines changed: 59 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -201,48 +201,59 @@ def get_l2l(self, from_order, to_order):
201201
# {{{ helpers
202202

203203
def _p2e(psource, center, rscale, order, p2e, expn_class, expn_kwargs):
204-
source_boxes = np.array([0], dtype=np.int32)
205-
box_source_starts = np.array([0], dtype=np.int32)
206-
box_source_counts_nonchild = np.array(
207-
[psource.points.shape[-1]], dtype=np.int32)
208-
209204
toy_ctx = psource.toy_ctx
205+
queue = toy_ctx.queue
206+
207+
source_boxes = cl.array.to_device(
208+
queue, np.array([0], dtype=np.int32))
209+
box_source_starts = cl.array.to_device(
210+
queue, np.array([0], dtype=np.int32))
211+
box_source_counts_nonchild = cl.array.to_device(
212+
queue, np.array([psource.points.shape[-1]], dtype=np.int32))
213+
210214
center = np.asarray(center)
211-
centers = np.array(center, dtype=np.float64).reshape(
212-
toy_ctx.kernel.dim, 1)
215+
centers = cl.array.to_device(
216+
queue,
217+
np.array(center, dtype=np.float64).reshape(toy_ctx.kernel.dim, 1))
213218

214219
evt, (coeffs,) = p2e(toy_ctx.queue,
215220
source_boxes=source_boxes,
216221
box_source_starts=box_source_starts,
217222
box_source_counts_nonchild=box_source_counts_nonchild,
218223
centers=centers,
219-
sources=psource.points,
220-
strengths=(psource.weights,),
224+
sources=cl.array.to_device(queue, psource.points),
225+
strengths=(cl.array.to_device(queue, psource.weights),),
221226
rscale=rscale,
222227
nboxes=1,
223228
tgt_base_ibox=0,
224229

225-
#flags="print_hl_cl",
226230
out_host=True,
227231
**toy_ctx.extra_source_and_kernel_kwargs)
228232

229-
return expn_class(toy_ctx, center, rscale, order, coeffs[0],
233+
return expn_class(toy_ctx, center, rscale, order, coeffs[0].get(queue),
230234
derived_from=psource, **expn_kwargs)
231235

232236

233237
def _e2p(psource, targets, e2p):
234-
ntargets = targets.shape[-1]
238+
toy_ctx = psource.toy_ctx
239+
queue = toy_ctx.queue
235240

236-
boxes = np.array([0], dtype=np.int32)
241+
ntargets = targets.shape[-1]
242+
boxes = cl.array.to_device(
243+
queue, np.array([0], dtype=np.int32))
244+
box_target_starts = cl.array.to_device(
245+
queue, np.array([0], dtype=np.int32))
246+
box_target_counts_nonchild = cl.array.to_device(
247+
queue, np.array([ntargets], dtype=np.int32))
237248

238-
box_target_starts = np.array([0], dtype=np.int32)
239-
box_target_counts_nonchild = np.array([ntargets], dtype=np.int32)
249+
centers = cl.array.to_device(
250+
queue,
251+
np.array(psource.center, dtype=np.float64).reshape(toy_ctx.kernel.dim, 1))
240252

241-
toy_ctx = psource.toy_ctx
242-
centers = np.array(psource.center, dtype=np.float64).reshape(
243-
toy_ctx.kernel.dim, 1)
253+
from pytools.obj_array import make_obj_array
254+
from sumpy.tools import vector_to_device
244255

245-
coeffs = np.array([psource.coeffs])
256+
coeffs = cl.array.to_device(queue, np.array([psource.coeffs]))
246257
evt, (pot,) = e2p(
247258
toy_ctx.queue,
248259
src_expansions=coeffs,
@@ -252,31 +263,38 @@ def _e2p(psource, targets, e2p):
252263
box_target_counts_nonchild=box_target_counts_nonchild,
253264
centers=centers,
254265
rscale=psource.rscale,
255-
targets=targets,
256-
#flags="print_hl_cl",
257-
out_host=True, **toy_ctx.extra_kernel_kwargs)
266+
targets=vector_to_device(queue, make_obj_array(targets)),
267+
268+
out_host=True,
269+
**toy_ctx.extra_kernel_kwargs)
258270

259-
return pot
271+
return pot.get(queue)
260272

261273

262274
def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs):
263275
toy_ctx = psource.toy_ctx
264-
265-
target_boxes = np.array([1], dtype=np.int32)
266-
src_box_starts = np.array([0, 1], dtype=np.int32)
267-
src_box_lists = np.array([0], dtype=np.int32)
268-
269-
centers = (np.array(
276+
queue = toy_ctx.queue
277+
278+
target_boxes = cl.array.to_device(
279+
queue, np.array([1], dtype=np.int32))
280+
src_box_starts = cl.array.to_device(
281+
queue, np.array([0, 1], dtype=np.int32))
282+
src_box_lists = cl.array.to_device(
283+
queue, np.array([0], dtype=np.int32))
284+
285+
centers = cl.array.to_device(
286+
queue,
287+
np.array(
270288
[
271289
# box 0: source
272290
psource.center,
273291

274292
# box 1: target
275293
to_center,
276-
],
277-
dtype=np.float64)).T.copy()
278-
279-
coeffs = np.array([psource.coeffs])
294+
],
295+
dtype=np.float64).T.copy()
296+
)
297+
coeffs = cl.array.to_device(queue, np.array([psource.coeffs]))
280298

281299
evt, (to_coeffs,) = e2e(
282300
toy_ctx.queue,
@@ -294,10 +312,10 @@ def _e2e(psource, to_center, to_rscale, to_order, e2e, expn_class, expn_kwargs):
294312
src_rscale=psource.rscale,
295313
tgt_rscale=to_rscale,
296314

297-
#flags="print_hl_cl",
298315
out_host=True, **toy_ctx.extra_kernel_kwargs)
299316

300-
return expn_class(toy_ctx, to_center, to_rscale, to_order, to_coeffs[1],
317+
return expn_class(
318+
toy_ctx, to_center, to_rscale, to_order, to_coeffs[1].get(queue),
301319
derived_from=psource, **expn_kwargs)
302320

303321
# }}}
@@ -443,12 +461,16 @@ def __init__(self,
443461
self._center = center
444462

445463
def eval(self, targets: np.ndarray) -> np.ndarray:
464+
queue = self.toy_ctx.queue
446465
evt, (potential,) = self.toy_ctx.get_p2p()(
447-
self.toy_ctx.queue, targets, self.points, [self.weights],
466+
queue,
467+
cl.array.to_device(queue, targets),
468+
cl.array.to_device(queue, self.points),
469+
[cl.array.to_device(queue, self.weights)],
448470
out_host=True,
449471
**self.toy_ctx.extra_source_and_kernel_kwargs)
450472

451-
return potential
473+
return potential.get(queue)
452474

453475
@property
454476
def center(self):

0 commit comments

Comments
 (0)