Skip to content

Commit 4e309ab

Browse files
committed
Experiment with SVM backing arrays
1 parent 0195ba6 commit 4e309ab

3 files changed

Lines changed: 83 additions & 9 deletions

File tree

examples/demo_array_svm.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import pyopencl as cl
2+
import pyopencl.array as cl_array
3+
import numpy as np
4+
import numpy.linalg as la
5+
6+
a = np.random.rand(500).astype(np.float32)
7+
b = np.random.rand(500).astype(np.float32)
8+
9+
10+
class SVMAllocator:
    """Callable allocator that returns SVM-backed memory.

    An instance is passed as the ``allocator=`` argument of the
    ``pyopencl.array`` constructors used in this demo; calling it with a
    byte count returns a ``cl.SVM`` object wrapping a freshly created
    ``np.int8`` SVM array of that many elements.

    :arg ctx: the OpenCL context in which SVM memory is allocated.
    :arg flags: ``cl.svm_mem_flags`` bits forwarded to ``cl.svm_empty``.
    :arg alignment: alignment value forwarded to ``cl.svm_empty``.
    """

    def __init__(self, ctx, flags, alignment):
        self._context = ctx
        self._flags = flags
        self._alignment = alignment

    def __call__(self, nbytes):
        """Return a ``cl.SVM`` wrapping *nbytes* bytes of SVM memory."""
        # Allocate in the context this allocator was constructed with,
        # not in whatever module-level ``ctx`` happens to be defined.
        return cl.SVM(cl.svm_empty(
            self._context, self._flags, (nbytes,), np.int8, "C",
            self._alignment))
19+
20+
21+
ctx = cl.create_some_context()
22+
queue = cl.CommandQueue(ctx)
23+
24+
alloc = SVMAllocator(ctx,
25+
cl.svm_mem_flags.READ_WRITE | cl.svm_mem_flags.SVM_FINE_GRAIN_BUFFER,
26+
0)
27+
28+
a_dev = cl_array.to_device(queue, a, allocator=alloc)
29+
print("A_DEV", a_dev.data.mem.nbytes, a_dev.data.mem.__array_interface__)
30+
b_dev = cl_array.to_device(queue, b, allocator=alloc)
31+
dest_dev = cl_array.empty_like(a_dev)
32+
print("DEST", dest_dev.data.mem.__array_interface__)
33+
34+
prg = cl.Program(ctx, """
35+
__kernel void sum(__global const float *a,
36+
__global const float *b, __global float *c)
37+
{
38+
int gid = get_global_id(0);
39+
c[gid] = a[gid] + b[gid];
40+
}
41+
""").build()
42+
43+
knl = prg.sum # Use this Kernel object for repeated calls
44+
knl(queue, a.shape, None, a_dev.data, b_dev.data, dest_dev.data)
45+
46+
# PROBLEM: numpy frees the temporary result of (a_dev+b_dev) before
47+
# we're done with it
48+
diff = (dest_dev - (a_dev+b_dev)).get()
49+
np.set_printoptions(linewidth=400)
50+
print(dest_dev)
51+
print((a_dev+b_dev).get())
52+
print(diff)
53+
print(la.norm(diff))
54+
print("A_DEV", a_dev.data.mem.__array_interface__)

pyopencl/__init__.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,7 +1771,12 @@ def enqueue_copy(queue, dest, src, **kwargs):
17711771
src = SVM(src)
17721772

17731773
is_blocking = kwargs.pop("is_blocking", True)
1774-
return _cl._enqueue_svm_memcpy(queue, is_blocking, dest, src, **kwargs)
1774+
1775+
# FIXME: POCL workaround: enqueue non-blocking, then wait on the event
1776+
evt = _cl._enqueue_svm_memcpy(queue, False, dest, src, **kwargs)
1777+
if is_blocking:
1778+
evt.wait()
1779+
return evt
17751780

17761781
else:
17771782
# assume to-host
@@ -1800,8 +1805,13 @@ def enqueue_copy(queue, dest, src, **kwargs):
18001805
# from svm
18011806
# dest is not a SVM instance, otherwise we'd be in the branch above
18021807
is_blocking = kwargs.pop("is_blocking", True)
1803-
return _cl._enqueue_svm_memcpy(
1804-
queue, is_blocking, SVM(dest), src, **kwargs)
1808+
1809+
evt = _cl._enqueue_svm_memcpy(queue, False, SVM(dest), src, **kwargs)
1810+
# FIXME: POCL workaround
1811+
if is_blocking:
1812+
evt.wait()
1813+
return evt
1814+
18051815
else:
18061816
# assume from-host
18071817
raise TypeError("enqueue_copy cannot perform host-to-host transfers")

pyopencl/array.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -670,9 +670,14 @@ def set(self, ary, queue=None, async_=None, **kwargs):
670670
stacklevel=2)
671671

672672
if self.size:
673-
event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
674-
device_offset=self.offset,
675-
is_blocking=not async_)
673+
if self.offset:
674+
event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
675+
device_offset=self.offset,
676+
is_blocking=not async_)
677+
else:
678+
event1 = cl.enqueue_copy(queue or self.queue, self.base_data, ary,
679+
is_blocking=not async_)
680+
676681
self.add_event(event1)
677682

678683
def _get(self, queue=None, ary=None, async_=None, **kwargs):
@@ -720,9 +725,14 @@ def _get(self, queue=None, ary=None, async_=None, **kwargs):
720725
"to associate one.")
721726

722727
if self.size:
723-
event1 = cl.enqueue_copy(queue, ary, self.base_data,
724-
device_offset=self.offset,
725-
wait_for=self.events, is_blocking=not async_)
728+
if self.offset:
729+
event1 = cl.enqueue_copy(queue, ary, self.base_data,
730+
device_offset=self.offset,
731+
wait_for=self.events, is_blocking=not async_)
732+
else:
733+
event1 = cl.enqueue_copy(queue, ary, self.base_data,
734+
wait_for=self.events, is_blocking=not async_)
735+
726736
self.add_event(event1)
727737
else:
728738
event1 = None

0 commit comments

Comments
 (0)