Skip to content

Commit 1316c75

Browse files
committed
Update dpctl tests to remove dpctl.tensor
1 parent 634394d commit 1316c75

File tree

6 files changed

+227
-134
lines changed

6 files changed

+227
-134
lines changed

dpctl/tests/test_raw_kernel_arg.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@
1919
import ctypes
2020
import os
2121

22+
import numpy as np
2223
import pytest
2324

2425
import dpctl
25-
import dpctl.tensor
26+
import dpctl.memory as dpm
2627

2728

2829
def get_spirv_abspath(fn):
@@ -73,27 +74,33 @@ def launch_raw_arg_kernel(raw):
7374
local_size = 16
7475
global_size = local_size * 8
7576

76-
x = dpctl.tensor.ones(global_size, dtype="int32")
77-
y = dpctl.tensor.zeros(global_size, dtype="int32")
78-
x.sycl_queue.wait()
79-
y.sycl_queue.wait()
77+
x = np.ones(global_size, dtype="i4")
78+
y = np.zeros_like(x)
79+
80+
x_usm = dpm.MemoryUSMDevice(x.nbytes, queue=q)
81+
y_usm = dpm.MemoryUSMDevice(y.nbytes, queue=q)
82+
83+
ev1 = q.memcpy_async(dest=x_usm, src=x, count=x.nbytes)
8084

8185
try:
82-
q.submit(
86+
ev2 = q.submit(
8387
kernel,
8488
[
85-
x.usm_data,
86-
y.usm_data,
89+
x_usm,
90+
y_usm,
8791
raw,
8892
],
8993
[global_size],
9094
[local_size],
95+
dEvents=[ev1],
9196
)
92-
q.wait()
9397
except dpctl._sycl_queue.SyclKernelSubmitError:
9498
pytest.skip(f"Kernel submission to {q.sycl_device} failed")
9599

96-
assert dpctl.tensor.all(y == 9)
100+
ev3 = q.memcpy_async(dest=y, src=y_usm, count=y.nbytes, dEvents=[ev2])
101+
ev3.wait()
102+
103+
assert np.all(y == 9)
97104

98105

99106
def test_submit_raw_kernel_arg_pointer():

dpctl/tests/test_sycl_event.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616

1717
"""Defines unit test cases for the SyclEvent class."""
1818

19+
import numpy as np
1920
import pytest
2021

2122
import dpctl
2223
import dpctl.memory as dpctl_mem
2324
import dpctl.program as dpctl_prog
24-
import dpctl.tensor as dpt
2525
from dpctl import event_status_type as esty
2626

2727
from .helper import create_invalid_capsule
@@ -41,13 +41,15 @@ def produce_event(profiling=False):
4141
addKernel = prog.get_sycl_kernel("add")
4242

4343
n = 1024 * 1024
44-
a = dpt.arange(n, dtype="i", sycl_queue=q)
45-
args = [a.usm_data]
44+
a = np.arange(n, dtype="i")
45+
a_usm = dpctl_mem.MemoryUSMDevice(a.nbytes, queue=q)
46+
ev1 = q.memcpy_async(dest=a_usm, src=a, count=a.nbytes)
47+
args = [a_usm]
4648

4749
r = [n]
48-
ev = q.submit(addKernel, args, r)
50+
ev2 = q.submit(addKernel, args, r, dEvents=[ev1])
4951

50-
return ev
52+
return ev2
5153

5254

5355
def test_create_default_event():
@@ -162,16 +164,19 @@ def test_get_wait_list():
162164
sinKernel = prog.get_sycl_kernel("sin_k")
163165

164166
n = 1024 * 1024
165-
a = dpt.arange(n, dtype="f", sycl_queue=q)
166-
args = [a.usm_data]
167+
a = np.arange(n, dtype="f")
168+
a_usm = dpctl_mem.MemoryUSMDevice(a.nbytes, queue=q)
169+
ev_1 = q.memcpy_async(dest=a_usm, src=a, count=a.nbytes)
170+
171+
args = [a_usm]
167172

168173
r = [n]
169-
ev_1 = q.submit(addKernel, args, r)
170-
ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
171-
ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])
174+
ev_2 = q.submit(addKernel, args, r, dEvents=[ev_1])
175+
ev_3 = q.submit(sqrtKernel, args, r, dEvents=[ev_2])
176+
ev_4 = q.submit(sinKernel, args, r, dEvents=[ev_3])
172177

173178
try:
174-
wait_list = ev_3.get_wait_list()
179+
wait_list = ev_4.get_wait_list()
175180
except ValueError:
176181
pytest.fail("Failed to get a list of waiting events from SyclEvent")
177182
# FIXME: Due to an issue in underlying runtime the list returns is always

dpctl/tests/test_sycl_kernel_submit.py

Lines changed: 78 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,29 @@
2323
import pytest
2424

2525
import dpctl
26-
import dpctl.memory as dpctl_mem
26+
import dpctl.memory as dpm
2727
import dpctl.program as dpctl_prog
28-
import dpctl.tensor as dpt
2928
from dpctl._sycl_queue import kernel_arg_type
3029

3130

3231
@pytest.mark.parametrize(
3332
"ctype_str,dtype,ctypes_ctor",
3433
[
35-
("short", dpt.dtype("i2"), ctypes.c_short),
36-
("int", dpt.dtype("i4"), ctypes.c_int),
37-
("unsigned int", dpt.dtype("u4"), ctypes.c_uint),
38-
("long", dpt.dtype(np.longlong), ctypes.c_longlong),
39-
("unsigned long", dpt.dtype(np.ulonglong), ctypes.c_ulonglong),
40-
("float", dpt.dtype("f4"), ctypes.c_float),
41-
("double", dpt.dtype("f8"), ctypes.c_double),
34+
("short", np.dtype("i2"), ctypes.c_short),
35+
("int", np.dtype("i4"), ctypes.c_int),
36+
("unsigned int", np.dtype("u4"), ctypes.c_uint),
37+
("long", np.dtype(np.longlong), ctypes.c_longlong),
38+
("unsigned long", np.dtype(np.ulonglong), ctypes.c_ulonglong),
39+
("float", np.dtype("f4"), ctypes.c_float),
40+
("double", np.dtype("f8"), ctypes.c_double),
4241
],
4342
)
4443
def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
4544
try:
4645
q = dpctl.SyclQueue("opencl", property="enable_profiling")
4746
except dpctl.SyclQueueCreationError:
4847
pytest.skip("OpenCL queue could not be created")
49-
if dtype == dpt.dtype("f8") and q.sycl_device.has_aspect_fp64 is False:
48+
if dtype == np.dtype("f8") and q.sycl_device.has_aspect_fp64 is False:
5049
pytest.skip(
5150
"Device does not support double precision floating point type"
5251
)
@@ -66,35 +65,42 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
6665
n_elems = 1024 * 512
6766
lws = 128
6867
if dtype.kind in "ui":
69-
n_elems = min(n_elems, dpt.iinfo(dtype).max)
68+
n_elems = min(n_elems, np.iinfo(dtype).max)
7069
n_elems = (n_elems // lws) * lws
71-
a = dpt.arange(n_elems, dtype=dtype, sycl_queue=q)
72-
b = dpt.arange(n_elems, stop=0, step=-1, dtype=dtype, sycl_queue=q)
73-
c = dpt.zeros(n_elems, dtype=dtype, sycl_queue=q)
70+
a = np.arange(n_elems, dtype=dtype)
71+
b = np.arange(n_elems, stop=0, step=-1, dtype=dtype)
72+
c = np.zeros(n_elems, dtype=dtype)
73+
74+
a_usm = dpm.MemoryUSMDevice(a.nbytes, queue=q)
75+
b_usm = dpm.MemoryUSMDevice(b.nbytes, queue=q)
76+
c_usm = dpm.MemoryUSMDevice(c.nbytes, queue=q)
77+
78+
ev1 = q.memcpy_async(dest=a_usm, src=a, count=a.nbytes)
79+
ev2 = q.memcpy_async(dest=b_usm, src=b, count=b.nbytes)
80+
81+
dpctl.SyclEvent.wait_for([ev1, ev2])
7482

7583
d = 2
76-
args = [a.usm_data, b.usm_data, c.usm_data, ctypes_ctor(d)]
84+
args = [a_usm, b_usm, c_usm, ctypes_ctor(d)]
7785

7886
assert n_elems % lws == 0
7987

80-
b_np = dpt.asnumpy(b)
81-
a_np = dpt.asnumpy(a)
82-
8388
for r in (
8489
[
8590
n_elems,
8691
],
8792
[2, n_elems],
8893
[2, 2, n_elems],
8994
):
90-
c[:] = 0
95+
c_usm.memset()
9196
timer = dpctl.SyclTimer()
9297
with timer(q):
9398
q.submit(axpyKernel, args, r).wait()
94-
ref_c = a_np * np.array(d, dtype=dtype) + b_np
99+
ref_c = a * np.array(d, dtype=dtype) + b
95100
host_dt, device_dt = timer.dt
96101
assert type(host_dt) is float and type(device_dt) is float
97-
assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}".format(r)
102+
q.memcpy(c, c_usm, c.nbytes)
103+
assert np.allclose(c, ref_c), "Failed for {}".format(r)
98104

99105
for gr, lr in (
100106
(
@@ -106,16 +112,15 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
106112
([2, n_elems], [2, lws // 2]),
107113
([2, 2, n_elems], [2, 2, lws // 4]),
108114
):
109-
c[:] = 0
115+
c_usm.memset()
110116
timer = dpctl.SyclTimer()
111117
with timer(q):
112118
q.submit(axpyKernel, args, gr, lr, [dpctl.SyclEvent()]).wait()
113-
ref_c = a_np * np.array(d, dtype=dtype) + b_np
119+
ref_c = a * np.array(d, dtype=dtype) + b
114120
host_dt, device_dt = timer.dt
115121
assert type(host_dt) is float and type(device_dt) is float
116-
assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}, {}".formatg(
117-
r, lr
118-
)
122+
q.memcpy(c, c_usm, c.nbytes)
123+
assert np.allclose(c, ref_c), "Failed for {}, {}".format(gr, lr)
119124

120125

121126
def test_submit_async():
@@ -124,33 +129,41 @@ def test_submit_async():
124129
except dpctl.SyclQueueCreationError:
125130
pytest.skip("OpenCL queue could not be created")
126131
oclSrc = (
127-
"kernel void kern1(global unsigned int *res, unsigned int mod) {"
132+
"kernel void kern1("
133+
" global unsigned int *res_base, ulong res_off, unsigned int mod) {"
128134
" size_t unused_sum = 0;"
129135
" size_t i = 0; "
130136
" for (i = 0; i < 4000; i++) { "
131137
" unused_sum += i;"
132138
" } "
139+
" global unsigned int *res = res_base + (size_t)res_off;"
133140
" size_t index = get_global_id(0);"
134141
" int ri = (index % mod);"
135142
" res[index] = (ri * ri) % mod;"
136143
"}"
137144
" "
138-
"kernel void kern2(global unsigned int *res, unsigned int mod) {"
145+
"kernel void kern2("
146+
" global unsigned int *res_base, ulong res_off, unsigned int mod) {"
139147
" size_t unused_sum = 0;"
140148
" size_t i = 0; "
141149
" for (i = 0; i < 4000; i++) { "
142150
" unused_sum += i;"
143151
" } "
152+
" global unsigned int *res = res_base + (size_t)res_off;"
144153
" size_t index = get_global_id(0);"
145154
" int ri = (index % mod);"
146155
" int ri2 = (ri * ri) % mod;"
147156
" res[index] = (ri2 * ri) % mod;"
148157
"}"
149158
" "
150159
"kernel void kern3("
151-
" global unsigned int *res, global unsigned int *arg1, "
152-
" global unsigned int *arg2)"
160+
" global unsigned int *res_base, ulong res_off,"
161+
" global unsigned int *arg1_base, ulong arg1_off,"
162+
" global unsigned int *arg2_base, ulong arg2_off)"
153163
"{"
164+
" global unsigned int *res = res_base + (size_t)res_off;"
165+
" global unsigned int *arg1 = arg1_base + (size_t)arg1_off;"
166+
" global unsigned int *arg2 = arg2_base + (size_t)arg2_off;"
154167
" size_t index = get_global_id(0);"
155168
" size_t i = 0; "
156169
" size_t unused_sum = 0;"
@@ -177,10 +190,10 @@ def test_submit_async():
177190
n = f * 1024
178191
n_alloc = 4 * n
179192

180-
X = dpt.empty((3, n_alloc), dtype="u4", usm_type="device", sycl_queue=q)
181-
first_row = dpctl_mem.as_usm_memory(X[0])
182-
second_row = dpctl_mem.as_usm_memory(X[1])
183-
third_row = dpctl_mem.as_usm_memory(X[2])
193+
x = np.empty((3, n_alloc), dtype="u4")
194+
x_usm = dpm.MemoryUSMDevice(x.nbytes, queue=q)
195+
196+
e1 = q.memcpy_async(dest=x_usm, src=x, count=x.nbytes)
184197

185198
p1, p2 = 17, 27
186199

@@ -189,26 +202,39 @@ def test_submit_async():
189202
e1 = q.submit_async(
190203
kern1Kernel,
191204
[
192-
first_row,
205+
x_usm,
206+
ctypes.c_ulonglong(0),
193207
ctypes.c_uint(p1),
194208
],
195209
[
196210
n,
197211
],
212+
None,
213+
[e1],
198214
)
199215
e2 = q.submit_async(
200216
kern2Kernel,
201217
[
202-
second_row,
218+
x_usm,
219+
ctypes.c_ulonglong(n_alloc),
203220
ctypes.c_uint(p2),
204221
],
205222
[
206223
n,
207224
],
225+
None,
226+
[e1],
208227
)
209228
e3 = q.submit_async(
210229
kern3Kernel,
211-
[third_row, first_row, second_row],
230+
[
231+
x_usm,
232+
ctypes.c_ulonglong(2 * n_alloc),
233+
x_usm,
234+
ctypes.c_ulonglong(0),
235+
x_usm,
236+
ctypes.c_ulonglong(n_alloc),
237+
],
212238
[
213239
n,
214240
],
@@ -218,9 +244,7 @@ def test_submit_async():
218244
e3_st = e3.execution_status
219245
e2_st = e2.execution_status
220246
e1_st = e1.execution_status
221-
ht_e = q._submit_keep_args_alive(
222-
[first_row, second_row, third_row], [e1, e2, e3]
223-
)
247+
ht_e = q._submit_keep_args_alive([x_usm], [e1, e2, e3])
224248
are_complete = [
225249
e == status_complete
226250
for e in (
@@ -240,14 +264,13 @@ def test_submit_async():
240264
break
241265

242266
assert async_detected, "No evidence of async submission detected, unlucky?"
243-
Xnp = dpt.asnumpy(X)
244-
Xref = np.empty((3, n), dtype="u4")
267+
q.memcpy(dest=x, src=x_usm, count=x.nbytes)
268+
x_ref = np.empty((3, n), dtype="u4")
245269
for i in range(n):
246-
Xref[0, i] = (i * i) % p1
247-
Xref[1, i] = (i * i * i) % p2
248-
Xref[2, i] = min(Xref[0, i], Xref[1, i])
249-
250-
assert np.array_equal(Xnp[:, :n], Xref[:, :n])
270+
x_ref[0, i] = (i * i) % p1
271+
x_ref[1, i] = (i * i * i) % p2
272+
x_ref[2, i] = min(x_ref[0, i], x_ref[1, i])
273+
assert np.array_equal(x[:, :n], x_ref[:, :n])
251274

252275

253276
def _check_kernel_arg_type_instance(kati):
@@ -303,19 +326,20 @@ def test_submit_local_accessor_arg():
303326
krn = prog.get_sycl_kernel("_ZTS14SyclKernel_SLMIlE")
304327
lws = 32
305328
gws = lws * 10
306-
x = dpt.ones(gws, dtype="i8")
307-
x.sycl_queue.wait()
329+
x = np.ones(gws, dtype="i8")
330+
res = np.empty_like(x)
331+
x_usm = dpm.MemoryUSMDevice(x.nbytes, queue=q)
332+
q.memcpy(dest=x_usm, src=x, count=x.nbytes)
308333
try:
309334
e = q.submit(
310335
krn,
311-
[x.usm_data, dpctl.LocalAccessor("i8", (lws,))],
336+
[x_usm, dpctl.LocalAccessor("i8", (lws,))],
312337
[gws],
313338
[lws],
314339
)
315340
e.wait()
316341
except dpctl._sycl_queue.SyclKernelSubmitError:
317342
pytest.skip(f"Kernel submission failed for device {q.sycl_device}")
318-
expected = dpt.arange(1, x.size + 1, dtype=x.dtype, device=x.device) * (
319-
2 * lws
320-
)
321-
assert dpt.all(x == expected)
343+
q.memcpy(dest=res, src=x_usm, count=x.nbytes)
344+
expected = np.arange(1, x.size + 1, dtype=x.dtype) * (2 * lws)
345+
assert np.all(res == expected)

0 commit comments

Comments
 (0)