66
77#example-begin
88import cuda .tile as ct
9- import cupy
109
1110TILE_SIZE = 16
1211
@@ -18,18 +17,17 @@ def vector_add_kernel(a, b, result):
1817 b_tile = ct .load (b , index = (block_id ,), shape = (TILE_SIZE ,))
1918 result_tile = a_tile + b_tile
2019 ct .store (result , index = (block_id ,), tile = result_tile )
21-
22- # Host-side function that launches the above kernel.
23- def vector_add (a : cupy .ndarray , b : cupy .ndarray , result : cupy .ndarray ):
24- assert a .shape == b .shape == result .shape
25- grid = (ct .cdiv (a .shape [0 ], TILE_SIZE ), 1 , 1 )
26- ct .launch (cupy .cuda .get_current_stream (), grid , vector_add_kernel , (a , b , result ))
2720#example-end
2821
29-
3022import numpy as np
3123
32- def test_vector_add ():
24+ def test_vector_add (cupy ):
25+ # Host-side function that launches the above kernel.
26+ def vector_add (a : cupy .ndarray , b : cupy .ndarray , result : cupy .ndarray ):
27+ assert a .shape == b .shape == result .shape
28+ grid = (ct .cdiv (a .shape [0 ], TILE_SIZE ), 1 , 1 )
29+ ct .launch (cupy .cuda .get_current_stream (), grid , vector_add_kernel , (a , b , result ))
30+
3331 rng = cupy .random .default_rng ()
3432 a = rng .random (128 )
3533 b = rng .random (128 )
0 commit comments