1313
1414import cupy as cp
1515
16- from cuda .core import Device , LaunchConfig , Program , launch , system
16+ from cuda .core import Device , LaunchConfig , Program , ProgramOptions , launch , system
1717
1818if system .get_num_devices () < 2 :
1919 print ("this example requires at least 2 GPUs" , file = sys .stderr )
@@ -56,9 +56,9 @@ def __cuda_stream__(self):
5656 }
5757}
5858"""
59- prog_add = Program (code_add , code_type = "c++" , options = { " std" : " c++17" , " arch" : f"sm_{ dev0 .arch } " } )
59+ prog_add = Program (code_add , code_type = "c++" , options = ProgramOptions ( std = " c++17" , arch = f"sm_{ dev0 .arch } " ) )
6060 mod_add = prog_add .compile ("cubin" )
61- ker_add = mod_add .get_kernel ("vector_add" )
61+ add_kernel = mod_add .get_kernel ("vector_add" )
6262
6363 # Set GPU 1
6464 dev1 = Device (1 )
@@ -78,9 +78,9 @@ def __cuda_stream__(self):
7878 }
7979}
8080"""
81- prog_sub = Program (code_sub , code_type = "c++" , options = { " std" : " c++17" , " arch" : f"sm_{ dev1 .arch } " } )
81+ prog_sub = Program (code_sub , code_type = "c++" , options = ProgramOptions ( std = " c++17" , arch = f"sm_{ dev1 .arch } " ) )
8282 mod_sub = prog_sub .compile ("cubin" )
83- ker_sub = mod_sub .get_kernel ("vector_sub" )
83+ sub_kernel = mod_sub .get_kernel ("vector_sub" )
8484
8585 # Create launch configs for each kernel that will be executed on the respective
8686 # CUDA streams.
@@ -103,7 +103,7 @@ def __cuda_stream__(self):
103103 stream0 .wait (cp_stream0 )
104104
105105 # Launch the add kernel on GPU 0 / stream 0
106- launch (stream0 , config0 , ker_add , a .data .ptr , b .data .ptr , c .data .ptr , cp .uint64 (size ))
106+ launch (stream0 , config0 , add_kernel , a .data .ptr , b .data .ptr , c .data .ptr , cp .uint64 (size ))
107107
108108 # Allocate memory on GPU 1
109109 # Note: This runs on CuPy's current stream for GPU 1.
@@ -118,7 +118,7 @@ def __cuda_stream__(self):
118118 stream1 .wait (cp_stream1 )
119119
120120 # Launch the subtract kernel on GPU 1 / stream 1
121- launch (stream1 , config1 , ker_sub , x .data .ptr , y .data .ptr , z .data .ptr , cp .uint64 (size ))
121+ launch (stream1 , config1 , sub_kernel , x .data .ptr , y .data .ptr , z .data .ptr , cp .uint64 (size ))
122122
123123 # Synchronize both GPUs and validate the results
124124 dev0 .set_current ()
0 commit comments