@@ -82,52 +82,52 @@ def _ensure_launch_state() -> None:
8282
8383def bench_launch_empty_kernel (loops : int ) -> float :
8484 _ensure_launch_state ()
85- _cuLaunchKernel = cuda .cuLaunchKernel
85+ _fn = cuda .cuLaunchKernel
8686 _kernel = EMPTY_KERNEL
8787 _stream = STREAM
8888
8989 t0 = time .perf_counter ()
9090 for _ in range (loops ):
91- _cuLaunchKernel (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , 0 , 0 )
91+ _fn (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , 0 , 0 )
9292 return time .perf_counter () - t0
9393
9494
9595def bench_launch_small_kernel (loops : int ) -> float :
9696 _ensure_launch_state ()
97- _cuLaunchKernel = cuda .cuLaunchKernel
97+ _fn = cuda .cuLaunchKernel
9898 _kernel = SMALL_KERNEL
9999 _stream = STREAM
100100 _args = (FLOAT_PTR ,)
101101 _arg_types = (None ,)
102102
103103 t0 = time .perf_counter ()
104104 for _ in range (loops ):
105- _cuLaunchKernel (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , (_args , _arg_types ), 0 )
105+ _fn (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , (_args , _arg_types ), 0 )
106106 return time .perf_counter () - t0
107107
108108
109109def bench_launch_16_args (loops : int ) -> float :
110110 _ensure_launch_state ()
111- _cuLaunchKernel = cuda .cuLaunchKernel
111+ _fn = cuda .cuLaunchKernel
112112 _kernel = KERNEL_16_ARGS
113113 _stream = STREAM
114114 _args = INT_PTRS
115115 _arg_types = (None ,) * 16
116116
117117 t0 = time .perf_counter ()
118118 for _ in range (loops ):
119- _cuLaunchKernel (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , (_args , _arg_types ), 0 )
119+ _fn (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , (_args , _arg_types ), 0 )
120120 return time .perf_counter () - t0
121121
122122
123123def bench_launch_16_args_pre_packed (loops : int ) -> float :
124124 _ensure_launch_state ()
125- _cuLaunchKernel = cuda .cuLaunchKernel
125+ _fn = cuda .cuLaunchKernel
126126 _kernel = KERNEL_16_ARGS
127127 _stream = STREAM
128128 _packed = PACKED_16
129129
130130 t0 = time .perf_counter ()
131131 for _ in range (loops ):
132- _cuLaunchKernel (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , _packed , 0 )
132+ _fn (_kernel , 1 , 1 , 1 , 1 , 1 , 1 , 0 , _stream , _packed , 0 )
133133 return time .perf_counter () - t0
0 commit comments