Skip to content

Commit 5ecba20

Browse files
committed
lint
1 parent a368a48 commit 5ecba20

6 files changed

Lines changed: 52 additions & 54 deletions

File tree

benchmarks/cuda_bindings/benchmarks/bench_ctx_device.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -15,48 +15,48 @@
1515

1616

1717
def bench_ctx_get_current(loops: int) -> float:
18-
_cuCtxGetCurrent = cuda.cuCtxGetCurrent
18+
_fn = cuda.cuCtxGetCurrent
1919

2020
t0 = time.perf_counter()
2121
for _ in range(loops):
22-
_cuCtxGetCurrent()
22+
_fn()
2323
return time.perf_counter() - t0
2424

2525

2626
def bench_ctx_set_current(loops: int) -> float:
27-
_cuCtxSetCurrent = cuda.cuCtxSetCurrent
27+
_fn = cuda.cuCtxSetCurrent
2828
_ctx = CTX
2929

3030
t0 = time.perf_counter()
3131
for _ in range(loops):
32-
_cuCtxSetCurrent(_ctx)
32+
_fn(_ctx)
3333
return time.perf_counter() - t0
3434

3535

3636
def bench_ctx_get_device(loops: int) -> float:
37-
_cuCtxGetDevice = cuda.cuCtxGetDevice
37+
_fn = cuda.cuCtxGetDevice
3838

3939
t0 = time.perf_counter()
4040
for _ in range(loops):
41-
_cuCtxGetDevice()
41+
_fn()
4242
return time.perf_counter() - t0
4343

4444

4545
def bench_device_get(loops: int) -> float:
46-
_cuDeviceGet = cuda.cuDeviceGet
46+
_fn = cuda.cuDeviceGet
4747

4848
t0 = time.perf_counter()
4949
for _ in range(loops):
50-
_cuDeviceGet(0)
50+
_fn(0)
5151
return time.perf_counter() - t0
5252

5353

5454
def bench_device_get_attribute(loops: int) -> float:
55-
_cuDeviceGetAttribute = cuda.cuDeviceGetAttribute
55+
_fn = cuda.cuDeviceGetAttribute
5656
_attr = ATTRIBUTE
5757
_dev = DEVICE
5858

5959
t0 = time.perf_counter()
6060
for _ in range(loops):
61-
_cuDeviceGetAttribute(_attr, _dev)
61+
_fn(_attr, _dev)
6262
return time.perf_counter() - t0

benchmarks/cuda_bindings/benchmarks/bench_event.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,43 +20,43 @@
2020

2121

2222
def bench_event_create_destroy(loops: int) -> float:
23-
_cuEventCreate = cuda.cuEventCreate
24-
_cuEventDestroy = cuda.cuEventDestroy
23+
_create = cuda.cuEventCreate
24+
_destroy = cuda.cuEventDestroy
2525
_flags = EVENT_FLAGS
2626

2727
t0 = time.perf_counter()
2828
for _ in range(loops):
29-
_, e = _cuEventCreate(_flags)
30-
_cuEventDestroy(e)
29+
_, e = _create(_flags)
30+
_destroy(e)
3131
return time.perf_counter() - t0
3232

3333

3434
def bench_event_record(loops: int) -> float:
35-
_cuEventRecord = cuda.cuEventRecord
35+
_fn = cuda.cuEventRecord
3636
_event = EVENT
3737
_stream = STREAM
3838

3939
t0 = time.perf_counter()
4040
for _ in range(loops):
41-
_cuEventRecord(_event, _stream)
41+
_fn(_event, _stream)
4242
return time.perf_counter() - t0
4343

4444

4545
def bench_event_query(loops: int) -> float:
46-
_cuEventQuery = cuda.cuEventQuery
46+
_fn = cuda.cuEventQuery
4747
_event = EVENT
4848

4949
t0 = time.perf_counter()
5050
for _ in range(loops):
51-
_cuEventQuery(_event)
51+
_fn(_event)
5252
return time.perf_counter() - t0
5353

5454

5555
def bench_event_synchronize(loops: int) -> float:
56-
_cuEventSynchronize = cuda.cuEventSynchronize
56+
_fn = cuda.cuEventSynchronize
5757
_event = EVENT
5858

5959
t0 = time.perf_counter()
6060
for _ in range(loops):
61-
_cuEventSynchronize(_event)
61+
_fn(_event)
6262
return time.perf_counter() - t0

benchmarks/cuda_bindings/benchmarks/bench_launch.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -82,52 +82,52 @@ def _ensure_launch_state() -> None:
8282

8383
def bench_launch_empty_kernel(loops: int) -> float:
8484
_ensure_launch_state()
85-
_cuLaunchKernel = cuda.cuLaunchKernel
85+
_fn = cuda.cuLaunchKernel
8686
_kernel = EMPTY_KERNEL
8787
_stream = STREAM
8888

8989
t0 = time.perf_counter()
9090
for _ in range(loops):
91-
_cuLaunchKernel(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, 0, 0)
91+
_fn(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, 0, 0)
9292
return time.perf_counter() - t0
9393

9494

9595
def bench_launch_small_kernel(loops: int) -> float:
9696
_ensure_launch_state()
97-
_cuLaunchKernel = cuda.cuLaunchKernel
97+
_fn = cuda.cuLaunchKernel
9898
_kernel = SMALL_KERNEL
9999
_stream = STREAM
100100
_args = (FLOAT_PTR,)
101101
_arg_types = (None,)
102102

103103
t0 = time.perf_counter()
104104
for _ in range(loops):
105-
_cuLaunchKernel(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, (_args, _arg_types), 0)
105+
_fn(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, (_args, _arg_types), 0)
106106
return time.perf_counter() - t0
107107

108108

109109
def bench_launch_16_args(loops: int) -> float:
110110
_ensure_launch_state()
111-
_cuLaunchKernel = cuda.cuLaunchKernel
111+
_fn = cuda.cuLaunchKernel
112112
_kernel = KERNEL_16_ARGS
113113
_stream = STREAM
114114
_args = INT_PTRS
115115
_arg_types = (None,) * 16
116116

117117
t0 = time.perf_counter()
118118
for _ in range(loops):
119-
_cuLaunchKernel(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, (_args, _arg_types), 0)
119+
_fn(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, (_args, _arg_types), 0)
120120
return time.perf_counter() - t0
121121

122122

123123
def bench_launch_16_args_pre_packed(loops: int) -> float:
124124
_ensure_launch_state()
125-
_cuLaunchKernel = cuda.cuLaunchKernel
125+
_fn = cuda.cuLaunchKernel
126126
_kernel = KERNEL_16_ARGS
127127
_stream = STREAM
128128
_packed = PACKED_16
129129

130130
t0 = time.perf_counter()
131131
for _ in range(loops):
132-
_cuLaunchKernel(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, _packed, 0)
132+
_fn(_kernel, 1, 1, 1, 1, 1, 1, 0, _stream, _packed, 0)
133133
return time.perf_counter() - t0

benchmarks/cuda_bindings/benchmarks/bench_memory.py

Lines changed: 14 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,9 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
import ctypes
65
import time
76

87
import numpy as np
9-
108
from runner.runtime import alloc_persistent, ensure_context
119

1210
from cuda.bindings import driver as cuda
@@ -30,61 +28,61 @@
3028

3129

3230
def bench_mem_alloc_free(loops: int) -> float:
33-
_cuMemAlloc = cuda.cuMemAlloc
34-
_cuMemFree = cuda.cuMemFree
31+
_alloc = cuda.cuMemAlloc
32+
_free = cuda.cuMemFree
3533
_size = ALLOC_SIZE
3634

3735
t0 = time.perf_counter()
3836
for _ in range(loops):
39-
_, ptr = _cuMemAlloc(_size)
40-
_cuMemFree(ptr)
37+
_, ptr = _alloc(_size)
38+
_free(ptr)
4139
return time.perf_counter() - t0
4240

4341

4442
def bench_mem_alloc_async_free_async(loops: int) -> float:
45-
_cuMemAllocAsync = cuda.cuMemAllocAsync
46-
_cuMemFreeAsync = cuda.cuMemFreeAsync
43+
_alloc = cuda.cuMemAllocAsync
44+
_free = cuda.cuMemFreeAsync
4745
_size = ALLOC_SIZE
4846
_stream = STREAM
4947

5048
t0 = time.perf_counter()
5149
for _ in range(loops):
52-
_, ptr = _cuMemAllocAsync(_size, _stream)
53-
_cuMemFreeAsync(ptr, _stream)
50+
_, ptr = _alloc(_size, _stream)
51+
_free(ptr, _stream)
5452
return time.perf_counter() - t0
5553

5654

5755
def bench_memcpy_htod(loops: int) -> float:
58-
_cuMemcpyHtoD = cuda.cuMemcpyHtoD
56+
_fn = cuda.cuMemcpyHtoD
5957
_dst = DST_DPTR
6058
_src = HOST_SRC
6159
_size = COPY_SIZE
6260

6361
t0 = time.perf_counter()
6462
for _ in range(loops):
65-
_cuMemcpyHtoD(_dst, _src, _size)
63+
_fn(_dst, _src, _size)
6664
return time.perf_counter() - t0
6765

6866

6967
def bench_memcpy_dtoh(loops: int) -> float:
70-
_cuMemcpyDtoH = cuda.cuMemcpyDtoH
68+
_fn = cuda.cuMemcpyDtoH
7169
_dst = HOST_DST
7270
_src = SRC_DPTR
7371
_size = COPY_SIZE
7472

7573
t0 = time.perf_counter()
7674
for _ in range(loops):
77-
_cuMemcpyDtoH(_dst, _src, _size)
75+
_fn(_dst, _src, _size)
7876
return time.perf_counter() - t0
7977

8078

8179
def bench_memcpy_dtod(loops: int) -> float:
82-
_cuMemcpyDtoD = cuda.cuMemcpyDtoD
80+
_fn = cuda.cuMemcpyDtoD
8381
_dst = DST_DPTR
8482
_src = SRC_DPTR
8583
_size = COPY_SIZE
8684

8785
t0 = time.perf_counter()
8886
for _ in range(loops):
89-
_cuMemcpyDtoD(_dst, _src, _size)
87+
_fn(_dst, _src, _size)
9088
return time.perf_counter() - t0

benchmarks/cuda_bindings/benchmarks/bench_pointer_attributes.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@
1515

1616
def bench_pointer_get_attribute(loops: int) -> float:
1717
# Local references to avoid global lookups in the hot loop
18-
_cuPointerGetAttribute = cuda.cuPointerGetAttribute
18+
_fn = cuda.cuPointerGetAttribute
1919
_attr = ATTRIBUTE
2020
_ptr = PTR
2121

2222
t0 = time.perf_counter()
2323
for _ in range(loops):
24-
_cuPointerGetAttribute(_attr, _ptr)
24+
_fn(_attr, _ptr)
2525
return time.perf_counter() - t0

benchmarks/cuda_bindings/benchmarks/bench_stream.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,32 +14,32 @@
1414

1515

1616
def bench_stream_create_destroy(loops: int) -> float:
17-
_cuStreamCreate = cuda.cuStreamCreate
18-
_cuStreamDestroy = cuda.cuStreamDestroy
17+
_create = cuda.cuStreamCreate
18+
_destroy = cuda.cuStreamDestroy
1919
_flags = cuda.CUstream_flags.CU_STREAM_NON_BLOCKING.value
2020

2121
t0 = time.perf_counter()
2222
for _ in range(loops):
23-
_, s = _cuStreamCreate(_flags)
24-
_cuStreamDestroy(s)
23+
_, s = _create(_flags)
24+
_destroy(s)
2525
return time.perf_counter() - t0
2626

2727

2828
def bench_stream_query(loops: int) -> float:
29-
_cuStreamQuery = cuda.cuStreamQuery
29+
_fn = cuda.cuStreamQuery
3030
_stream = STREAM
3131

3232
t0 = time.perf_counter()
3333
for _ in range(loops):
34-
_cuStreamQuery(_stream)
34+
_fn(_stream)
3535
return time.perf_counter() - t0
3636

3737

3838
def bench_stream_synchronize(loops: int) -> float:
39-
_cuStreamSynchronize = cuda.cuStreamSynchronize
39+
_fn = cuda.cuStreamSynchronize
4040
_stream = STREAM
4141

4242
t0 = time.perf_counter()
4343
for _ in range(loops):
44-
_cuStreamSynchronize(_stream)
44+
_fn(_stream)
4545
return time.perf_counter() - t0

0 commit comments

Comments
 (0)