Skip to content

Commit 0620f05

Browse files
authored
Make cuda_bindings build cleanly with clang (#1658)
* Make cuda_bindings build cleanly with clang
* Remove `-Werror`
* Fix Windows build
1 parent 020c48e commit 0620f05

File tree

4 files changed

+44
-24
lines changed

4 files changed

+44
-24
lines changed

cuda_bindings/cuda/bindings/cufile.pyx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2901,25 +2901,25 @@ cpdef void batch_io_destroy(intptr_t batch_idp) except*:
29012901

29022902
cpdef read_async(intptr_t fh, intptr_t buf_ptr_base, intptr_t size_p, intptr_t file_offset_p, intptr_t buf_ptr_offset_p, intptr_t bytes_read_p, intptr_t stream):
29032903
with nogil:
2904-
__status__ = cuFileReadAsync(<Handle>fh, <void*>buf_ptr_base, <size_t*>size_p, <off_t*>file_offset_p, <off_t*>buf_ptr_offset_p, <ssize_t*>bytes_read_p, <void*>stream)
2904+
__status__ = cuFileReadAsync(<Handle>fh, <void*>buf_ptr_base, <size_t*>size_p, <off_t*>file_offset_p, <off_t*>buf_ptr_offset_p, <ssize_t*>bytes_read_p, <CUstream>stream)
29052905
check_status(__status__)
29062906

29072907

29082908
cpdef write_async(intptr_t fh, intptr_t buf_ptr_base, intptr_t size_p, intptr_t file_offset_p, intptr_t buf_ptr_offset_p, intptr_t bytes_written_p, intptr_t stream):
29092909
with nogil:
2910-
__status__ = cuFileWriteAsync(<Handle>fh, <void*>buf_ptr_base, <size_t*>size_p, <off_t*>file_offset_p, <off_t*>buf_ptr_offset_p, <ssize_t*>bytes_written_p, <void*>stream)
2910+
__status__ = cuFileWriteAsync(<Handle>fh, <void*>buf_ptr_base, <size_t*>size_p, <off_t*>file_offset_p, <off_t*>buf_ptr_offset_p, <ssize_t*>bytes_written_p, <CUstream>stream)
29112911
check_status(__status__)
29122912

29132913

29142914
cpdef stream_register(intptr_t stream, unsigned flags):
29152915
with nogil:
2916-
__status__ = cuFileStreamRegister(<void*>stream, flags)
2916+
__status__ = cuFileStreamRegister(<CUstream>stream, flags)
29172917
check_status(__status__)
29182918

29192919

29202920
cpdef stream_deregister(intptr_t stream):
29212921
with nogil:
2922-
__status__ = cuFileStreamDeregister(<void*>stream)
2922+
__status__ = cuFileStreamDeregister(<CUstream>stream)
29232923
check_status(__status__)
29242924

29252925

cuda_bindings/cuda/bindings/nvml.pxd

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
5-
# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1283+gc7bc6fa75. Do not modify it directly.
66

77
from libc.stdint cimport intptr_t
88

@@ -342,7 +342,6 @@ cpdef object device_get_grid_licensable_features_v4(intptr_t device)
342342
cpdef unsigned int get_vgpu_driver_capabilities(int capability) except? 0
343343
cpdef unsigned int device_get_vgpu_capabilities(intptr_t device, int capability) except? 0
344344
cpdef str vgpu_type_get_class(unsigned int vgpu_type_id)
345-
cpdef str vgpu_type_get_name(unsigned int vgpu_type_id)
346345
cpdef unsigned int vgpu_type_get_gpu_instance_profile_id(unsigned int vgpu_type_id) except? 0
347346
cpdef tuple vgpu_type_get_device_id(unsigned int vgpu_type_id)
348347
cpdef unsigned long long vgpu_type_get_framebuffer_size(unsigned int vgpu_type_id) except? 0

cuda_bindings/cuda/bindings/nvml.pyx

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
5-
# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.1, generator version 0.3.1.dev1283+gc7bc6fa75. Do not modify it directly.
66

77
cimport cython # NOQA
88

@@ -23189,22 +23189,6 @@ cpdef str vgpu_type_get_class(unsigned int vgpu_type_id):
2318923189
return cpython.PyUnicode_FromString(vgpu_type_class)
2319023190

2319123191

23192-
cpdef str vgpu_type_get_name(unsigned int vgpu_type_id):
23193-
"""Retrieve the vGPU type name.
23194-
23195-
Args:
23196-
vgpu_type_id (unsigned int): Handle to vGPU type.
23197-
23198-
.. seealso:: `nvmlVgpuTypeGetName`
23199-
"""
23200-
cdef unsigned int size = 64
23201-
cdef char[64] vgpu_type_name
23202-
with nogil:
23203-
__status__ = nvmlVgpuTypeGetName(<nvmlVgpuTypeId_t>vgpu_type_id, vgpu_type_name, <unsigned int*>size)
23204-
check_status(__status__)
23205-
return cpython.PyUnicode_FromString(vgpu_type_name)
23206-
23207-
2320823192
cpdef unsigned int vgpu_type_get_gpu_instance_profile_id(unsigned int vgpu_type_id) except? 0:
2320923193
"""Retrieve the GPU Instance Profile ID for the given vGPU type ID. The API will return a valid GPU Instance Profile ID for the MIG capable vGPU types, else INVALID_GPU_INSTANCE_PROFILE_ID is returned.
2321023194

@@ -26228,3 +26212,19 @@ cpdef str device_get_current_clock_freqs(intptr_t device):
2622826212
__status__ = nvmlDeviceGetCurrentClockFreqs(<Device>device, current_clock_freqs)
2622926213
check_status(__status__)
2623026214
return cpython.PyUnicode_FromString(current_clock_freqs[0].str)
26215+
26216+
26217+
cpdef str vgpu_type_get_name(unsigned int vgpu_type_id):
26218+
"""Retrieve the vGPU type name.
26219+
26220+
Args:
26221+
vgpu_type_id (unsigned int): Handle to vGPU type.
26222+
26223+
.. seealso:: `nvmlVgpuTypeGetName`
26224+
"""
26225+
cdef unsigned int[1] size = [64]
26226+
cdef char[64] vgpu_type_name
26227+
with nogil:
26228+
__status__ = nvmlVgpuTypeGetName(<nvmlVgpuTypeId_t>vgpu_type_id, vgpu_type_name, <unsigned int*>size)
26229+
check_status(__status__)
26230+
return cpython.PyUnicode_FromString(vgpu_type_name)

cuda_bindings/setup.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

44
import atexit
55
import contextlib
6+
import functools
67
import glob
78
import os
89
import pathlib
910
import platform
1011
import shutil
12+
import subprocess
1113
import sys
1214
import sysconfig
1315
import tempfile
@@ -270,6 +272,7 @@ def generate_output(infile, local):
270272
"-fpermissive",
271273
"-Wno-deprecated-declarations",
272274
"-fno-var-tracking-assignments",
275+
"-Wno-unused-function",
273276
]
274277
if "--debug" in sys.argv:
275278
extra_cythonize_kwargs["gdb_debug"] = True
@@ -391,6 +394,21 @@ def do_cythonize(extensions):
391394
building_wheel = False
392395

393396

397+
@functools.lru_cache(maxsize=None)
def _is_clang(compiler_cxx):
    """Return True when ``compiler_cxx --version`` identifies itself as clang.

    Defined at module level so the cache persists across calls; the compiler
    is probed at most once per distinct command line.

    Args:
        compiler_cxx: Tuple of strings forming the C++ compiler command
            (a tuple so the argument is hashable for ``lru_cache``).

    Returns:
        bool: True if the first line of the ``--version`` output contains
        "clang"; False otherwise, including when the compiler cannot be run.
    """
    try:
        output = subprocess.check_output([*compiler_cxx, "--version"])  # noqa: S603
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable compiler binary. A failed
        # probe means "not clang" rather than a reason to abort the build.
        return False
    lines = output.decode(errors="replace").splitlines()
    return len(lines) > 0 and "clang" in lines[0]


def is_clang(compiler):
    """Report whether the given compiler object drives a clang C++ compiler.

    Args:
        compiler: A compiler object that may expose a ``compiler_cxx``
            attribute holding the C++ compiler command as a sequence of
            strings.

    Returns:
        bool: False when ``compiler_cxx`` is absent, ``None`` or empty, or
        when the compiler cannot be executed; otherwise whether the first
        line of its ``--version`` output mentions "clang".
    """
    compiler_cxx = getattr(compiler, "compiler_cxx", None)
    if not compiler_cxx:
        return False
    return _is_clang(tuple(compiler_cxx))
410+
411+
394412
class WheelsBuildExtensions(bdist_wheel):
395413
def run(self):
396414
global building_wheel
@@ -405,6 +423,9 @@ def initialize_options(self):
405423
self.parallel = nthreads
406424

407425
def build_extension(self, ext):
426+
if is_clang(self.compiler):
427+
ext.extra_compile_args = [x for x in ext.extra_compile_args if x != "-fno-var-tracking-assignments"]
428+
408429
if building_wheel and sys.platform == "linux" and "--debug" not in sys.argv:
409430
# Strip binaries to remove debug symbols
410431
ext.extra_link_args.append("-Wl,--strip-all")

0 commit comments

Comments
 (0)