Skip to content

Commit 29c5777

Browse files
committed
Merge branch 'main' into error_explanations
2 parents c99e8d1 + 111c713 commit 29c5777

11 files changed

Lines changed: 167 additions & 92 deletions

File tree

cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pxd.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,9 +108,9 @@ from libcpp cimport bool
108108
{{if 'cudaCreateSurfaceObject' in found_functions}}cdef cudaError_t _cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
109109
{{if 'cudaGetTextureObjectResourceDesc' in found_functions}}cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
110110
{{if 'cudaGraphicsEGLRegisterImage' in found_functions}}cdef cudaError_t _cudaGraphicsEGLRegisterImage(cudaGraphicsResource_t* pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
111-
{{if 'cudaEGLStreamProducerPresentFrame' in found_functions}}cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
112-
{{if 'cudaEGLStreamProducerReturnFrame' in found_functions}}cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
113-
{{if 'cudaGraphicsResourceGetMappedEglFrame' in found_functions}}cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
111+
{{if True}}cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
112+
{{if True}}cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
113+
{{if True}}cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
114114
{{if True}}cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
115115
{{if 'cudaArrayGetMemoryRequirements' in found_functions}}cdef cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
116116
{{if 'cudaMipmappedArrayGetMemoryRequirements' in found_functions}}cdef cudaError_t _cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}

cuda_bindings/cuda/bindings/_lib/cyruntime/cyruntime.pyx.in

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2206,6 +2206,7 @@ cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, c
22062206
return err
22072207

22082208
{{endif}}
2209+
{{if True}}
22092210

22102211
cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
22112212
cdef cudaError_t err = cudaSuccess
@@ -2222,6 +2223,9 @@ cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* con
22222223
_setLastError(err)
22232224
return err
22242225

2226+
{{endif}}
2227+
{{if True}}
2228+
22252229
cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
22262230
cdef cudaError_t err = cudaSuccess
22272231
err = m_global.lazyInitContextState()
@@ -2242,6 +2246,9 @@ cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn
22422246
return err
22432247
return err
22442248

2249+
{{endif}}
2250+
{{if True}}
2251+
22452252
cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
22462253
cdef cudaError_t err = cudaSuccess
22472254
err = m_global.lazyInitContextState()
@@ -2259,9 +2266,13 @@ cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame,
22592266
return err
22602267
return err
22612268

2269+
{{endif}}
2270+
{{if True}}
2271+
22622272
cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
22632273
return cudaErrorNotSupported
22642274

2275+
{{endif}}
22652276
{{if 'cudaArrayGetMemoryRequirements' in found_functions}}
22662277

22672278
cdef cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil:

cuda_bindings/docs/source/release/12.X.Y-notes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ Highlights
99

1010
* The ``cuda.bindings.nvvm`` Python module was added, wrapping the
1111
`libNVVM C API <https://docs.nvidia.com/cuda/libnvvm-api/>`_.
12+
* Source build error checking added for missing required headers

cuda_bindings/setup.py

Lines changed: 96 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,11 @@
4848
# ----------------------------------------------------------------------
4949
# Parse user-provided CUDA headers
5050

51-
header_dict = {
52-
"driver": ["cuda.h", "cudaProfiler.h", "cudaEGL.h", "cudaGL.h", "cudaVDPAU.h"],
51+
required_headers = {
52+
"driver": [
53+
"cuda.h",
54+
"cudaProfiler.h",
55+
],
5356
"runtime": [
5457
"driver_types.h",
5558
"vector_types.h",
@@ -61,32 +64,42 @@
6164
"device_types.h",
6265
"driver_functions.h",
6366
"cuda_profiler_api.h",
64-
"cuda_egl_interop.h",
65-
"cuda_gl_interop.h",
66-
"cuda_vdpau_interop.h",
6767
],
68-
"nvrtc": ["nvrtc.h"],
68+
"nvrtc": [
69+
"nvrtc.h",
70+
],
71+
# During compilation, Cython will reference C headers that are not
72+
# explicitly parsed above. These are the known dependencies:
73+
#
74+
# - crt/host_defines.h
75+
# - builtin_types.h
76+
# - cuda_device_runtime_api.h
6977
}
7078

71-
replace = {
72-
" __device_builtin__ ": " ",
73-
"CUDARTAPI ": " ",
74-
"typedef __device_builtin__ enum cudaError cudaError_t;": "typedef cudaError cudaError_t;",
75-
"typedef __device_builtin__ enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;",
76-
"typedef enum cudaError cudaError_t;": "typedef cudaError cudaError_t;",
77-
"typedef enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;",
78-
"typedef enum cudaDataType_t cudaDataType_t;": "",
79-
"typedef enum libraryPropertyType_t libraryPropertyType_t;": "",
80-
" enum ": " ",
81-
", enum ": ", ",
82-
"\\(enum ": "(",
83-
}
8479

85-
found_types = []
86-
found_functions = []
87-
found_values = []
88-
found_struct = []
89-
struct_list = {}
80+
def fetch_header_paths(required_headers, include_path_list):
81+
header_dict = {}
82+
missing_headers = []
83+
for library, header_list in required_headers.items():
84+
header_paths = []
85+
for header in header_list:
86+
path_candidate = [os.path.join(path, header) for path in include_path_list]
87+
for path in path_candidate:
88+
if os.path.exists(path):
89+
header_paths += [path]
90+
break
91+
else:
92+
missing_headers += [header]
93+
94+
# Update dictionary with validated paths to headers
95+
header_dict[library] = header_paths
96+
97+
if missing_headers:
98+
error_message = "Couldn't find required headers: "
99+
error_message += ", ".join([header for header in missing_headers])
100+
raise RuntimeError(f'{error_message}\nIs CUDA_HOME setup correctly? (CUDA_HOME="{CUDA_HOME}")')
101+
102+
return header_dict
90103

91104

92105
class Struct:
@@ -117,52 +130,66 @@ def __repr__(self):
117130
return f"{self._name}: {self._member_names} with types {self._member_types}"
118131

119132

120-
include_path_list = [os.path.join(path, "include") for path in CUDA_HOME]
121-
print(f'Parsing headers in "{include_path_list}" (Caching = {PARSER_CACHING})')
122-
for library, header_list in header_dict.items():
123-
header_paths = []
124-
for header in header_list:
125-
path_candidate = [os.path.join(path, header) for path in include_path_list]
126-
for path in path_candidate:
127-
if os.path.exists(path):
128-
header_paths += [path]
129-
break
130-
if not os.path.exists(path):
131-
print(f"Missing header {header}")
132-
133-
print(f"Parsing {library} headers")
134-
parser = CParser(
135-
header_paths, cache="./cache_{}".format(library.split(".")[0]) if PARSER_CACHING else None, replace=replace
136-
)
133+
def parse_headers(header_dict):
134+
found_types = []
135+
found_functions = []
136+
found_values = []
137+
found_struct = []
138+
struct_list = {}
139+
140+
replace = {
141+
" __device_builtin__ ": " ",
142+
"CUDARTAPI ": " ",
143+
"typedef __device_builtin__ enum cudaError cudaError_t;": "typedef cudaError cudaError_t;",
144+
"typedef __device_builtin__ enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;",
145+
"typedef enum cudaError cudaError_t;": "typedef cudaError cudaError_t;",
146+
"typedef enum cudaOutputMode cudaOutputMode_t;": "typedef cudaOutputMode cudaOutputMode_t;",
147+
"typedef enum cudaDataType_t cudaDataType_t;": "",
148+
"typedef enum libraryPropertyType_t libraryPropertyType_t;": "",
149+
" enum ": " ",
150+
", enum ": ", ",
151+
"\\(enum ": "(",
152+
}
153+
154+
print(f'Parsing headers in "{include_path_list}" (Caching = {PARSER_CACHING})')
155+
for library, header_paths in header_dict.items():
156+
print(f"Parsing {library} headers")
157+
parser = CParser(
158+
header_paths, cache="./cache_{}".format(library.split(".")[0]) if PARSER_CACHING else None, replace=replace
159+
)
160+
161+
if library == "driver":
162+
CUDA_VERSION = parser.defs["macros"].get("CUDA_VERSION", "Unknown")
163+
print(f"Found CUDA_VERSION: {CUDA_VERSION}")
164+
165+
# Combine types with others since they sometimes get tangled
166+
found_types += {key for key in parser.defs["types"]}
167+
found_types += {key for key in parser.defs["structs"]}
168+
found_types += {key for key in parser.defs["unions"]}
169+
found_types += {key for key in parser.defs["enums"]}
170+
found_functions += {key for key in parser.defs["functions"]}
171+
found_values += {key for key in parser.defs["values"]}
172+
173+
for key, value in parser.defs["structs"].items():
174+
struct_list[key] = Struct(key, value["members"])
175+
for key, value in parser.defs["unions"].items():
176+
struct_list[key] = Struct(key, value["members"])
177+
178+
for key, value in struct_list.items():
179+
if key.startswith("anon_union") or key.startswith("anon_struct"):
180+
continue
137181

138-
if library == "driver":
139-
CUDA_VERSION = parser.defs["macros"].get("CUDA_VERSION", "Unknown")
140-
print(f"Found CUDA_VERSION: {CUDA_VERSION}")
141-
142-
# Combine types with others since they sometimes get tangled
143-
found_types += {key for key in parser.defs["types"]}
144-
found_types += {key for key in parser.defs["structs"]}
145-
found_types += {key for key in parser.defs["unions"]}
146-
found_types += {key for key in parser.defs["enums"]}
147-
found_functions += {key for key in parser.defs["functions"]}
148-
found_values += {key for key in parser.defs["values"]}
149-
150-
for key, value in parser.defs["structs"].items():
151-
struct_list[key] = Struct(key, value["members"])
152-
for key, value in parser.defs["unions"].items():
153-
struct_list[key] = Struct(key, value["members"])
154-
155-
for key, value in struct_list.items():
156-
if key.startswith("anon_union") or key.startswith("anon_struct"):
157-
continue
158-
159-
found_struct += [key]
160-
discovered = value.discoverMembers(struct_list, key)
161-
if discovered:
162-
found_struct += discovered
163-
164-
if len(found_functions) == 0:
165-
raise RuntimeError(f'Parser found no functions. Is CUDA_HOME setup correctly? (CUDA_HOME="{CUDA_HOME}")')
182+
found_struct += [key]
183+
discovered = value.discoverMembers(struct_list, key)
184+
if discovered:
185+
found_struct += discovered
186+
187+
return found_types, found_functions, found_values, found_struct, struct_list
188+
189+
190+
include_path_list = [os.path.join(path, "include") for path in CUDA_HOME]
191+
header_dict = fetch_header_paths(required_headers, include_path_list)
192+
found_types, found_functions, found_values, found_struct, struct_list = parse_headers(header_dict)
166193

167194
# ----------------------------------------------------------------------
168195
# Generate

cuda_core/cuda/core/experimental/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from cuda.core.experimental import utils
66
from cuda.core.experimental._device import Device
7-
from cuda.core.experimental._event import EventOptions
7+
from cuda.core.experimental._event import Event, EventOptions
88
from cuda.core.experimental._launcher import LaunchConfig, launch
99
from cuda.core.experimental._linker import Linker, LinkerOptions
1010
from cuda.core.experimental._module import ObjectCode

cuda_core/cuda/core/experimental/_linker.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -349,11 +349,12 @@ class Linker:
349349
"""
350350

351351
class _MembersNeededForFinalize:
352-
__slots__ = ("handle", "use_nvjitlink")
352+
__slots__ = ("handle", "use_nvjitlink", "const_char_keep_alive")
353353

354354
def __init__(self, program_obj, handle, use_nvjitlink):
355355
self.handle = handle
356356
self.use_nvjitlink = use_nvjitlink
357+
self.const_char_keep_alive = []
357358
weakref.finalize(program_obj, self.close)
358359

359360
def close(self):
@@ -390,27 +391,30 @@ def _add_code_object(self, object_code: ObjectCode):
390391
data = object_code._module
391392
assert_type(data, bytes)
392393
with _exception_manager(self):
394+
name_str = f"{object_code._handle}_{object_code._code_type}"
393395
if _nvjitlink:
394396
_nvjitlink.add_data(
395397
self._mnff.handle,
396398
self._input_type_from_code_type(object_code._code_type),
397399
data,
398400
len(data),
399-
f"{object_code._handle}_{object_code._code_type}",
401+
name_str,
400402
)
401403
else:
404+
name_bytes = name_str.encode()
402405
handle_return(
403406
_driver.cuLinkAddData(
404407
self._mnff.handle,
405408
self._input_type_from_code_type(object_code._code_type),
406409
data,
407410
len(data),
408-
f"{object_code._handle}_{object_code._code_type}".encode(),
411+
name_bytes,
409412
0,
410413
None,
411414
None,
412415
)
413416
)
417+
self._mnff.const_char_keep_alive.append(name_bytes)
414418

415419
def link(self, target_type) -> ObjectCode:
416420
"""
@@ -465,7 +469,7 @@ def get_error_log(self) -> str:
465469
_nvjitlink.get_error_log(self._mnff.handle, log)
466470
else:
467471
log = self._options.formatted_options[2]
468-
return log.decode()
472+
return log.decode("utf-8", errors="backslashreplace")
469473

470474
def get_info_log(self) -> str:
471475
"""Get the info log generated by the linker.
@@ -481,7 +485,7 @@ def get_info_log(self) -> str:
481485
_nvjitlink.get_info_log(self._mnff.handle, log)
482486
else:
483487
log = self._options.formatted_options[0]
484-
return log.decode()
488+
return log.decode("utf-8", errors="backslashreplace")
485489

486490
def _input_type_from_code_type(self, code_type: str):
487491
# this list is based on the supported values for code_type in the ObjectCode class definition.

cuda_core/cuda/core/experimental/_program.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ def compile(self, target_type, name_expressions=(), logs=None):
503503
if logsize > 1:
504504
log = b" " * logsize
505505
handle_return(nvrtc.nvrtcGetProgramLog(self._mnff.handle, log), handle=self._mnff.handle)
506-
logs.write(log.decode())
506+
logs.write(log.decode("utf-8", errors="backslashreplace"))
507507

508508
return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping)
509509

cuda_core/cuda/core/experimental/_utils/cuda_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def _check_error(error, handle=None):
9595
_, logsize = nvrtc.nvrtcGetProgramLogSize(handle)
9696
log = b" " * logsize
9797
_ = nvrtc.nvrtcGetProgramLog(handle, log)
98-
err += f", compilation log:\n\n{log.decode()}"
98+
err += f", compilation log:\n\n{log.decode('utf-8', errors='backslashreplace')}"
9999
raise NVRTCError(err)
100100
else:
101101
raise RuntimeError(f"Unknown error type: {error}")
@@ -203,6 +203,7 @@ def is_nested_sequence(obj):
203203
return is_sequence(obj) and any(is_sequence(elem) for elem in obj)
204204

205205

206+
@functools.lru_cache
206207
def get_binding_version():
207208
try:
208209
major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2]

cuda_core/docs/source/release/0.2.0-notes.rst

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
``cuda.core`` 0.2.0 Release Notes
44
=================================
55

6-
Released on <TODO>, 2025
6+
Released on March 17, 2025
77

88
Highlights
99
----------
@@ -27,10 +27,27 @@ New features
2727
- Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.
2828
- A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrieve the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`).
2929
- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information.
30-
- Support CUDA event timing.
31-
- An :class:`~_event.Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method.
30+
- Support CUDA :class:`Event` timing. (#481, #498, #508)
31+
- An :class:`Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method.
32+
- :class:`Program` now supports the additional ``PTX`` code type. (#317)
33+
- :meth:`Linker.link` exceptions now include the original error log. (#423)
34+
- In a systematic sweep through the cuda.core implementations, many exception messages were made more consistent and informative. (#458)
3235

33-
Limitations
34-
-----------
35-
36-
- <TODO>
36+
New examples
37+
------------
38+
- ``jit_lto_fractal.py`` — Demonstrates just-in-time link-time optimization for fractal generation. (:class:`Device`, :class:`LaunchConfig`, :class:`Linker`, :class:`LinkerOptions`, :class:`Program`, :class:`ProgramOptions`) (#475)
39+
- ``simple_multi_gpu_example.py`` — Example of using multiple GPUs. (:class:`Device`, :class:`Program`, :class:`LaunchConfig`) (#304)
40+
- ``show_device_properties.py`` — Displays detailed device properties. (:class:`Device`) (#474)
41+
42+
Minor fixes and enhancements
43+
----------------------------
44+
- A dangling pointer problem in ``_linker.py`` was fixed. (#516)
45+
- Add ``@functools.lru_cache`` decorator for :func:`get_binding_version`. (#512)
46+
- Selected ``.decode()`` calls were changed to ``.decode("utf-8", errors="backslashreplace")`` so that a failure to decode an error message cannot abort the process. (#510)
47+
- The performance of :meth:`Device.compute_capability` was improved. (#459)
48+
- The :class:`Program` constructor now issues a warning when falling back to :func:`cuLink`. (#315)
49+
- To avoid deprecation warnings, the cuda.bindings imports in the cuda.core implementations were cleaned up. (#404)
50+
51+
Test fixes
52+
----------
53+
- Clean up device initialization in some tests. (#507)

0 commit comments

Comments
 (0)