Skip to content

Commit 2ff8d39

Browse files
committed
fix(core): release driver-backend buffers in Linker.close()
Linker.close() reset only the _culink_handle, leaving the retained option-key/value vectors and the log-buffer bytearrays alive until Python GC/tp_dealloc. Those buffers exist for cuLinkDestroy's sake, but cuLinkDestroy has already run at this point via the shared_ptr deleter, so they can be released immediately. Cache decoded logs into _info_log/_error_log before releasing the raw buffers so get_error_log() / get_info_log() remain callable after close(), including on the failed-link path where link() never caches them itself. Swap the option vectors with empty locals to actually free the backing allocation (std::vector::clear only sets size to 0 and keeps capacity). Adds two driver-backend regression tests: one that links successfully, closes + drops, then performs another full compile + link cycle (prior heap-corruption bugs only surfaced in the next CUDA op after teardown); another that triggers a link failure, closes, and checks get_error_log() still returns the captured diagnostic.
1 parent 3d4318e commit 2ff8d39

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

cuda_core/cuda/core/_linker.pyx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,31 @@ cdef class Linker:
144144

145145
def close(self):
146146
"""Destroy this linker."""
147+
cdef vector[cydriver.CUjit_option] empty_keys
148+
cdef vector[void*] empty_values
147149
if self._use_nvjitlink:
148150
self._nvjitlink_handle.reset()
149151
else:
152+
# link() caches decoded logs into _info_log/_error_log on the
153+
# success path only. A failed link leaves them as None with
154+
# _drv_log_bufs as the only source, so cache them here before
155+
# the raw buffers are released.
156+
if self._drv_log_bufs is not None:
157+
if self._info_log is None:
158+
self._info_log = self.get_info_log()
159+
if self._error_log is None:
160+
self._error_log = self.get_error_log()
161+
# .reset() drops the last shared_ptr to the CUlinkState and runs
162+
# cuLinkDestroy synchronously via the custom deleter. The driver
163+
# is no longer looking at our option arrays or log buffers, so
164+
# release the host-side retainers now rather than waiting for
165+
# tp_dealloc. Swap with empty vectors to actually free the
166+
# backing allocation; vector.clear() only sets size to 0 and
167+
# retains capacity.
150168
self._culink_handle.reset()
169+
self._drv_jit_keys.swap(empty_keys)
170+
self._drv_jit_values.swap(empty_values)
171+
self._drv_log_bufs = None
151172

152173
@property
153174
def handle(self) -> LinkerHandleT:

cuda_core/tests/test_linker.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,66 @@ def test_linker_handle(compile_ptx_functions):
252252
assert int(handle) != 0
253253

254254

255+
def test_driver_linker_lifetime_no_heap_corruption(monkeypatch, compile_ptx_functions):
256+
"""Driver-backend teardown must not leave cuLinkCreate option arrays or log buffers dangling.
257+
258+
Two prior bugs corrupted the heap during driver-linker teardown: the log
259+
buffer bytearrays were cleared before cuLinkDestroy ran, and the
260+
optionValues array was a stack-local vector destroyed when Linker_init
261+
returned. Both manifested in the NEXT CUDA operation after the Linker
262+
was destroyed, not at destruction itself. This test forces the driver
263+
backend, links, closes + drops the Linker, and then performs a full
264+
compile + link cycle that would previously segfault.
265+
"""
266+
monkeypatch.setattr(_linker, "_probe_nvjitlink", lambda: None)
267+
268+
linker = Linker(*compile_ptx_functions, options=LinkerOptions(arch=ARCH))
269+
assert linker.backend == "driver"
270+
linker.link("cubin")
271+
linker.close()
272+
del linker
273+
274+
obj_a = Program(kernel_a, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
275+
obj_b = Program(device_function_b, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
276+
obj_c = Program(device_function_c, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
277+
linker2 = Linker(obj_a, obj_b, obj_c, options=LinkerOptions(arch=ARCH))
278+
assert linker2.backend == "driver"
279+
linker2.link("cubin")
280+
linker2.close()
281+
del linker2
282+
283+
284+
def test_driver_linker_get_error_log_after_close_on_failed_link(init_cuda, monkeypatch):
285+
"""close() must preserve get_error_log() output when link() failed.
286+
287+
link() only caches _info_log/_error_log on the success path, so after
288+
a failed cuLinkComplete the driver log buffers are the only source of
289+
the error diagnostic. close() releases those buffers, and callers
290+
should still be able to read the captured error log afterward.
291+
"""
292+
monkeypatch.setattr(_linker, "_probe_nvjitlink", lambda: None)
293+
294+
bad_kernel = """
295+
extern __device__ int Z();
296+
__global__ void A() { int r = Z(); }
297+
"""
298+
bad_obj = Program(bad_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
299+
linker = Linker(bad_obj, options=LinkerOptions(arch=ARCH))
300+
assert linker.backend == "driver"
301+
with pytest.raises(CUDAError):
302+
linker.link("cubin")
303+
304+
pre_close_err = linker.get_error_log()
305+
assert isinstance(pre_close_err, str)
306+
assert pre_close_err # failed link must have produced a diagnostic
307+
308+
linker.close()
309+
# close() releases the raw driver buffers; the cached decoded logs must
310+
# still be readable.
311+
assert linker.get_error_log() == pre_close_err
312+
assert isinstance(linker.get_info_log(), str)
313+
314+
255315
@pytest.mark.skipif(is_culink_backend, reason="nvjitlink options only tested with nvjitlink backend")
256316
def test_linker_options_nvjitlink_options_as_str():
257317
"""_prepare_nvjitlink_options(as_bytes=False) returns plain strings."""

0 commit comments

Comments
 (0)