Skip to content

Commit bd1874c

Browse files
committed
fix(core): release driver-backend buffers in Linker.close()
Linker.close() reset only the _culink_handle, leaving the retained option-key/value vectors and the log-buffer bytearrays alive until Python GC/tp_dealloc. Those buffers exist for cuLinkDestroy's sake, but cuLinkDestroy has already run at this point via the shared_ptr deleter, so they can be released immediately. Cache decoded logs into _info_log/_error_log before releasing the raw buffers so get_error_log() / get_info_log() remain callable after close(), including on the failed-link path where link() never caches them itself. Swap the option vectors with empty locals to actually free the backing allocation (std::vector::clear only sets size to 0 and keeps capacity). Adds two driver-backend regression tests: one that links successfully, closes + drops, then performs another full compile + link cycle (prior heap-corruption bugs only surfaced in the next CUDA op after teardown); another that triggers a link failure, closes, and checks get_error_log() still returns the captured diagnostic.
1 parent 1572452 commit bd1874c

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

cuda_core/cuda/core/_linker.pyx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,31 @@ cdef class Linker:
144144

145145
def close(self):
146146
"""Destroy this linker."""
147+
cdef vector[cydriver.CUjit_option] empty_keys
148+
cdef vector[void*] empty_values
147149
if self._use_nvjitlink:
148150
self._nvjitlink_handle.reset()
149151
else:
152+
# link() caches decoded logs into _info_log/_error_log on the
153+
# success path only. A failed link leaves them as None with
154+
# _drv_log_bufs as the only source, so cache them here before
155+
# the raw buffers are released.
156+
if self._drv_log_bufs is not None:
157+
if self._info_log is None:
158+
self._info_log = self.get_info_log()
159+
if self._error_log is None:
160+
self._error_log = self.get_error_log()
161+
# .reset() drops the last shared_ptr to the CUlinkState and runs
162+
# cuLinkDestroy synchronously via the custom deleter. The driver
163+
# is no longer looking at our option arrays or log buffers, so
164+
# release the host-side retainers now rather than waiting for
165+
# tp_dealloc. Swap with empty vectors to actually free the
166+
# backing allocation; vector.clear() only sets size to 0 and
167+
# retains capacity.
150168
self._culink_handle.reset()
169+
self._drv_jit_keys.swap(empty_keys)
170+
self._drv_jit_values.swap(empty_values)
171+
self._drv_log_bufs = None
151172

152173
@property
153174
def handle(self) -> LinkerHandleT:

cuda_core/tests/test_linker.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from cuda.core import Device, Linker, LinkerOptions, Program, ProgramOptions, _linker
88
from cuda.core._module import ObjectCode
9+
from cuda.core._program import _can_load_generated_ptx
910
from cuda.core._utils.cuda_utils import CUDAError
1011
from cuda.core._utils.version import driver_version
1112

@@ -252,6 +253,70 @@ def test_linker_handle(compile_ptx_functions):
252253
assert int(handle) != 0
253254

254255

256+
def test_driver_linker_lifetime_no_heap_corruption(monkeypatch, compile_ptx_functions):
257+
"""Driver-backend teardown must not leave cuLinkCreate option arrays or log buffers dangling.
258+
259+
Two prior bugs corrupted the heap during driver-linker teardown: the log
260+
buffer bytearrays were cleared before cuLinkDestroy ran, and the
261+
optionValues array was a stack-local vector destroyed when Linker_init
262+
returned. Both manifested in the NEXT CUDA operation after the Linker
263+
was destroyed, not at destruction itself. This test forces the driver
264+
backend, links, closes + drops the Linker, and then performs a full
265+
compile + link cycle that would previously segfault.
266+
"""
267+
if not _can_load_generated_ptx():
268+
pytest.skip("PTX version too new for current driver")
269+
monkeypatch.setattr(_linker, "_probe_nvjitlink", lambda: None)
270+
271+
linker = Linker(*compile_ptx_functions, options=LinkerOptions(arch=ARCH))
272+
assert linker.backend == "driver"
273+
linker.link("cubin")
274+
linker.close()
275+
del linker
276+
277+
obj_a = Program(kernel_a, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
278+
obj_b = Program(device_function_b, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
279+
obj_c = Program(device_function_c, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
280+
linker2 = Linker(obj_a, obj_b, obj_c, options=LinkerOptions(arch=ARCH))
281+
assert linker2.backend == "driver"
282+
linker2.link("cubin")
283+
linker2.close()
284+
del linker2
285+
286+
287+
def test_driver_linker_get_error_log_after_close_on_failed_link(init_cuda, monkeypatch):
288+
"""close() must preserve get_error_log() output when link() failed.
289+
290+
link() only caches _info_log/_error_log on the success path, so after
291+
a failed cuLinkComplete the driver log buffers are the only source of
292+
the error diagnostic. close() releases those buffers, and callers
293+
should still be able to read the captured error log afterward.
294+
"""
295+
if not _can_load_generated_ptx():
296+
pytest.skip("PTX version too new for current driver")
297+
monkeypatch.setattr(_linker, "_probe_nvjitlink", lambda: None)
298+
299+
bad_kernel = """
300+
extern __device__ int Z();
301+
__global__ void A() { int r = Z(); }
302+
"""
303+
bad_obj = Program(bad_kernel, "c++", ProgramOptions(relocatable_device_code=True)).compile("ptx")
304+
linker = Linker(bad_obj, options=LinkerOptions(arch=ARCH))
305+
assert linker.backend == "driver"
306+
with pytest.raises(CUDAError):
307+
linker.link("cubin")
308+
309+
pre_close_err = linker.get_error_log()
310+
assert isinstance(pre_close_err, str)
311+
assert pre_close_err # failed link must have produced a diagnostic
312+
313+
linker.close()
314+
# close() releases the raw driver buffers; the cached decoded logs must
315+
# still be readable.
316+
assert linker.get_error_log() == pre_close_err
317+
assert isinstance(linker.get_info_log(), str)
318+
319+
255320
@pytest.mark.skipif(is_culink_backend, reason="nvjitlink options only tested with nvjitlink backend")
256321
def test_linker_options_nvjitlink_options_as_str():
257322
"""_prepare_nvjitlink_options(as_bytes=False) returns plain strings."""

0 commit comments

Comments
 (0)