Skip to content

Commit 23a15de

Browse files
committed
fix: leaking amd card fd
Signed-off-by: thxCode <thxcode0824@gmail.com>
1 parent 5dbb0d4 commit 23a15de

3 files changed

Lines changed: 25 additions & 25 deletions

File tree

gpustack_runtime/detector/amd.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,11 @@ def detect(self) -> Devices | None:
142142
if (
143143
not dev_cores or not dev_asic_family_id
144144
) and dev_card_id is not None:
145-
with contextlib.suppress(pyamdgpu.AMDGPUError):
146-
_, _, dev_gpudev = pyamdgpu.amdgpu_device_initialize(
147-
dev_card_id,
148-
)
145+
with (
146+
contextlib.suppress(pyamdgpu.AMDGPUError),
147+
pyamdgpu.amdgpu_device(dev_card_id) as dev_gpudev,
148+
):
149149
dev_gpudev_info = pyamdgpu.amdgpu_query_gpu_info(dev_gpudev)
150-
pyamdgpu.amdgpu_device_deinitialize(dev_gpudev)
151150
if not dev_cores:
152151
dev_cores = dev_gpudev_info.cu_active_number
153152
if not dev_asic_family_id:

gpustack_runtime/detector/hygon.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,11 @@ def detect(self) -> Devices | None:
132132

133133
dev_cores = dev_hsa_agent.compute_units
134134
if not dev_cores and dev_card_id is not None:
135-
with contextlib.suppress(pyamdgpu.AMDGPUError):
136-
_, _, dev_gpudev = pyamdgpu.amdgpu_device_initialize(
137-
dev_card_id,
138-
)
135+
with (
136+
contextlib.suppress(pyamdgpu.AMDGPUError),
137+
pyamdgpu.amdgpu_device(dev_card_id) as dev_gpudev,
138+
):
139139
dev_gpudev_info = pyamdgpu.amdgpu_query_gpu_info(dev_gpudev)
140-
pyamdgpu.amdgpu_device_deinitialize(dev_gpudev)
141140
dev_cores = dev_gpudev_info.cu_active_number
142141

143142
dev_cores_util = pyrocmsmi.rsmi_dev_busy_percent_get(dev_idx)

gpustack_runtime/detector/pyamdgpu/__init__.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import os
88
import sys
99
import threading
10+
from contextlib import contextmanager
1011
from ctypes import *
1112
from typing import ClassVar
1213

@@ -247,31 +248,32 @@ def _LoadAMDGPULibrary():
247248

248249

249250
## C function wrappers ##
250-
def amdgpu_device_initialize(card=1):
251+
@contextmanager
252+
def amdgpu_device(card=1):
251253
_LoadAMDGPULibrary()
252254

253255
try:
254256
fd = os.open(f"/dev/dri/card{card}", os.O_RDONLY)
255257
except Exception:
256258
raise AMDGPUError(AMDGPU_ERROR_CARD_NOTFOUND)
257259

258-
c_major = c_uint32()
259-
c_minor = c_uint32()
260260
device = c_amdgpu_device_t()
261-
fn = _amdgpuGetFunctionPointer("amdgpu_device_initialize")
262-
# If receive an error print here, try
263-
# sudo vim /etc/default/grub
264-
# and add "amdgpu.dc=0" to GRUB_CMDLINE_LINUX_DEFAULT
265-
# then run "sudo update-grub" and reboot.
266-
ret = fn(fd, byref(c_major), byref(c_minor), byref(device))
267-
_amdgpuCheckReturn(ret)
268-
return c_major.value, c_minor.value, device
269261

262+
try:
263+
c_major = c_uint32()
264+
c_minor = c_uint32()
265+
fn = _amdgpuGetFunctionPointer("amdgpu_device_initialize")
266+
ret = fn(fd, byref(c_major), byref(c_minor), byref(device))
267+
_amdgpuCheckReturn(ret)
268+
269+
yield device
270270

271-
def amdgpu_device_deinitialize(device):
272-
fn = _amdgpuGetFunctionPointer("amdgpu_device_deinitialize")
273-
ret = fn(device)
274-
_amdgpuCheckReturn(ret)
271+
finally:
272+
try:
273+
fn = _amdgpuGetFunctionPointer("amdgpu_device_deinitialize")
274+
fn(device)
275+
finally:
276+
os.close(fd)
275277

276278

277279
def amdgpu_query_gpu_info(device):

0 commit comments

Comments
 (0)