Skip to content

Commit 7b00098

Browse files
authored
cuda.core.system: Add ProcessInfo APIs (#1917)
* cuda.core.system: Add ProcessInfo APIs * Add API docs * Fix test
1 parent c747f7b commit 7b00098

4 files changed

Lines changed: 96 additions & 0 deletions

File tree

cuda_core/cuda/core/system/_device.pyx

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ include "_mig.pxi"
3636
include "_nvlink.pxi"
3737
include "_pci_info.pxi"
3838
include "_performance.pxi"
39+
include "_process.pxi"
3940
include "_repair_status.pxi"
4041
include "_temperature.pxi"
4142
include "_utilization.pxi"
@@ -765,6 +766,33 @@ cdef class Device:
765766
"""
766767
return [Pstates(x) for x in nvml.device_get_supported_performance_states(self._handle)]
767768
769+
##########################################################################
770+
# PROCESS
771+
# See external class definitions in _process.pxi
772+
773+
@property
def compute_running_processes(self) -> list[ProcessInfo]:
    """
    List the processes that hold a compute context on this device.

    For Fermi™ or newer fully supported devices.

    Only compute processes are reported (e.g. CUDA applications that have
    an active context). Graphics applications (e.g. using OpenGL, DirectX)
    are not included in the result.

    The returned information is dynamic: the number of elements may differ
    from one access to the next.

    In MIG mode, if a device handle is provided, the API returns aggregate
    information, and only if the caller has appropriate privileges.
    Per-instance information can be queried by using specific MIG device
    handles. Querying per-instance information using MIG device handles is
    not supported if the device is in vGPU Host virtualization mode.
    """
    # Each raw NVML record is wrapped together with this device so that
    # ProcessInfo can consult the device when its MIG-only fields are read.
    raw_records = nvml.device_get_compute_running_processes_v3(self._handle)
    return [ProcessInfo(self, record) for record in raw_records]
795+
768796
##########################################################################
769797
# REPAIR STATUS
770798
# See external class definitions in _repair_status.pxi
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
6+
class ProcessInfo:
    """
    Information about a running compute process on the GPU.

    Pairs the NVML process record with the device it was queried from; the
    device is consulted before exposing the MIG-only identifiers.
    """
    def __init__(self, device: "Device", process_info: nvml.ProcessInfo):
        self._process_info = process_info
        self._device = device

    def _require_mig_device(self) -> None:
        # The instance IDs are only exposed when the owning device reports
        # itself as a MIG device; otherwise signal "not supported".
        if self._device.mig.is_mig_device:
            return
        raise nvml.NotSupportedError(nvml.Return.ERROR_NOT_SUPPORTED)

    @property
    def pid(self) -> int:
        """
        The PID of the process.
        """
        record = self._process_info
        return record.pid

    @property
    def used_gpu_memory(self) -> int:
        """
        The amount of GPU memory (in bytes) used by the process.
        """
        record = self._process_info
        return record.used_gpu_memory

    @property
    def gpu_instance_id(self) -> int:
        """
        The GPU instance ID for MIG devices.

        Only valid for processes running on MIG devices; a
        ``NotSupportedError`` is raised otherwise.
        """
        self._require_mig_device()
        return self._process_info.gpu_instance_id

    @property
    def compute_instance_id(self) -> int:
        """
        The Compute instance ID for MIG devices.

        Only valid for processes running on MIG devices; a
        ``NotSupportedError`` is raised otherwise.
        """
        self._require_mig_device()
        return self._process_info.compute_instance_id

cuda_core/docs/source/api_private.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ NVML
7979
system._device.MigInfo
8080
system._device.NvlinkInfo
8181
system._device.PciInfo
82+
system._device.ProcessInfo
8283
system._device.RepairStatus
8384
system._device.Temperature
8485
system._device.ThermalSensor

cuda_core/tests/system/test_system_device.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,25 @@ def test_pstates():
731731
assert isinstance(utilization.dec_threshold, int)
732732

733733

734+
def test_compute_running_processes():
    """Check the shape of ``Device.compute_running_processes`` on every device."""
    mig_only_fields = ("gpu_instance_id", "compute_instance_id")
    for gpu in system.Device.get_all_devices():
        with unsupported_before(gpu, "FERMI"):
            running = gpu.compute_running_processes
        assert isinstance(running, list)
        for entry in running:
            assert isinstance(entry, _device.ProcessInfo)
            assert isinstance(entry.pid, int)
            assert isinstance(entry.used_gpu_memory, int)
            if not gpu.mig.is_mig_device:
                # Outside MIG, reading either instance ID must be rejected.
                for field in mig_only_fields:
                    with pytest.raises(nvml.NotSupportedError):
                        getattr(entry, field)
                continue
            for field in mig_only_fields:
                assert isinstance(getattr(entry, field), int)
751+
752+
734753
def test_nvlink():
735754
for device in system.Device.get_all_devices():
736755
max_links = _device.NvlinkInfo.max_links

0 commit comments

Comments
 (0)