Skip to content

Commit 2facdb6

Browse files
authored
Add new NVML APIs in CTK 13.1 (NVIDIA#1353)
* Add new NVML APIs in CTK 13.1
* Small fixes
1 parent 514daa6 commit 2facdb6

7 files changed

Lines changed: 1326 additions & 245 deletions

File tree

cuda_bindings/cuda/bindings/_internal/_nvml.pxd

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
5-
# This code was automatically generated across versions from 12.9.1 to 13.0.1. Do not modify it directly.
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.0. Do not modify it directly.
66

77
from ..cy_nvml cimport *
88

@@ -358,3 +358,6 @@ cdef nvmlReturn_t _nvmlDeviceGetNvLinkInfo(nvmlDevice_t device, nvmlNvLinkInfo_t
358358
cdef nvmlReturn_t _nvmlDeviceReadWritePRM_v1(nvmlDevice_t device, nvmlPRMTLV_v1_t* buffer) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil
359359
cdef nvmlReturn_t _nvmlDeviceGetGpuInstanceProfileInfoByIdV(nvmlDevice_t device, unsigned int profileId, nvmlGpuInstanceProfileInfo_v2_t* info) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil
360360
cdef nvmlReturn_t _nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(nvmlDevice_t device, nvmlEccSramUniqueUncorrectedErrorCounts_t* errorCounts) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil
361+
cdef nvmlReturn_t _nvmlDeviceGetUnrepairableMemoryFlag_v1(nvmlDevice_t device, nvmlUnrepairableMemoryStatus_v1_t* unrepairableMemoryStatus) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil
362+
cdef nvmlReturn_t _nvmlDeviceReadPRMCounters_v1(nvmlDevice_t device, nvmlPRMCounterList_v1_t* counterList) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil
363+
cdef nvmlReturn_t _nvmlDeviceSetRusdSettings_v1(nvmlDevice_t device, nvmlRusdSettings_v1_t* settings) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil

cuda_bindings/cuda/bindings/_internal/_nvml_linux.pyx

Lines changed: 64 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
5-
# This code was automatically generated across versions from 12.9.1 to 13.0.1. Do not modify it directly.
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.0. Do not modify it directly.
66

77
from libc.stdint cimport intptr_t, uintptr_t
88

@@ -407,6 +407,9 @@ cdef void* __nvmlDeviceGetNvLinkInfo = NULL
407407
cdef void* __nvmlDeviceReadWritePRM_v1 = NULL
408408
cdef void* __nvmlDeviceGetGpuInstanceProfileInfoByIdV = NULL
409409
cdef void* __nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts = NULL
410+
cdef void* __nvmlDeviceGetUnrepairableMemoryFlag_v1 = NULL
411+
cdef void* __nvmlDeviceReadPRMCounters_v1 = NULL
412+
cdef void* __nvmlDeviceSetRusdSettings_v1 = NULL
410413

411414

412415
cdef void* load_library() except* with gil:
@@ -2852,6 +2855,27 @@ cdef int _init_nvml() except -1 nogil:
28522855
handle = load_library()
28532856
__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts = dlsym(handle, 'nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts')
28542857

2858+
global __nvmlDeviceGetUnrepairableMemoryFlag_v1
2859+
__nvmlDeviceGetUnrepairableMemoryFlag_v1 = dlsym(RTLD_DEFAULT, 'nvmlDeviceGetUnrepairableMemoryFlag_v1')
2860+
if __nvmlDeviceGetUnrepairableMemoryFlag_v1 == NULL:
2861+
if handle == NULL:
2862+
handle = load_library()
2863+
__nvmlDeviceGetUnrepairableMemoryFlag_v1 = dlsym(handle, 'nvmlDeviceGetUnrepairableMemoryFlag_v1')
2864+
2865+
global __nvmlDeviceReadPRMCounters_v1
2866+
__nvmlDeviceReadPRMCounters_v1 = dlsym(RTLD_DEFAULT, 'nvmlDeviceReadPRMCounters_v1')
2867+
if __nvmlDeviceReadPRMCounters_v1 == NULL:
2868+
if handle == NULL:
2869+
handle = load_library()
2870+
__nvmlDeviceReadPRMCounters_v1 = dlsym(handle, 'nvmlDeviceReadPRMCounters_v1')
2871+
2872+
global __nvmlDeviceSetRusdSettings_v1
2873+
__nvmlDeviceSetRusdSettings_v1 = dlsym(RTLD_DEFAULT, 'nvmlDeviceSetRusdSettings_v1')
2874+
if __nvmlDeviceSetRusdSettings_v1 == NULL:
2875+
if handle == NULL:
2876+
handle = load_library()
2877+
__nvmlDeviceSetRusdSettings_v1 = dlsym(handle, 'nvmlDeviceSetRusdSettings_v1')
2878+
28552879
__py_nvml_init = True
28562880
return 0
28572881

@@ -3915,6 +3939,15 @@ cpdef dict _inspect_function_pointers():
39153939
global __nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts
39163940
data["__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts"] = <intptr_t>__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts
39173941

3942+
global __nvmlDeviceGetUnrepairableMemoryFlag_v1
3943+
data["__nvmlDeviceGetUnrepairableMemoryFlag_v1"] = <intptr_t>__nvmlDeviceGetUnrepairableMemoryFlag_v1
3944+
3945+
global __nvmlDeviceReadPRMCounters_v1
3946+
data["__nvmlDeviceReadPRMCounters_v1"] = <intptr_t>__nvmlDeviceReadPRMCounters_v1
3947+
3948+
global __nvmlDeviceSetRusdSettings_v1
3949+
data["__nvmlDeviceSetRusdSettings_v1"] = <intptr_t>__nvmlDeviceSetRusdSettings_v1
3950+
39183951
func_ptrs = data
39193952
return data
39203953

@@ -7398,3 +7431,33 @@ cdef nvmlReturn_t _nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(nvmlDevice_t
73987431
raise FunctionNotFoundError("function nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts is not found")
73997432
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlEccSramUniqueUncorrectedErrorCounts_t*) noexcept nogil>__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts)(
74007433
device, errorCounts)
7434+
7435+
7436+
# Internal forwarding wrapper around the dynamically resolved NVML symbol
# 'nvmlDeviceGetUnrepairableMemoryFlag_v1' (Linux loader variant).
#
# Steps, in order:
#   1. Call _check_or_init_nvml() (defined outside this view; presumably makes sure the
#      module-level function pointers have been looked up -- TODO confirm).
#   2. If the cached pointer is still NULL, re-acquire the GIL and raise
#      FunctionNotFoundError; the function itself is declared nogil, hence the 'with gil'.
#   3. Otherwise cast the void* to the matching C function-pointer type and call it with
#      'device' and 'unrepairableMemoryStatus' passed through unchanged.
#
# Returns the nvmlReturn_t produced by the underlying call, unmodified.
# 'except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR' declares that value as the return used to
# signal a possibly pending Python exception to Cython callers.
cdef nvmlReturn_t _nvmlDeviceGetUnrepairableMemoryFlag_v1(nvmlDevice_t device, nvmlUnrepairableMemoryStatus_v1_t* unrepairableMemoryStatus) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil:
7437+
global __nvmlDeviceGetUnrepairableMemoryFlag_v1
7438+
_check_or_init_nvml()
7439+
if __nvmlDeviceGetUnrepairableMemoryFlag_v1 == NULL:
7440+
with gil:
7441+
raise FunctionNotFoundError("function nvmlDeviceGetUnrepairableMemoryFlag_v1 is not found")
7442+
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlUnrepairableMemoryStatus_v1_t*) noexcept nogil>__nvmlDeviceGetUnrepairableMemoryFlag_v1)(
7443+
device, unrepairableMemoryStatus)
7444+
7445+
7446+
# Internal forwarding wrapper around the dynamically resolved NVML symbol
# 'nvmlDeviceReadPRMCounters_v1' (Linux loader variant).
#
# Steps, in order:
#   1. Call _check_or_init_nvml() (defined outside this view; presumably makes sure the
#      module-level function pointers have been looked up -- TODO confirm).
#   2. If the cached pointer is still NULL, re-acquire the GIL and raise
#      FunctionNotFoundError; the function itself is declared nogil, hence the 'with gil'.
#   3. Otherwise cast the void* to the matching C function-pointer type and call it with
#      'device' and 'counterList' passed through unchanged.
#
# Returns the nvmlReturn_t produced by the underlying call, unmodified.
# 'except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR' declares that value as the return used to
# signal a possibly pending Python exception to Cython callers.
cdef nvmlReturn_t _nvmlDeviceReadPRMCounters_v1(nvmlDevice_t device, nvmlPRMCounterList_v1_t* counterList) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil:
7447+
global __nvmlDeviceReadPRMCounters_v1
7448+
_check_or_init_nvml()
7449+
if __nvmlDeviceReadPRMCounters_v1 == NULL:
7450+
with gil:
7451+
raise FunctionNotFoundError("function nvmlDeviceReadPRMCounters_v1 is not found")
7452+
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlPRMCounterList_v1_t*) noexcept nogil>__nvmlDeviceReadPRMCounters_v1)(
7453+
device, counterList)
7454+
7455+
7456+
# Internal forwarding wrapper around the dynamically resolved NVML symbol
# 'nvmlDeviceSetRusdSettings_v1' (Linux loader variant).
#
# Steps, in order:
#   1. Call _check_or_init_nvml() (defined outside this view; presumably makes sure the
#      module-level function pointers have been looked up -- TODO confirm).
#   2. If the cached pointer is still NULL, re-acquire the GIL and raise
#      FunctionNotFoundError; the function itself is declared nogil, hence the 'with gil'.
#   3. Otherwise cast the void* to the matching C function-pointer type and call it with
#      'device' and 'settings' passed through unchanged.
#
# Returns the nvmlReturn_t produced by the underlying call, unmodified.
# 'except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR' declares that value as the return used to
# signal a possibly pending Python exception to Cython callers.
cdef nvmlReturn_t _nvmlDeviceSetRusdSettings_v1(nvmlDevice_t device, nvmlRusdSettings_v1_t* settings) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil:
7457+
global __nvmlDeviceSetRusdSettings_v1
7458+
_check_or_init_nvml()
7459+
if __nvmlDeviceSetRusdSettings_v1 == NULL:
7460+
with gil:
7461+
raise FunctionNotFoundError("function nvmlDeviceSetRusdSettings_v1 is not found")
7462+
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlRusdSettings_v1_t*) noexcept nogil>__nvmlDeviceSetRusdSettings_v1)(
7463+
device, settings)

cuda_bindings/cuda/bindings/_internal/_nvml_windows.pyx

Lines changed: 52 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
5-
# This code was automatically generated across versions from 12.9.1 to 13.0.1. Do not modify it directly.
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.0. Do not modify it directly.
66

77
from libc.stdint cimport intptr_t
88

@@ -424,6 +424,9 @@ cdef void* __nvmlDeviceGetNvLinkInfo = NULL
424424
cdef void* __nvmlDeviceReadWritePRM_v1 = NULL
425425
cdef void* __nvmlDeviceGetGpuInstanceProfileInfoByIdV = NULL
426426
cdef void* __nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts = NULL
427+
cdef void* __nvmlDeviceGetUnrepairableMemoryFlag_v1 = NULL
428+
cdef void* __nvmlDeviceReadPRMCounters_v1 = NULL
429+
cdef void* __nvmlDeviceSetRusdSettings_v1 = NULL
427430

428431

429432
cdef uintptr_t load_library() except* with gil:
@@ -1506,6 +1509,15 @@ cdef int _init_nvml() except -1 nogil:
15061509
global __nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts
15071510
__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts = GetProcAddress(handle, 'nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts')
15081511

1512+
global __nvmlDeviceGetUnrepairableMemoryFlag_v1
1513+
__nvmlDeviceGetUnrepairableMemoryFlag_v1 = GetProcAddress(handle, 'nvmlDeviceGetUnrepairableMemoryFlag_v1')
1514+
1515+
global __nvmlDeviceReadPRMCounters_v1
1516+
__nvmlDeviceReadPRMCounters_v1 = GetProcAddress(handle, 'nvmlDeviceReadPRMCounters_v1')
1517+
1518+
global __nvmlDeviceSetRusdSettings_v1
1519+
__nvmlDeviceSetRusdSettings_v1 = GetProcAddress(handle, 'nvmlDeviceSetRusdSettings_v1')
1520+
15091521
__py_nvml_init = True
15101522
return 0
15111523

@@ -2569,6 +2581,15 @@ cpdef dict _inspect_function_pointers():
25692581
global __nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts
25702582
data["__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts"] = <intptr_t>__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts
25712583

2584+
global __nvmlDeviceGetUnrepairableMemoryFlag_v1
2585+
data["__nvmlDeviceGetUnrepairableMemoryFlag_v1"] = <intptr_t>__nvmlDeviceGetUnrepairableMemoryFlag_v1
2586+
2587+
global __nvmlDeviceReadPRMCounters_v1
2588+
data["__nvmlDeviceReadPRMCounters_v1"] = <intptr_t>__nvmlDeviceReadPRMCounters_v1
2589+
2590+
global __nvmlDeviceSetRusdSettings_v1
2591+
data["__nvmlDeviceSetRusdSettings_v1"] = <intptr_t>__nvmlDeviceSetRusdSettings_v1
2592+
25722593
func_ptrs = data
25732594
return data
25742595

@@ -6052,3 +6073,33 @@ cdef nvmlReturn_t _nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(nvmlDevice_t
60526073
raise FunctionNotFoundError("function nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts is not found")
60536074
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlEccSramUniqueUncorrectedErrorCounts_t*) noexcept nogil>__nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts)(
60546075
device, errorCounts)
6076+
6077+
6078+
# Internal forwarding wrapper around the dynamically resolved NVML symbol
# 'nvmlDeviceGetUnrepairableMemoryFlag_v1' (Windows loader variant).
#
# Steps, in order:
#   1. Call _check_or_init_nvml() (defined outside this view; presumably makes sure the
#      module-level function pointers have been looked up -- TODO confirm).
#   2. If the cached pointer is still NULL, re-acquire the GIL and raise
#      FunctionNotFoundError; the function itself is declared nogil, hence the 'with gil'.
#   3. Otherwise cast the void* to the matching C function-pointer type and call it with
#      'device' and 'unrepairableMemoryStatus' passed through unchanged.
#
# Returns the nvmlReturn_t produced by the underlying call, unmodified.
# 'except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR' declares that value as the return used to
# signal a possibly pending Python exception to Cython callers.
cdef nvmlReturn_t _nvmlDeviceGetUnrepairableMemoryFlag_v1(nvmlDevice_t device, nvmlUnrepairableMemoryStatus_v1_t* unrepairableMemoryStatus) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil:
6079+
global __nvmlDeviceGetUnrepairableMemoryFlag_v1
6080+
_check_or_init_nvml()
6081+
if __nvmlDeviceGetUnrepairableMemoryFlag_v1 == NULL:
6082+
with gil:
6083+
raise FunctionNotFoundError("function nvmlDeviceGetUnrepairableMemoryFlag_v1 is not found")
6084+
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlUnrepairableMemoryStatus_v1_t*) noexcept nogil>__nvmlDeviceGetUnrepairableMemoryFlag_v1)(
6085+
device, unrepairableMemoryStatus)
6086+
6087+
6088+
# Internal forwarding wrapper around the dynamically resolved NVML symbol
# 'nvmlDeviceReadPRMCounters_v1' (Windows loader variant).
#
# Steps, in order:
#   1. Call _check_or_init_nvml() (defined outside this view; presumably makes sure the
#      module-level function pointers have been looked up -- TODO confirm).
#   2. If the cached pointer is still NULL, re-acquire the GIL and raise
#      FunctionNotFoundError; the function itself is declared nogil, hence the 'with gil'.
#   3. Otherwise cast the void* to the matching C function-pointer type and call it with
#      'device' and 'counterList' passed through unchanged.
#
# Returns the nvmlReturn_t produced by the underlying call, unmodified.
# 'except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR' declares that value as the return used to
# signal a possibly pending Python exception to Cython callers.
cdef nvmlReturn_t _nvmlDeviceReadPRMCounters_v1(nvmlDevice_t device, nvmlPRMCounterList_v1_t* counterList) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil:
6089+
global __nvmlDeviceReadPRMCounters_v1
6090+
_check_or_init_nvml()
6091+
if __nvmlDeviceReadPRMCounters_v1 == NULL:
6092+
with gil:
6093+
raise FunctionNotFoundError("function nvmlDeviceReadPRMCounters_v1 is not found")
6094+
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlPRMCounterList_v1_t*) noexcept nogil>__nvmlDeviceReadPRMCounters_v1)(
6095+
device, counterList)
6096+
6097+
6098+
# Internal forwarding wrapper around the dynamically resolved NVML symbol
# 'nvmlDeviceSetRusdSettings_v1' (Windows loader variant).
#
# Steps, in order:
#   1. Call _check_or_init_nvml() (defined outside this view; presumably makes sure the
#      module-level function pointers have been looked up -- TODO confirm).
#   2. If the cached pointer is still NULL, re-acquire the GIL and raise
#      FunctionNotFoundError; the function itself is declared nogil, hence the 'with gil'.
#   3. Otherwise cast the void* to the matching C function-pointer type and call it with
#      'device' and 'settings' passed through unchanged.
#
# Returns the nvmlReturn_t produced by the underlying call, unmodified.
# 'except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR' declares that value as the return used to
# signal a possibly pending Python exception to Cython callers.
cdef nvmlReturn_t _nvmlDeviceSetRusdSettings_v1(nvmlDevice_t device, nvmlRusdSettings_v1_t* settings) except?_NVMLRETURN_T_INTERNAL_LOADING_ERROR nogil:
6099+
global __nvmlDeviceSetRusdSettings_v1
6100+
_check_or_init_nvml()
6101+
if __nvmlDeviceSetRusdSettings_v1 == NULL:
6102+
with gil:
6103+
raise FunctionNotFoundError("function nvmlDeviceSetRusdSettings_v1 is not found")
6104+
return (<nvmlReturn_t (*)(nvmlDevice_t, nvmlRusdSettings_v1_t*) noexcept nogil>__nvmlDeviceSetRusdSettings_v1)(
6105+
device, settings)

cuda_bindings/cuda/bindings/_nvml.pxd

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
5-
# This code was automatically generated across versions from 12.9.1 to 13.0.1. Do not modify it directly.
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.0. Do not modify it directly.
66

77
from libc.stdint cimport intptr_t
88

@@ -55,6 +55,7 @@ ctypedef nvmlGpmMetric_t GpmMetric
5555
ctypedef nvmlWorkloadPowerProfileInfo_v1_t WorkloadPowerProfileInfo_v1
5656
ctypedef nvmlWorkloadPowerProfileCurrentProfiles_v1_t WorkloadPowerProfileCurrentProfiles_v1
5757
ctypedef nvmlWorkloadPowerProfileRequestedProfiles_v1_t WorkloadPowerProfileRequestedProfiles_v1
58+
ctypedef nvmlWorkloadPowerProfileUpdateProfiles_v1_t WorkloadPowerProfileUpdateProfiles_v1
5859
ctypedef nvmlPRMTLV_v1_t PRMTLV_v1
5960
ctypedef nvmlVgpuSchedulerSetState_t VgpuSchedulerSetState
6061
ctypedef nvmlGpmMetricsGet_t GpmMetricsGet
@@ -123,6 +124,8 @@ ctypedef nvmlVgpuPgpuCompatibilityLimitCode_t _VgpuPgpuCompatibilityLimitCode
123124
ctypedef nvmlGpmMetricId_t _GpmMetricId
124125
ctypedef nvmlPowerProfileType_t _PowerProfileType
125126
ctypedef nvmlDeviceAddressingModeType_t _DeviceAddressingModeType
127+
ctypedef nvmlPRMCounterId_t _PRMCounterId
128+
ctypedef nvmlPowerProfileOperation_t _PowerProfileOperation
126129

127130

128131
###############################################################################
@@ -445,3 +448,6 @@ cpdef object device_get_pdi(intptr_t device)
445448
cpdef object device_get_nvlink_info(intptr_t device)
446449
cpdef device_read_write_prm_v1(intptr_t device, intptr_t buffer)
447450
cpdef object device_get_gpu_instance_profile_info_by_id_v(intptr_t device, unsigned int profile_id)
451+
cpdef object device_get_unrepairable_memory_flag_v1(intptr_t device)
452+
cpdef device_read_prm_counters_v1(intptr_t device, intptr_t counter_list)
453+
cpdef device_set_rusd_settings_v1(intptr_t device, intptr_t settings)

0 commit comments

Comments
 (0)