Skip to content

Commit 1fa65f1

Browse files
committed
feature: Add more APIs to L0 Sysman python binding
Related-To: NEO-18602

Added following APIs to python binding:
1. zesDevicePciGetProperties
2. zesDevicePciGetStats
3. zesDeviceEccAvailable
4. zesDeviceEccConfigurable
5. zesDeviceGetEccState
6. zesDeviceSetEccState
7. zesPowerGetLimitsExt
8. zesPowerSetLimitsExt
9. zesFrequencyGetAvailableClocks
10. zesFrequencyGetRange
11. zesFrequencySetRange
12. zesFrequencyGetThrottleTime

Signed-off-by: Aviral Nigam <aviral.nigam@intel.com>
1 parent 377fcb6 commit 1fa65f1

2 files changed

Lines changed: 0 additions & 196 deletions

File tree

bindings/sysman/python/source/examples/pyzes_black_box_test.py

Lines changed: 0 additions & 100 deletions
Original file line number | Diff line number | Diff line change
@@ -274,23 +274,6 @@ def get_device_action_string(action):
274274
return action_map.get(action, f"UNKNOWN_DEVICE_ACTION_{action}")
275275

276276

277-
def get_ras_error_category_string(category):
278-
"""Convert RAS error category enum to string"""
279-
category_map = {
280-
pz.ZES_RAS_ERROR_CATEGORY_EXP_RESET: "ZES_RAS_ERROR_CATEGORY_EXP_RESET",
281-
pz.ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS",
282-
pz.ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS",
283-
pz.ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS",
284-
pz.ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS",
285-
pz.ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS",
286-
pz.ZES_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS",
287-
pz.ZES_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS",
288-
pz.ZES_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS",
289-
pz.ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS: "ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS",
290-
}
291-
return category_map.get(category, f"UNKNOWN_RAS_CATEGORY_{category}")
292-
293-
294277
def is_root_user():
295278
"""Return whether the current user has root privileges on platforms that support it"""
296279
geteuid = getattr(os, "geteuid", None)
@@ -692,80 +675,6 @@ def test_ecc_module(device_handle, device_index):
692675
return True
693676

694677

695-
def test_ras_module(device_handle, device_index):
696-
"""Test RAS handle enumeration, state retrieval, and clear-state operations"""
697-
print(f"\n---- Device {device_index} RAS Test ----")
698-
699-
ras_count = c_uint32(0)
700-
rc = pz.zesDeviceEnumRasErrorSets(device_handle, byref(ras_count), None)
701-
if not check_rc(f"zesDeviceEnumRasErrorSets(device {device_index}, count)", rc):
702-
return False
703-
704-
if ras_count.value == 0:
705-
print_verbose("No RAS error sets found on this device")
706-
return True
707-
708-
print_verbose(f"Found {ras_count.value} RAS error set(s)")
709-
710-
RasArray = pz.zes_ras_handle_t * ras_count.value
711-
ras_handles = RasArray()
712-
713-
rc = pz.zesDeviceEnumRasErrorSets(device_handle, byref(ras_count), ras_handles)
714-
if not check_rc(f"zesDeviceEnumRasErrorSets(device {device_index}, handles)", rc):
715-
return False
716-
717-
for i in range(ras_count.value):
718-
print_verbose(f"\n RAS Handle {i}:")
719-
720-
state_count = c_uint32(0)
721-
rc = pz.zesRasGetStateExp(ras_handles[i], byref(state_count), None)
722-
if not check_rc(f"zesRasGetStateExp(handle {i}, count)", rc):
723-
continue
724-
725-
if state_count.value == 0:
726-
print_verbose(" No RAS categories reported for this handle")
727-
continue
728-
729-
RasStateArray = pz.zes_ras_state_exp_t * state_count.value
730-
ras_states = RasStateArray()
731-
732-
rc = pz.zesRasGetStateExp(ras_handles[i], byref(state_count), ras_states)
733-
if not check_rc(f"zesRasGetStateExp(handle {i}, states)", rc):
734-
continue
735-
736-
print_verbose(" RAS States:")
737-
for ras_state in ras_states:
738-
print_verbose(
739-
f" {get_ras_error_category_string(ras_state.category)}: {ras_state.errorCounter}"
740-
)
741-
742-
if not is_root_user():
743-
print_verbose(
744-
" Skipping zesRasClearStateExp due to insufficient permissions"
745-
)
746-
continue
747-
748-
for ras_state in ras_states:
749-
rc = pz.zesRasClearStateExp(ras_handles[i], ras_state.category)
750-
if not check_rc(
751-
f"zesRasClearStateExp(handle {i}, {get_ras_error_category_string(ras_state.category)})",
752-
rc,
753-
):
754-
return False
755-
756-
rc = pz.zesRasGetStateExp(ras_handles[i], byref(state_count), ras_states)
757-
if not check_rc(f"zesRasGetStateExp(handle {i}, verify)", rc):
758-
continue
759-
760-
print_verbose(" RAS States After Clear:")
761-
for ras_state in ras_states:
762-
print_verbose(
763-
f" {get_ras_error_category_string(ras_state.category)}: {ras_state.errorCounter}"
764-
)
765-
766-
return True
767-
768-
769678
def test_device_processes(device_handle, device_index):
770679
"""Test device processes state"""
771680
print(f"\n---- Device {device_index} Processes Test ----")
@@ -1362,9 +1271,6 @@ def run_all_tests():
13621271
# Test ECC module
13631272
test_ecc_module(devices[device_idx], device_idx)
13641273

1365-
# Test RAS module
1366-
test_ras_module(devices[device_idx], device_idx)
1367-
13681274
# Test memory modules
13691275
test_memory_modules(devices[device_idx], device_idx)
13701276

@@ -1394,7 +1300,6 @@ def main():
13941300
%(prog)s -g # Global operations (device properties and processes) only
13951301
%(prog)s -p # PCI tests only
13961302
%(prog)s -C # ECC tests only
1397-
%(prog)s -R # RAS tests only
13981303
%(prog)s -o # Power tests only
13991304
%(prog)s -f # Frequency tests only
14001305
%(prog)s -t # Temperature tests only
@@ -1418,7 +1323,6 @@ def main():
14181323
)
14191324
parser.add_argument("-p", "--pci", action="store_true", help="Run only PCI tests")
14201325
parser.add_argument("-C", "--ecc", action="store_true", help="Run only ECC tests")
1421-
parser.add_argument("-R", "--ras", action="store_true", help="Run only RAS tests")
14221326
parser.add_argument(
14231327
"-f",
14241328
"--frequency",
@@ -1446,7 +1350,6 @@ def main():
14461350
or getattr(args, "global", False)
14471351
or args.pci
14481352
or args.ecc
1449-
or args.ras
14501353
or args.power
14511354
or args.frequency
14521355
or args.temperature
@@ -1488,9 +1391,6 @@ def main():
14881391
if args.ecc:
14891392
test_ecc_module(devices[device_idx], device_idx)
14901393

1491-
if args.ras:
1492-
test_ras_module(devices[device_idx], device_idx)
1493-
14941394
if args.memory:
14951395
test_memory_modules(devices[device_idx], device_idx)
14961396

bindings/sysman/python/source/pyzes.py

Lines changed: 0 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,6 @@ class zes_engine_handle_t(c_void_p):
151151
pass
152152

153153

154-
class zes_ras_handle_t(c_void_p):
155-
pass
156-
157-
158154
##
159155

160156
ze_bool_t = c_uint32
@@ -295,20 +291,6 @@ class zes_ras_handle_t(c_void_p):
295291
ZES_ENGINE_GROUP_MEDIA_CODEC_SINGLE = 14
296292
ZES_ENGINE_GROUP_FORCE_UINT32 = 0x7FFFFFFF
297293

298-
## RAS error category enums ##
299-
zes_ras_error_category_exp_t = c_int32
300-
ZES_RAS_ERROR_CATEGORY_EXP_RESET = 0
301-
ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS = 1
302-
ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS = 2
303-
ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS = 3
304-
ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS = 4
305-
ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS = 5
306-
ZES_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS = 6
307-
ZES_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS = 7
308-
ZES_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS = 8
309-
ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS = 9
310-
ZES_RAS_ERROR_CATEGORY_EXP_FORCE_UINT32 = 0x7FFFFFFF
311-
312294
ze_result_t = c_int32
313295
ZE_RESULT_SUCCESS = 0
314296
ZE_RESULT_NOT_READY = 1
@@ -498,14 +480,6 @@ class zes_pci_stats_t(_PrintableStructure):
498480
}
499481

500482

501-
## RAS structures ##
502-
class zes_ras_state_exp_t(_PrintableStructure):
503-
_fields_ = [
504-
("category", zes_ras_error_category_exp_t),
505-
("errorCounter", c_uint64),
506-
]
507-
508-
509483
## ECC enums and structures ##
510484
zes_device_ecc_state_t = c_int32
511485
ZES_DEVICE_ECC_STATE_UNAVAILABLE = 0
@@ -1000,76 +974,6 @@ def zesDeviceProcessesGetState(hDevice, pCount, pProcesses):
1000974
return retVal
1001975

1002976

1003-
## RAS management functions ##
1004-
def zesDeviceEnumRasErrorSets(hDevice, pCount, phRas):
1005-
"""Wraps API:
1006-
ze_result_t zesDeviceEnumRasErrorSets(
1007-
zes_device_handle_t hDevice,
1008-
uint32_t* pCount,
1009-
zes_ras_handle_t* phRas)
1010-
1011-
Parameters:
1012-
hDevice: device handle
1013-
pCount: POINTER(c_uint32)
1014-
phRas: POINTER(zes_ras_handle_t) or None
1015-
Returns:
1016-
ze_result_t - return code only, RAS handles are filled into phRas
1017-
"""
1018-
funcPtr = getFunctionPointerList("zesDeviceEnumRasErrorSets")
1019-
funcPtr.argtypes = [
1020-
zes_device_handle_t,
1021-
POINTER(c_uint32),
1022-
POINTER(zes_ras_handle_t),
1023-
]
1024-
funcPtr.restype = ze_result_t
1025-
retVal = funcPtr(hDevice, pCount, phRas)
1026-
return retVal
1027-
1028-
1029-
def zesRasGetStateExp(hRas, pCount, pState):
1030-
"""Wraps API:
1031-
ze_result_t zesRasGetStateExp(
1032-
zes_ras_handle_t hRas,
1033-
uint32_t* pCount,
1034-
zes_ras_state_exp_t* pState)
1035-
1036-
Parameters:
1037-
hRas: RAS handle
1038-
pCount: POINTER(c_uint32)
1039-
pState: POINTER(zes_ras_state_exp_t) or None
1040-
Returns:
1041-
ze_result_t - return code only, RAS states are filled into pState
1042-
"""
1043-
funcPtr = getFunctionPointerList("zesRasGetStateExp")
1044-
funcPtr.argtypes = [
1045-
zes_ras_handle_t,
1046-
POINTER(c_uint32),
1047-
POINTER(zes_ras_state_exp_t),
1048-
]
1049-
funcPtr.restype = ze_result_t
1050-
retVal = funcPtr(hRas, pCount, pState)
1051-
return retVal
1052-
1053-
1054-
def zesRasClearStateExp(hRas, category):
1055-
"""Wraps API:
1056-
ze_result_t zesRasClearStateExp(
1057-
zes_ras_handle_t hRas,
1058-
zes_ras_error_category_exp_t category)
1059-
1060-
Parameters:
1061-
hRas: RAS handle
1062-
category: RAS error category to clear
1063-
Returns:
1064-
ze_result_t - return code only
1065-
"""
1066-
funcPtr = getFunctionPointerList("zesRasClearStateExp")
1067-
funcPtr.argtypes = [zes_ras_handle_t, zes_ras_error_category_exp_t]
1068-
funcPtr.restype = ze_result_t
1069-
retVal = funcPtr(hRas, category)
1070-
return retVal
1071-
1072-
1073977
## Memory management functions ##
1074978
def zesDeviceEnumMemoryModules(hDevice, pCount, phMemory):
1075979
"""Wraps API:

0 commit comments

Comments
 (0)