@@ -274,23 +274,6 @@ def get_device_action_string(action):
274274 return action_map .get (action , f"UNKNOWN_DEVICE_ACTION_{ action } " )
275275
276276
def get_ras_error_category_string(category):
    """Return the symbolic name for a RAS error category enum value.

    A value that matches none of the listed ``pz.ZES_RAS_ERROR_CATEGORY_EXP_*``
    constants is rendered as a placeholder string built from the value itself.
    """
    known_names = (
        "ZES_RAS_ERROR_CATEGORY_EXP_RESET",
        "ZES_RAS_ERROR_CATEGORY_EXP_PROGRAMMING_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_DRIVER_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_COMPUTE_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_NON_COMPUTE_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_CACHE_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_DISPLAY_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_MEMORY_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_SCALE_ERRORS",
        "ZES_RAS_ERROR_CATEGORY_EXP_L3FABRIC_ERRORS",
    )
    # Every display name equals the identifier of its constant, so the enum
    # values are looked up on pz by name, in the same order as listed above.
    names_by_value = {getattr(pz, name): name for name in known_names}
    fallback = f"UNKNOWN_RAS_CATEGORY_{category} "
    return names_by_value.get(category, fallback)
292-
293-
294277def is_root_user ():
295278 """Return whether the current user has root privileges on platforms that support it"""
296279 geteuid = getattr (os , "geteuid" , None )
@@ -692,80 +675,6 @@ def test_ecc_module(device_handle, device_index):
692675 return True
693676
694677
def _print_ras_states(ras_states, count):
    """Print category name and error counter for the first ``count`` entries of ``ras_states``."""
    # Slicing clamps to the buffer length, so a count larger than the buffer is harmless.
    for ras_state in ras_states[:count]:
        print_verbose(
            f" {get_ras_error_category_string(ras_state.category)} : {ras_state.errorCounter} "
        )


def test_ras_module(device_handle, device_index):
    """Test RAS handle enumeration, state retrieval, and clear-state operations.

    For every RAS error set enumerated on the device, the per-category state
    is queried and printed. When ``is_root_user()`` is true, each reported
    category is then cleared and the state is queried and printed again.

    Args:
        device_handle: Device handle passed to ``pz.zesDeviceEnumRasErrorSets``.
        device_index: Index of the device, used only in printed messages.

    Returns:
        False when either enumeration call or any clear call fails ``check_rc``;
        True otherwise. A failed state query only skips the affected handle.
    """
    print(f"\n ---- Device {device_index} RAS Test ----")

    # First call with a null buffer only asks for the number of error sets.
    ras_count = c_uint32(0)
    rc = pz.zesDeviceEnumRasErrorSets(device_handle, byref(ras_count), None)
    if not check_rc(f"zesDeviceEnumRasErrorSets(device {device_index} , count)", rc):
        return False

    if ras_count.value == 0:
        print_verbose("No RAS error sets found on this device")
        return True

    print_verbose(f"Found {ras_count.value} RAS error set(s)")

    RasArray = pz.zes_ras_handle_t * ras_count.value
    ras_handles = RasArray()

    # Second call fills the handle buffer.
    rc = pz.zesDeviceEnumRasErrorSets(device_handle, byref(ras_count), ras_handles)
    if not check_rc(f"zesDeviceEnumRasErrorSets(device {device_index} , handles)", rc):
        return False

    for i in range(ras_count.value):
        print_verbose(f"\n RAS Handle {i} :")

        state_count = c_uint32(0)
        rc = pz.zesRasGetStateExp(ras_handles[i], byref(state_count), None)
        if not check_rc(f"zesRasGetStateExp(handle {i} , count)", rc):
            continue

        if state_count.value == 0:
            print_verbose(" No RAS categories reported for this handle")
            continue

        RasStateArray = pz.zes_ras_state_exp_t * state_count.value
        ras_states = RasStateArray()

        rc = pz.zesRasGetStateExp(ras_handles[i], byref(state_count), ras_states)
        if not check_rc(f"zesRasGetStateExp(handle {i} , states)", rc):
            continue

        # state_count is passed by reference, so the call may have changed it.
        # Only the first state_count.value entries are treated as valid; any
        # remaining buffer slots are still zero-initialised and must not be
        # printed or used as a category to clear.
        print_verbose(" RAS States:")
        _print_ras_states(ras_states, state_count.value)

        if not is_root_user():
            print_verbose(
                " Skipping zesRasClearStateExp due to insufficient permissions"
            )
            continue

        for ras_state in ras_states[:state_count.value]:
            rc = pz.zesRasClearStateExp(ras_handles[i], ras_state.category)
            if not check_rc(
                f"zesRasClearStateExp(handle {i} , {get_ras_error_category_string(ras_state.category)} )",
                rc,
            ):
                return False

        # Re-read the state into the same buffer to show the counters after clearing.
        rc = pz.zesRasGetStateExp(ras_handles[i], byref(state_count), ras_states)
        if not check_rc(f"zesRasGetStateExp(handle {i} , verify)", rc):
            continue

        print_verbose(" RAS States After Clear:")
        _print_ras_states(ras_states, state_count.value)

    return True
767-
768-
769678def test_device_processes (device_handle , device_index ):
770679 """Test device processes state"""
771680 print (f"\n ---- Device { device_index } Processes Test ----" )
@@ -1362,9 +1271,6 @@ def run_all_tests():
13621271 # Test ECC module
13631272 test_ecc_module (devices [device_idx ], device_idx )
13641273
1365- # Test RAS module
1366- test_ras_module (devices [device_idx ], device_idx )
1367-
13681274 # Test memory modules
13691275 test_memory_modules (devices [device_idx ], device_idx )
13701276
@@ -1394,7 +1300,6 @@ def main():
13941300 %(prog)s -g # Global operations (device properties and processes) only
13951301 %(prog)s -p # PCI tests only
13961302 %(prog)s -C # ECC tests only
1397- %(prog)s -R # RAS tests only
13981303 %(prog)s -o # Power tests only
13991304 %(prog)s -f # Frequency tests only
14001305 %(prog)s -t # Temperature tests only
@@ -1418,7 +1323,6 @@ def main():
14181323 )
14191324 parser .add_argument ("-p" , "--pci" , action = "store_true" , help = "Run only PCI tests" )
14201325 parser .add_argument ("-C" , "--ecc" , action = "store_true" , help = "Run only ECC tests" )
1421- parser .add_argument ("-R" , "--ras" , action = "store_true" , help = "Run only RAS tests" )
14221326 parser .add_argument (
14231327 "-f" ,
14241328 "--frequency" ,
@@ -1446,7 +1350,6 @@ def main():
14461350 or getattr (args , "global" , False )
14471351 or args .pci
14481352 or args .ecc
1449- or args .ras
14501353 or args .power
14511354 or args .frequency
14521355 or args .temperature
@@ -1488,9 +1391,6 @@ def main():
14881391 if args .ecc :
14891392 test_ecc_module (devices [device_idx ], device_idx )
14901393
1491- if args .ras :
1492- test_ras_module (devices [device_idx ], device_idx )
1493-
14941394 if args .memory :
14951395 test_memory_modules (devices [device_idx ], device_idx )
14961396
0 commit comments