@@ -44,23 +44,119 @@ def is_powergadget_available() -> bool:
4444 return False
4545
4646
def is_rapl_available(rapl_dir: Optional[str] = None) -> bool:
    """
    Checks if Intel RAPL is available on the system.

    This is a lightweight pre-check: it scans powercap directories for a
    main/package RAPL domain whose `energy_uj` counter can be opened and
    parsed as a number. It does not instantiate `IntelRAPL`, and it does not
    raise: any unexpected error is logged at debug level and reported as
    "not available" so callers can fall back gracefully.

    Args:
        rapl_dir (Optional[str]): Directory to scan for RAPL domains. When
            `None` or equal to the default
            (`/sys/class/powercap/intel-rapl/subsystem`), the common system
            powercap locations are scanned as well. Any other value restricts
            the scan to that directory and its parent (e.g. for testing).

    Returns:
        bool: `True` if Intel RAPL is available, `False` otherwise.
    """
    default_rapl_dir = "/sys/class/powercap/intel-rapl/subsystem"
    if rapl_dir is None:
        rapl_dir = default_rapl_dir

    try:
        candidate_bases = [rapl_dir, os.path.dirname(rapl_dir)]
        if os.path.abspath(rapl_dir) == os.path.abspath(default_rapl_dir):
            # Production: also scan the other common RAPL locations.
            candidate_bases += [
                "/sys/class/powercap",
                "/sys/devices/virtual/powercap",
            ]

        # `dict.fromkeys` deduplicates while preserving order.
        for base in dict.fromkeys(candidate_bases):
            if not base or not os.path.exists(base):
                continue
            for domain_dir in _iter_rapl_domain_dirs(base):
                if _is_readable_main_rapl_domain(domain_dir):
                    return True
    except Exception:
        # Detection must never break the caller's initialization.
        logger.debug("Unexpected error while checking RAPL availability", exc_info=True)

    return False


def _list_dir_or_empty(path: str) -> list:
    """Return the entries of `path`, or an empty list if it cannot be listed."""
    try:
        return os.listdir(path)
    except OSError:
        # One unreadable subtree must not hide readable domains located in
        # sibling directories, so log and keep scanning.
        logger.debug("Error while scanning %s for RAPL domains", path, exc_info=True)
        return []


def _iter_rapl_domain_dirs(base: str):
    """Yield every directory under `base` that may be a RAPL domain.

    Two layouts are supported: domain directories (name contains ':') located
    directly inside `base`, and domain directories nested one level below an
    `intel-rapl*` directory.
    """
    for entry in _list_dir_or_empty(base):
        entry_path = os.path.join(base, entry)
        if not os.path.isdir(entry_path):
            continue
        if ":" in entry:
            yield entry_path
        if entry.startswith("intel-rapl"):
            for sub in _list_dir_or_empty(entry_path):
                sub_path = os.path.join(entry_path, sub)
                if ":" in sub and os.path.isdir(sub_path):
                    yield sub_path


def _is_readable_main_rapl_domain(domain_dir: str) -> bool:
    """Tell whether `domain_dir` is a main/package domain with a usable counter.

    A domain counts as main when its directory name ends with ':0' or its
    `name` file contains "package". The counter is usable when `energy_uj`
    can be opened and its content parsed as a number.
    """
    is_main = os.path.basename(domain_dir).endswith(":0")
    if not is_main:
        try:
            with open(os.path.join(domain_dir, "name"), "r") as name_file:
                is_main = "package" in name_file.read().strip().lower()
        except (OSError, ValueError):
            # Missing or undecodable name file: only the ':0' rule applies.
            is_main = False
    if not is_main:
        return False

    # Read the counter instead of trusting `os.access`: a file can be reported
    # as accessible and still fail to open or contain no numeric value.
    try:
        with open(os.path.join(domain_dir, "energy_uj"), "r") as energy_file:
            float(energy_file.read())
    except (OSError, ValueError):
        return False
    return True
64160
65161
66162def is_psutil_available ():
@@ -292,12 +388,30 @@ def _fetch_rapl_files(self) -> None:
292388 # that expose an `energy_uj` file. We try to be tolerant to permission
293389 # errors and simply skip unreadable entries instead of failing the whole
294390 # tracker when one RAPL subtree is not accessible (e.g., intel-rapl-mmio).
295- candidate_bases = [
296- self ._lin_rapl_dir ,
297- os .path .dirname (self ._lin_rapl_dir ),
298- "/sys/class/powercap" ,
299- "/sys/devices/virtual/powercap" ,
300- ]
391+ #
392+ # When using the default RAPL directory, we scan all common system locations
393+ # to ensure we don't miss any RAPL providers (including intel-rapl-mmio).
394+ # When a custom rapl_dir is provided (e.g., for testing), we only scan
395+ # that directory and its parent to avoid interference with system files.
396+ default_rapl_dir = "/sys/class/powercap/intel-rapl/subsystem"
397+ is_default_dir = os .path .abspath (self ._lin_rapl_dir ) == os .path .abspath (
398+ default_rapl_dir
399+ )
400+
401+ if is_default_dir :
402+ # Production: scan all common RAPL locations
403+ candidate_bases = [
404+ self ._lin_rapl_dir ,
405+ os .path .dirname (self ._lin_rapl_dir ),
406+ "/sys/class/powercap" ,
407+ "/sys/devices/virtual/powercap" ,
408+ ]
409+ else :
410+ # Testing or custom directory: only scan the specified location
411+ candidate_bases = [
412+ self ._lin_rapl_dir ,
413+ os .path .dirname (self ._lin_rapl_dir ),
414+ ]
301415
302416 # Deduplicate while preserving order and keep only existing paths
303417 seen = set ()
@@ -308,6 +422,7 @@ def _fetch_rapl_files(self) -> None:
308422 ]
309423
310424 domain_dirs = []
425+ found_main_readable = False
311426 for base in candidate_bases :
312427 try :
313428 for entry in os .listdir (base ):
@@ -389,70 +504,64 @@ def _fetch_rapl_files(self) -> None:
389504 rapl_file = os .path .join (domain_dir , "energy_uj" )
390505 rapl_file_max = os .path .join (domain_dir , "max_energy_range_uj" )
391506
392- # Quick sanity check: can we read the energy value? If not, either
393- # fail (for main/package domains) or skip gracefully.
507+ # Quick sanity check: can we read the energy value? If not,
508+ # skip gracefully but mark whether we found a readable main
509+ # domain. We avoid raising here: callers should use
510+ # `is_rapl_available()` to pre-check availability and decide
511+ # whether to instantiate the full interface.
394512 is_required_main = ("package" in name .lower ()) or os .path .basename (
395513 domain_dir
396514 ).endswith (":0" )
397515 try :
398516 with open (rapl_file , "r" ) as f :
399517 _ = float (f .read ())
400- except PermissionError as e :
518+ # If the main/package counter is readable, mark availability
519+ if is_required_main :
520+ found_main_readable = True
521+ except PermissionError :
401522 msg = f"Permission denied reading RAPL file { rapl_file } ."
402523 suggestion = "You can grant read permission with: sudo chmod -R a+r /sys/class/powercap/*"
403- if is_required_main :
404- # Fail early if the main package energy file is not readable
405- raise PermissionError (msg + " " + suggestion ) from e
406- else :
407- logger .warning ("%s %s; skipping." , msg , suggestion )
408- continue
524+ logger .warning ("%s %s; skipping." , msg , suggestion )
525+ # do not raise; skip this domain
526+ continue
409527 except Exception as e :
410- if is_required_main :
411- # If the main file is unreadable or non-numeric, fail early
412- raise RuntimeError (
413- f"Unable to read main RAPL file { rapl_file } : { e } "
414- ) from e
415- else :
416- logger .debug (
417- "Skipping non-numeric or unreadable RAPL file %s: %s" ,
418- rapl_file ,
419- e ,
420- )
421- continue
528+ logger .debug (
529+ "Skipping non-numeric or unreadable RAPL file %s: %s" ,
530+ rapl_file ,
531+ e ,
532+ )
533+ continue
422534
423535 try :
424536 self ._rapl_files .append (
425537 RAPLFile (name = name , path = rapl_file , max_path = rapl_file_max )
426538 )
427539 logger .debug ("We will read Intel RAPL files at %s" , rapl_file )
428- except Exception as e :
429- if isinstance (e , PermissionError ) and is_required_main :
430- raise
431- if isinstance (e , PermissionError ):
432- logger .warning (
433- "Permission denied while initializing RAPL file %s: %s" ,
434- rapl_file ,
435- e ,
436- )
437- else :
438- logger .debug (
439- "Unable to initialize RAPLFile for %s: %s" , rapl_file , e
440- )
441- continue
442- except Exception as e :
443- if isinstance (e , PermissionError ):
444- # If we get a permission error here and it's not handled above,
445- # surface it as a warning unless it's the main domain which
446- # should have failed earlier.
540+ except PermissionError as e :
447541 logger .warning (
448- "Permission error while processing RAPL domain %s: %s" ,
449- domain_dir ,
542+ "Permission denied while initializing RAPL file %s: %s" ,
543+ rapl_file ,
450544 e ,
451545 )
452- else :
453- logger .debug ("Error processing RAPL domain %s: %s" , domain_dir , e )
546+ continue
547+ except Exception as e :
548+ logger .debug (
549+ "Unable to initialize RAPLFile for %s: %s" , rapl_file , e
550+ )
551+ continue
552+ except Exception as e :
553+ # Log and continue on any per-domain failure; availability is
554+ # determined from whether a main/package counter was readable.
555+ logger .warning ("Error processing RAPL domain %s: %s" , domain_dir , e )
454556 continue
455557
558+ # Save whether we found a readable main/package energy counter so
559+ # callers can query `intel_rapl._available` if desired.
560+ try :
561+ self ._available = bool (found_main_readable )
562+ except Exception :
563+ self ._available = False
564+
456565 def get_cpu_details (self , duration : Time ) -> Dict :
457566 """
458567 Fetches the CPU Energy Deltas by fetching values from RAPL files
0 commit comments