@@ -288,42 +288,170 @@ def _fetch_rapl_files(self) -> None:
288288 """
289289 Fetches RAPL files from the RAPL directory
290290 """
291-
292- # consider files like `intel-rapl:$i`
293- files = list (filter (lambda x : ":" in x , os .listdir (self ._lin_rapl_dir )))
291+ # We'll scan common powercap locations and look for domain directories
292+ # that expose an `energy_uj` file. We try to be tolerant of permission
293+ # errors and simply skip unreadable entries instead of failing the whole
294+ # tracker when one RAPL subtree is not accessible (e.g., intel-rapl-mmio).
295+ candidate_bases = [
296+ self ._lin_rapl_dir ,
297+ os .path .dirname (self ._lin_rapl_dir ),
298+ "/sys/class/powercap" ,
299+ "/sys/devices/virtual/powercap" ,
300+ ]
301+
302+ # Deduplicate while preserving order and keep only existing paths
303+ seen = set ()
304+ candidate_bases = [
305+ p
306+ for p in candidate_bases
307+ if p and not (p in seen or seen .add (p )) and os .path .exists (p )
308+ ]
309+
310+ domain_dirs = []
311+ for base in candidate_bases :
312+ try :
313+ for entry in os .listdir (base ):
314+ # Look for powercap provider directories like 'intel-rapl' or 'intel-rapl-mmio'
315+ if not entry .startswith ("intel-rapl" ):
316+ continue
317+ entry_path = os .path .join (base , entry )
318+ if not os .path .isdir (entry_path ):
319+ continue
320+ # Look for domain directories under the provider that usually contain ':' in their name
321+ try :
322+ for sub in os .listdir (entry_path ):
323+ sub_path = os .path .join (entry_path , sub )
324+ if ":" in sub and os .path .isdir (sub_path ):
325+ # Only consider if energy file exists
326+ if os .path .exists (os .path .join (sub_path , "energy_uj" )):
327+ domain_dirs .append (sub_path )
328+ except Exception as e :
329+ if isinstance (e , PermissionError ):
330+ logger .warning (
331+ "Permission denied listing %s: %s" , entry_path , e
332+ )
333+ else :
334+ logger .debug ("Cannot list %s: %s" , entry_path , e )
335+ except Exception as e :
336+ if isinstance (e , PermissionError ):
337+ logger .warning (
338+ "Permission denied scanning %s for RAPL domains: %s" , base , e
339+ )
340+ else :
341+ logger .debug ("Cannot scan %s for RAPL domains: %s" , base , e )
342+
343+ # Fallback: if none found and the configured path looks like it directly
344+ # contains domain entries, try listing it (preserves backward compatibility).
345+ if not domain_dirs :
346+ try :
347+ for item in os .listdir (self ._lin_rapl_dir ):
348+ if ":" in item :
349+ path = os .path .join (self ._lin_rapl_dir , item )
350+ if os .path .isdir (path ) and os .path .exists (
351+ os .path .join (path , "energy_uj" )
352+ ):
353+ domain_dirs .append (path )
354+ except Exception :
355+ # ignore: we'll handle the empty domain_dirs case below
356+ pass
357+
358+ # Remove duplicates
359+ domain_dirs = list (dict .fromkeys (domain_dirs ))
294360
295361 i = 0
296- for file in files :
297- path = os .path .join (self ._lin_rapl_dir , file , "name" )
298- with open (path ) as f :
299- name = f .read ().strip ()
300- # Fake the name used by Power Gadget
301- # We ignore "core" in name as it seems to be included in "package" for Intel CPU.
302- # TODO: Use "dram" for memory power
362+ for domain_dir in domain_dirs :
363+ try :
364+ name_path = os .path .join (domain_dir , "name" )
365+ name = None
366+ if os .path .exists (name_path ):
367+ try :
368+ with open (name_path ) as f :
369+ name = f .read ().strip ()
370+ except Exception as e :
371+ if isinstance (e , PermissionError ):
372+ logger .warning (
373+ "Permission denied reading name file %s: %s" ,
374+ name_path ,
375+ e ,
376+ )
377+ else :
378+ logger .debug (
379+ "Unable to read name file %s: %s" , name_path , e
380+ )
381+ if not name :
382+ # Use the domain directory basename as a fallback
383+ name = os .path .basename (domain_dir )
384+
303385 if "package" in name :
304386 name = f"Processor Energy Delta_{ i } (kWh)"
305387 i += 1
306- # RAPL file to take measurement from
307- rapl_file = os .path .join (self ._lin_rapl_dir , file , "energy_uj" )
308- # RAPL file containing maximum possible value of energy_uj above which it wraps
309- rapl_file_max = os .path .join (
310- self ._lin_rapl_dir , file , "max_energy_range_uj"
311- )
388+
389+ rapl_file = os .path .join (domain_dir , "energy_uj" )
390+ rapl_file_max = os .path .join (domain_dir , "max_energy_range_uj" )
391+
392+ # Quick sanity check: can we read the energy value? If not, either
393+ # fail (for main/package domains) or skip gracefully.
394+ is_required_main = ("package" in name .lower ()) or os .path .basename (
395+ domain_dir
396+ ).endswith (":0" )
312397 try :
313- # Try to read the file to be sure we can
314398 with open (rapl_file , "r" ) as f :
315399 _ = float (f .read ())
400+ except PermissionError as e :
401+ msg = f"Permission denied reading RAPL file { rapl_file } ."
402+ suggestion = "You can grant read permission with: sudo chmod -R a+r /sys/class/powercap/*"
403+ if is_required_main :
404+ # Fail early if the main package energy file is not readable
405+ raise PermissionError (msg + " " + suggestion ) from e
406+ else :
407+ logger .warning ("%s %s; skipping." , msg , suggestion )
408+ continue
409+ except Exception as e :
410+ if is_required_main :
411+ # If the main file is unreadable or non-numeric, fail early
412+ raise RuntimeError (
413+ f"Unable to read main RAPL file { rapl_file } : { e } "
414+ ) from e
415+ else :
416+ logger .debug (
417+ "Skipping non-numeric or unreadable RAPL file %s: %s" ,
418+ rapl_file ,
419+ e ,
420+ )
421+ continue
422+
423+ try :
316424 self ._rapl_files .append (
317425 RAPLFile (name = name , path = rapl_file , max_path = rapl_file_max )
318426 )
319427 logger .debug ("We will read Intel RAPL files at %s" , rapl_file )
320- except PermissionError as e :
321- raise PermissionError (
322- "PermissionError : Unable to read Intel RAPL files for CPU power, we will use a constant for your CPU power."
323- + " Please view https://github.com/mlco2/codecarbon/issues/244"
324- + " for workarounds : %s" ,
428+ except Exception as e :
429+ if isinstance (e , PermissionError ) and is_required_main :
430+ raise
431+ if isinstance (e , PermissionError ):
432+ logger .warning (
433+ "Permission denied while initializing RAPL file %s: %s" ,
434+ rapl_file ,
435+ e ,
436+ )
437+ else :
438+ logger .debug (
439+ "Unable to initialize RAPLFile for %s: %s" , rapl_file , e
440+ )
441+ continue
442+ except Exception as e :
443+ if isinstance (e , PermissionError ):
444+ # NOTE(review): any PermissionError reaching this handler, including
445+ # the one re-raised above for a main/package domain, is only logged
446+ # as a warning and the domain is skipped; it is never propagated.
447+ logger .warning (
448+ "Permission error while processing RAPL domain %s: %s" ,
449+ domain_dir ,
325450 e ,
326- ) from e
451+ )
452+ else :
453+ logger .debug ("Error processing RAPL domain %s: %s" , domain_dir , e )
454+ continue
327455
328456 def get_cpu_details (self , duration : Time ) -> Dict :
329457 """
0 commit comments