Skip to content

Commit b7ecb41

Browse files
author
benoit-cty
committed
Better RAPL handling
1 parent 1ca38f6 commit b7ecb41

4 files changed

Lines changed: 350 additions & 40 deletions

File tree

codecarbon/core/cpu.py

Lines changed: 126 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -443,17 +443,23 @@ def _fetch_rapl_files(self) -> None:
443443
except Exception as e:
444444
if isinstance(e, PermissionError):
445445
logger.warning(
446-
"Permission denied listing %s: %s", entry_path, e
446+
"\tRAPL - Permission denied listing %s: %s",
447+
entry_path,
448+
e,
447449
)
448450
else:
449-
logger.debug("Cannot list %s: %s", entry_path, e)
451+
logger.debug("\tRAPL - Cannot list %s: %s", entry_path, e)
450452
except Exception as e:
451453
if isinstance(e, PermissionError):
452454
logger.warning(
453-
"Permission denied scanning %s for RAPL domains: %s", base, e
455+
"\tRAPL - Permission denied scanning %s for RAPL domains: %s",
456+
base,
457+
e,
454458
)
455459
else:
456-
logger.debug("Cannot scan %s for RAPL domains: %s", base, e)
460+
logger.debug(
461+
"\tRAPL - Cannot scan %s for RAPL domains: %s", base, e
462+
)
457463

458464
# Fallback: if none found and the configured path looks like it directly
459465
# contains domain entries, try listing it (preserves backward compatibility).
@@ -473,6 +479,12 @@ def _fetch_rapl_files(self) -> None:
473479
# Remove duplicates
474480
domain_dirs = list(dict.fromkeys(domain_dirs))
475481

482+
# Build a list of successfully readable domains with their metadata
483+
# We'll deduplicate at the end, after we know which ones are readable
484+
readable_domains = (
485+
[]
486+
) # List of (name, domain_dir, is_mmio, rapl_file, rapl_file_max)
487+
476488
i = 0
477489
for domain_dir in domain_dirs:
478490
try:
@@ -485,13 +497,13 @@ def _fetch_rapl_files(self) -> None:
485497
except Exception as e:
486498
if isinstance(e, PermissionError):
487499
logger.warning(
488-
"Permission denied reading name file %s: %s",
500+
"\tRAPL - Permission denied reading name file %s: %s",
489501
name_path,
490502
e,
491503
)
492504
else:
493505
logger.debug(
494-
"Unable to read name file %s: %s", name_path, e
506+
"\tRAPL - Unable to read name file %s: %s", name_path, e
495507
)
496508
if not name:
497509
# Use the domain directory basename as a fallback
@@ -519,40 +531,127 @@ def _fetch_rapl_files(self) -> None:
519531
if is_required_main:
520532
found_main_readable = True
521533
except PermissionError:
522-
msg = f"Permission denied reading RAPL file {rapl_file}."
534+
msg = f"\tRAPL - Permission denied reading RAPL file {rapl_file}."
523535
suggestion = "You can grant read permission with: sudo chmod -R a+r /sys/class/powercap/*"
524536
logger.warning("%s %s; skipping.", msg, suggestion)
525537
# do not raise; skip this domain
526538
continue
527539
except Exception as e:
528540
logger.debug(
529-
"Skipping non-numeric or unreadable RAPL file %s: %s",
541+
"\tRAPL - Skipping non-numeric or unreadable RAPL file %s: %s",
530542
rapl_file,
531543
e,
532544
)
533545
continue
534546

535-
try:
536-
self._rapl_files.append(
537-
RAPLFile(name=name, path=rapl_file, max_path=rapl_file_max)
538-
)
539-
logger.debug("We will read Intel RAPL files at %s", rapl_file)
540-
except PermissionError as e:
541-
logger.warning(
542-
"Permission denied while initializing RAPL file %s: %s",
543-
rapl_file,
544-
e,
545-
)
546-
continue
547-
except Exception as e:
548-
logger.debug(
549-
"Unable to initialize RAPLFile for %s: %s", rapl_file, e
550-
)
551-
continue
547+
# This domain is readable, add it to our list
548+
is_mmio = "intel-rapl-mmio" in domain_dir
549+
readable_domains.append(
550+
(name, domain_dir, is_mmio, rapl_file, rapl_file_max)
551+
)
552552
except Exception as e:
553553
# Log and continue on any per-domain failure; availability is
554554
# determined from whether a main/package counter was readable.
555-
logger.warning("Error processing RAPL domain %s: %s", domain_dir, e)
555+
logger.warning(
556+
"\tRAPL - Error processing RAPL domain %s: %s", domain_dir, e
557+
)
558+
continue
559+
560+
# Deduplicate readable domains with same name, preferring MMIO over MSR-based
561+
# This prevents double-counting when same domain appears in both
562+
# intel-rapl and intel-rapl-mmio (e.g., package-0)
563+
564+
# First, check if we have a psys (platform/system) domain
565+
# psys provides total platform power and already includes package, core, uncore, etc.
566+
# Using psys alone is the best way to avoid double-counting on modern Intel systems
567+
psys_domain = None
568+
for domain_tuple in readable_domains:
569+
name, domain_dir, is_mmio, rapl_file, rapl_file_max = domain_tuple
570+
571+
# Check if this is a psys domain
572+
try:
573+
name_path = os.path.join(domain_dir, "name")
574+
if os.path.exists(name_path):
575+
with open(name_path) as f:
576+
domain_name = f.read().strip().lower()
577+
if domain_name == "psys":
578+
psys_domain = domain_tuple
579+
logger.info(
580+
"\tRAPL - Found psys (platform/system) domain - this provides "
581+
"total platform power and avoids double-counting"
582+
)
583+
break
584+
except Exception:
585+
pass
586+
587+
# If psys is available, use ONLY psys to avoid all double-counting
588+
if psys_domain:
589+
logger.info(
590+
"\tRAPL - Using only psys domain for power measurement to ensure accuracy. "
591+
"Other domains (package, core, uncore) are subsets of psys."
592+
)
593+
domain_map = {"psys": psys_domain}
594+
else:
595+
# No psys available, fall back to deduplicating package/core/uncore domains
596+
logger.warning(
597+
"\tRAPL - No psys domain found, using individual domains (package, core, uncore)"
598+
)
599+
domain_map = (
600+
{}
601+
) # name -> (name, domain_dir, is_mmio, rapl_file, rapl_file_max)
602+
for domain_tuple in readable_domains:
603+
name, domain_dir, is_mmio, rapl_file, rapl_file_max = domain_tuple
604+
605+
# Extract the base name (without "Processor Energy Delta_X" numbering)
606+
# to properly identify duplicates
607+
base_name = name
608+
if "Processor Energy" in name:
609+
# This is a package domain, use the original domain name for deduplication
610+
try:
611+
name_path = os.path.join(domain_dir, "name")
612+
if os.path.exists(name_path):
613+
with open(name_path) as f:
614+
base_name = f.read().strip()
615+
except Exception:
616+
base_name = os.path.basename(domain_dir)
617+
618+
# If we haven't seen this base name, or we're replacing MSR with MMIO, keep it
619+
if base_name not in domain_map or (
620+
is_mmio and not domain_map[base_name][2]
621+
):
622+
domain_map[base_name] = domain_tuple
623+
624+
logger.debug(
625+
"\tRAPL - Found %d unique RAPL domains after deduplication (from %d readable domains)",
626+
len(domain_map),
627+
len(readable_domains),
628+
)
629+
630+
# Now create RAPLFile objects for deduplicated domains
631+
for name, _, is_mmio, rapl_file, rapl_file_max in domain_map.values():
632+
try:
633+
# Determine interface type for logging
634+
interface_type = "MMIO" if is_mmio else "MSR"
635+
self._rapl_files.append(
636+
RAPLFile(name=name, path=rapl_file, max_path=rapl_file_max)
637+
)
638+
logger.debug(
639+
"\tRAPL - Reading RAPL domain '%s' via %s interface at %s",
640+
name,
641+
interface_type,
642+
rapl_file,
643+
)
644+
except PermissionError as e:
645+
logger.warning(
646+
"\tRAPL - Permission denied while initializing RAPL file %s: %s",
647+
rapl_file,
648+
e,
649+
)
650+
continue
651+
except Exception as e:
652+
logger.debug(
653+
"\tRAPL - Unable to initialize RAPLFile for %s: %s", rapl_file, e
654+
)
556655
continue
557656

558657
# Save whether we found a readable main/package energy counter so
@@ -580,7 +679,7 @@ def get_cpu_details(self, duration: Time) -> Dict:
580679
)
581680
except Exception as e:
582681
logger.info(
583-
"Unable to read Intel RAPL files at %s\n \
682+
"\tRAPL - Unable to read Intel RAPL files at %s\n \
584683
Exception occurred %s",
585684
self._rapl_files,
586685
e,

examples/intel_rapl_show.py

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,50 @@
33
# The script can be used to monitor power consumption over time for a specific power domain
44
# The power consumption is read from the energy counter in microjoules and converted to watts
55

6+
"""
7+
8+
Sample output for Intel(R) Core(TM) Ultra 7 265H
9+
https://www.intel.com/content/www/us/en/products/sku/241750/intel-core-ultra-7-processor-265h-24m-cache-up-to-5-30-ghz/specifications.html
10+
- Processor Base Power 28 W
11+
- Maximum Turbo Power 115 W
12+
- Minimum Assured Power 20 W
13+
14+
15+
16+
Available Power Domains:
17+
[{'path': 'intel-rapl:1', 'name': 'psys', 'is_mmio': False}, {'path': 'intel-rapl:0:0', 'name': 'core', 'is_mmio': False}, {'path': 'intel-rapl-mmio:0', 'name': 'package-0', 'is_mmio': True}, {'path': 'intel-rapl:0:1', 'name': 'uncore', 'is_mmio': False}]
18+
Starting Power Monitoring (deduplication: True):
19+
20+
Monitoring domains:
21+
- psys (intel-rapl:1) via MSR
22+
- core (intel-rapl:0:0) via MSR
23+
- package-0 (intel-rapl-mmio:0) via MMIO
24+
- uncore (intel-rapl:0:1) via MSR
25+
26+
27+
Idle :
28+
- Domain 'psys' (MSR): 8.03 Watts
29+
- Domain 'core' (MSR): 1.51 Watts
30+
- Domain 'package-0' (MMIO): 4.61 Watts
31+
- Domain 'uncore' (MSR): 0.57 Watts
32+
- Total Power Consumption: 14.72 Watts
33+
34+
With `7z b` to load the CPU:
35+
36+
- Domain 'psys' (MSR): 22.89 Watts
37+
- Domain 'core' (MSR): 14.49 Watts
38+
- Domain 'package-0' (MMIO): 18.51 Watts
39+
- Domain 'uncore' (MSR): 0.19 Watts
40+
- Total Power Consumption: 56.07 Watts
41+
42+
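Domain hierarchy (illustrative values from a different measurement). The domains are nested, so the "Total Power Consumption" lines above, which add all four domains together, count package-0, core and uncore more than once; psys alone already represents the platform total.

psys (9.61W) ← Most comprehensive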
43+
├── package-0 (3.78W)
44+
│ ├── core (0.84W) ← CPU cores only
45+
│ └── uncore (0.21W) ← Memory controller, cache, iGPU
46+
└── Other platform components (~5.8W)
47+
└── Chipset, PCIe, etc.
48+
49+
"""
650
import json
751
import os
852
import time
@@ -22,7 +66,10 @@ def inspect_rapl_domains(self):
2266
# Iterate through all RAPL domains
2367
for domain_dir in os.listdir(self.rapl_base_path):
2468
print(domain_dir)
25-
if not domain_dir.startswith("intel-rapl:"):
69+
if not (
70+
domain_dir.startswith("intel-rapl:")
71+
or domain_dir.startswith("intel-rapl-mmio:")
72+
):
2673
continue
2774

2875
domain_path = os.path.join(self.rapl_base_path, domain_dir)
@@ -134,17 +181,23 @@ def __init__(self):
134181
# Base path for RAPL power readings in sysfs
135182
self.rapl_base_path = "/sys/class/powercap/intel-rapl/subsystem"
136183

137-
def list_power_domains(self):
184+
def list_power_domains(self, deduplicate=True):
138185
"""
139-
List available RAPL power domains
186+
List available RAPL power domains (including intel-rapl and intel-rapl-mmio)
187+
188+
:param deduplicate: If True, avoid duplicate domains with same name, preferring MMIO
189+
:return: List of domain info dictionaries
140190
"""
141-
self.domains = []
191+
all_domains = []
142192
try:
143193
for domain in os.listdir(self.rapl_base_path):
144-
if domain.startswith("intel-rapl:"):
194+
if domain.startswith("intel-rapl:") or domain.startswith(
195+
"intel-rapl-mmio:"
196+
):
145197
domain_info = {
146198
"path": domain,
147199
"name": "",
200+
"is_mmio": domain.startswith("intel-rapl-mmio:"),
148201
}
149202
if os.path.exists(
150203
os.path.join(self.rapl_base_path, domain, "name")
@@ -153,12 +206,39 @@ def list_power_domains(self):
153206
os.path.join(self.rapl_base_path, domain, "name"), "r"
154207
) as f:
155208
domain_info["name"] = f.read().strip()
156-
self.domains.append(domain_info)
209+
all_domains.append(domain_info)
210+
211+
# Deduplicate if requested
212+
if deduplicate:
213+
self.domains = self._deduplicate_domains(all_domains)
214+
else:
215+
self.domains = all_domains
216+
157217
return self.domains
158218
except Exception as e:
159219
print(f"Error listing power domains: {e}")
160220
return []
161221

222+
def _deduplicate_domains(self, domains):
    """
    Remove duplicate domains with the same name, preferring MMIO over MSR-based

    Two entries are duplicates when they carry the same non-empty name
    (e.g. "package-0" exposed through both intel-rapl and intel-rapl-mmio).
    Entries whose name is empty (no "name" file was found for them) are
    keyed by their path instead, so distinct unnamed domains are never
    merged into one.

    :param domains: List of domain info dictionaries ("path", "name", "is_mmio")
    :return: Deduplicated list, ordered by first appearance of each domain
    """
    domain_map = {}

    for domain in domains:
        name = domain["name"]

        # An empty name carries no identity: fall back to the sysfs path so
        # that unrelated unnamed domains do not overwrite each other. The
        # tuple tag keeps the name and path key spaces separate.
        key = ("name", name) if name else ("path", domain.get("path"))

        # If we haven't seen this key, or we're replacing MSR with MMIO.
        # Re-assigning an existing key keeps its original position in the dict.
        if key not in domain_map or (
            domain["is_mmio"] and not domain_map[key]["is_mmio"]
        ):
            domain_map[key] = domain

    return list(domain_map.values())
241+
162242
def read_power_consumption(self, domain=None, interval=1):
163243
"""
164244
Read power consumption for a specific RAPL domain
@@ -201,28 +281,38 @@ def read_power_consumption(self, domain=None, interval=1):
201281
print(f"Error reading power for {domain}: {e}")
202282
return None
203283

204-
def monitor_power(self, interval=1, duration=10):
284+
def monitor_power(self, interval=1, duration=10, deduplicate=True):
205285
"""
206286
Monitor power consumption over time
207287
208288
:param interval: Sampling interval in seconds
209289
:param duration: Total monitoring duration in seconds
290+
:param deduplicate: If True, avoid counting duplicate domains (e.g., same package via MSR and MMIO)
210291
"""
211-
print("Starting Power Monitoring:")
292+
print(f"Starting Power Monitoring (deduplication: {deduplicate}):")
212293
if not self.domains:
213-
self.domains = self.list_power_domains()
294+
self.domains = self.list_power_domains(deduplicate=deduplicate)
295+
296+
# Show which domains are being monitored
297+
print("\nMonitoring domains:")
298+
for domain in self.domains:
299+
interface = "MMIO" if domain.get("is_mmio") else "MSR"
300+
print(f" - {domain.get('name')} ({domain.get('path')}) via {interface}")
301+
print()
302+
214303
start_time = time.time()
215304

216305
while time.time() - start_time < duration:
217306
total_power = 0
218307
for domain in self.domains:
219308
power = self.read_power_consumption(domain)
220309
if power is not None:
310+
interface = "MMIO" if domain.get("is_mmio") else "MSR"
221311
print(
222-
f"Domain '{domain.get('path').split('/')[-1]}/{domain.get('name')}' as a power consumption of {power:.2f} Watts"
312+
f"Domain '{domain.get('name')}' ({interface}): {power:.2f} Watts"
223313
)
224314
total_power += power
225-
print(f"Total Power Consumption: {total_power:.2f} Watts")
315+
print(f"Total Power Consumption: {total_power:.2f} Watts\n")
226316

227317
time.sleep(interval)
228318

0 commit comments

Comments
 (0)