Skip to content

Commit 1f759d9

Browse files
perf: cache hardware detection and optimize warm-path reuse
Add process-level hardware setup cache, probe result caching, platform-aware CPU backend selection, and parallel CPU/GPU setup for faster repeat runs. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent e250afe commit 1f759d9

16 files changed

Lines changed: 954 additions & 49 deletions

codecarbon/core/cpu.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,32 @@
44
https://software.intel.com/content/www/us/en/develop/articles/intel-power-gadget.html
55
"""
66

7+
from __future__ import annotations
8+
79
import os
810
import re
911
import shutil
1012
import subprocess
1113
import sys
12-
from typing import Dict, Optional, Tuple
14+
from functools import lru_cache
15+
from typing import TYPE_CHECKING, Dict, Optional, Tuple
1316

14-
import pandas as pd
1517
import psutil
1618
from rapidfuzz import fuzz, process, utils
1719

1820
from codecarbon.core.rapl import RAPLFile
1921
from codecarbon.core.units import Time
2022
from codecarbon.core.util import count_cpus, detect_cpu_model
2123
from codecarbon.external.logger import logger
22-
from codecarbon.input import DataSource
24+
25+
if TYPE_CHECKING:
26+
import pandas as pd
2327

2428
# default W value per core for a CPU if no model is found in the ref csv
2529
DEFAULT_POWER_PER_CORE = 4
2630

2731

32+
@lru_cache(maxsize=1)
2833
def is_powergadget_available() -> bool:
2934
"""
3035
Checks if Intel Power Gadget is available on the system.
@@ -44,6 +49,10 @@ def is_powergadget_available() -> bool:
4449
return False
4550

4651

52+
def clear_powergadget_cache() -> None:
    """Forget the memoized result of ``is_powergadget_available``.

    ``is_powergadget_available`` is wrapped in ``functools.lru_cache``, so its
    first result is kept for later calls. Clearing the cache makes the next
    call run the availability check again.
    """
    is_powergadget_available.cache_clear()
54+
55+
4756
def _get_candidate_bases(rapl_dir: str) -> list:
4857
"""Get list of directories to scan for RAPL files."""
4958
default_rapl_dir = "/sys/class/powercap/intel-rapl/subsystem"
@@ -366,6 +375,8 @@ def get_cpu_details(self) -> Dict:
366375
self._log_values()
367376
cpu_details = {}
368377
try:
378+
import pandas as pd
379+
369380
cpu_data = pd.read_csv(self._log_file_path).dropna()
370381
for col_name in cpu_data.columns:
371382
if col_name in ["System Time", "Elapsed Time (sec)", "RDTSC"]:
@@ -892,6 +903,8 @@ def _get_cpu_constant_power(match: str, cpu_power_df: pd.DataFrame) -> int:
892903
return float(cpu_power_df[cpu_power_df["Name"] == match]["TDP"].values[0])
893904

894905
def _get_cpu_power_from_registry(self, cpu_model_raw: str) -> Optional[int]:
906+
from codecarbon.input import DataSource
907+
895908
cpu_power_df = DataSource().get_cpu_power_data()
896909
cpu_matching = self._get_matching_cpu(cpu_model_raw, cpu_power_df)
897910
if cpu_matching:

codecarbon/core/emissions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,17 @@
66
https://github.com/responsibleproblemsolving/energy-usage
77
"""
88

9-
from typing import Dict, Optional
10-
11-
import pandas as pd
9+
from typing import TYPE_CHECKING, Dict, Optional
1210

1311
from codecarbon.core import electricitymaps_api
1412
from codecarbon.core.units import EmissionsPerKWh, Energy
1513
from codecarbon.external.geography import CloudMetadata, GeoMetadata
1614
from codecarbon.external.logger import logger
1715
from codecarbon.input import DataSource, DataSourceException
1816

17+
if TYPE_CHECKING:
18+
import pandas as pd
19+
1920
_NORDIC_REGIONS_BY_COUNTRY = {
2021
"SWE": {"SE1", "SE2", "SE3", "SE4"},
2122
"NOR": {"NO1", "NO2", "NO3", "NO4", "NO5"},

codecarbon/core/gpu_amd.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
import subprocess
22
from collections import namedtuple
3+
from functools import lru_cache
34
from typing import Callable
45

56
from codecarbon.core.gpu_device import GPUDevice
67
from codecarbon.external.logger import logger
78

89

10+
@lru_cache(maxsize=1)
def is_rocm_system():
    """Return True if the system has an rocm-smi interface.

    The check runs ``rocm-smi --help``. Because of ``lru_cache`` the result of
    the first call is reused by later calls, so the subprocess is only spawned
    again after ``is_rocm_system.cache_clear()`` has been called.
    """
    try:
        subprocess.check_output(["rocm-smi", "--help"])
    except (subprocess.CalledProcessError, OSError):
        # Non-zero exit status, or the executable could not be started.
        return False
    return True
1718

1819

20+
def clear_rocm_system_cache() -> None:
    """Forget the memoized result of ``is_rocm_system``.

    The next call to ``is_rocm_system`` runs the ``rocm-smi`` check again
    instead of returning the value stored by ``functools.lru_cache``.
    """
    is_rocm_system.cache_clear()
22+
23+
1924
try:
2025
import amdsmi
2126

codecarbon/core/gpu_nvidia.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
import subprocess
22
from dataclasses import dataclass
3+
from functools import lru_cache
34
from typing import Any, Union
45

56
from codecarbon.core.gpu_device import GPUDevice
67
from codecarbon.external.logger import logger
78

89

10+
@lru_cache(maxsize=1)
def is_nvidia_system():
    """Return True if the system has an nvidia-smi interface.

    The check runs ``nvidia-smi --help``. Because of ``lru_cache`` the result
    of the first call is reused by later calls, so the subprocess is only
    spawned again after ``is_nvidia_system.cache_clear()`` has been called.
    """
    try:
        subprocess.check_output(["nvidia-smi", "--help"])
    except Exception:
        # Any failure to run the tool is treated as "no NVIDIA interface".
        return False
    return True
1718

1819

20+
def clear_nvidia_system_cache() -> None:
    """Forget the memoized result of ``is_nvidia_system``.

    The next call to ``is_nvidia_system`` runs the ``nvidia-smi`` check again
    instead of returning the value stored by ``functools.lru_cache``.
    """
    is_nvidia_system.cache_clear()
22+
23+
1924
try:
2025
import pynvml
2126

codecarbon/core/hardware_cache.py

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
"""
2+
Process-level cache for hardware detection and setup.
3+
4+
Reuses the outcome of the first tracker hardware probe so additional runs on
5+
the same device (same process) skip repeated powermetrics, cpuinfo, and GPU
6+
detection work.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
import threading
12+
from dataclasses import dataclass, field
13+
from enum import Enum
14+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
15+
16+
from codecarbon.core.config import normalize_gpu_ids
17+
18+
if TYPE_CHECKING:
19+
from codecarbon.core.resource_tracker import ResourceTracker
20+
21+
# RAPL directory used when a cached CPU spec does not record one.
DEFAULT_RAPL_DIR = "/sys/class/powercap/intel-rapl/subsystem"

# Tracker configuration keys that are copied into a cached plan by
# ``capture`` and written back onto a tracker by ``apply``.
CONF_KEYS = (
    "ram_total_size",
    "cpu_count",
    "cpu_physical_count",
    "cpu_model",
    "gpu_count",
    "gpu_model",
    "gpu_ids",
)
32+
33+
34+
class HardwareKind(str, Enum):
    """Tag stored under ``"kind"`` in a cached hardware spec.

    The ``str`` mix-in makes every member compare equal to its string value,
    so a spec can hold the plain string and still be matched against members.
    """

    RAM = "ram"
    CPU = "cpu"
    APPLE_CHIP = "apple_chip"
    GPU = "gpu"
39+
40+
41+
# Held by ``get_or_run_setup`` and ``clear_cache`` while the cache is used.
_cache_lock = threading.Lock()
# Cached hardware plans, one entry per distinct cache key.
_plans: Dict["_HardwareCacheKey", "_HardwarePlan"] = {}
# Shared ``cpu.TDP()`` instance; stays None until ``get_cached_tdp`` builds it.
_tdp = None
44+
45+
46+
@dataclass(frozen=True)
class _HardwareCacheKey:
    """Tracker settings that identify one cached hardware plan.

    The dataclass is frozen, which also makes instances hashable (as long as
    every field value is hashable), so they can be used as dictionary keys.
    ``make_key`` fills the fields from a tracker's private attributes.
    """

    tracking_mode: str
    force_cpu_power: Any
    force_ram_power: Any
    force_mode_cpu_load: bool
    # Canonical form produced by ``_canonical_gpu_ids``: tuple of str, or None.
    gpu_ids: Any
    rapl_include_dram: bool
    rapl_prefer_psys: bool
55+
56+
57+
@dataclass
class _HardwarePlan:
    """Result of one hardware setup, kept so later trackers can reuse it.

    ``capture`` builds a plan from a resource tracker and ``apply`` writes a
    plan back onto another resource tracker.
    """

    # Copied from / restored to the resource tracker attributes of the same name.
    ram_tracker: str
    cpu_tracker: str
    gpu_tracker: str
    # Subset of the tracker configuration, restricted to ``CONF_KEYS``.
    conf: Dict[str, Any] = field(default_factory=dict)
    # One dict per hardware object, as produced by ``_spec_from_hardware``.
    hardware_specs: List[Dict[str, Any]] = field(default_factory=list)
64+
65+
66+
def _canonical_gpu_ids(
    gpu_ids: Optional[List],
) -> Optional[Tuple[str, ...]]:
    """Normalize GPU ids to a stable cache-key form (tuple of strings).

    Args:
        gpu_ids: ``None``, a list/tuple of ids, or a single id.

    Returns:
        ``None`` when *gpu_ids* is ``None`` or ``normalize_gpu_ids`` returns
        an empty result; otherwise a tuple with ``str()`` of every normalized
        id.
    """
    if gpu_ids is None:
        return None
    # A single id that is not a list/tuple is treated as a one-element list.
    candidates = list(gpu_ids) if isinstance(gpu_ids, (list, tuple)) else [gpu_ids]
    normalized = normalize_gpu_ids(candidates)
    if not normalized:
        return None
    return tuple(str(gpu_id) for gpu_id in normalized)
78+
79+
80+
def make_key(tracker) -> _HardwareCacheKey:
    """Build the cache key for *tracker* from its hardware-related settings.

    Reads the tracker's private attributes and its ``_conf`` mapping. The
    boolean options are coerced with ``bool()`` and the GPU ids are passed
    through ``_canonical_gpu_ids``.
    """
    return _HardwareCacheKey(
        tracking_mode=tracker._tracking_mode,
        force_cpu_power=tracker._force_cpu_power,
        force_ram_power=tracker._force_ram_power,
        force_mode_cpu_load=bool(tracker._conf.get("force_mode_cpu_load", False)),
        gpu_ids=_canonical_gpu_ids(tracker._gpu_ids),
        rapl_include_dram=bool(tracker._rapl_include_dram),
        rapl_prefer_psys=bool(tracker._rapl_prefer_psys),
    )
90+
91+
92+
def get_cached_tdp(cpu_module):
    """Return a shared cpu.TDP() instance for this process.

    Args:
        cpu_module: object exposing a ``TDP`` callable; it is only called
            when no instance has been stored yet.

    Returns:
        The instance stored in the module-level ``_tdp``. ``clear_cache``
        resets it to ``None``, after which the next call builds a new one.

    No lock is taken here.
    """
    global _tdp
    if _tdp is None:
        _tdp = cpu_module.TDP()
    return _tdp
98+
99+
100+
def _hardware_kind(hw) -> HardwareKind:
    """Classify hardware without isinstance (safe if modules were reloaded).

    The lookup uses the exact class name of *hw*, so only classes named
    ``RAM``, ``CPU``, ``AppleSiliconChip`` or ``GPU`` are recognised.

    Raises:
        TypeError: if the class name of *hw* is not one of the above.
    """
    kinds_by_class_name = {
        "RAM": HardwareKind.RAM,
        "CPU": HardwareKind.CPU,
        "AppleSiliconChip": HardwareKind.APPLE_CHIP,
        "GPU": HardwareKind.GPU,
    }
    kind = kinds_by_class_name.get(type(hw).__name__)
    if kind is None:
        raise TypeError(f"Unsupported hardware type for cache: {type(hw)}")
    return kind
112+
113+
114+
def _spec_from_hardware(hw) -> Dict[str, Any]:
    """Describe a hardware object as a plain dict of settings.

    The dict carries a ``"kind"`` entry (a ``HardwareKind`` value) plus the
    attributes read from *hw* that ``_hardware_from_spec`` uses to build an
    equivalent object.

    Raises:
        TypeError: if *hw* is not a supported hardware class.
    """
    kind = _hardware_kind(hw)

    if kind == HardwareKind.RAM:
        return {
            "kind": kind.value,
            "tracking_mode": hw._tracking_mode,
            "force_ram_power": hw._force_ram_power,
        }

    if kind == HardwareKind.CPU:
        spec: Dict[str, Any] = {
            "kind": kind.value,
            "mode": hw._mode,
            "model": hw._model,
            "tdp": hw._tdp,
            "tracking_mode": hw._tracking_mode,
            "rapl_include_dram": False,
            "rapl_prefer_psys": False,
        }
        # RAPL options are read from the Intel interface only in intel_rapl
        # mode; otherwise the False defaults stay and no "rapl_dir" is stored.
        if hw._mode == "intel_rapl" and hasattr(hw, "_intel_interface"):
            intel = hw._intel_interface
            spec["rapl_include_dram"] = getattr(intel, "rapl_include_dram", False)
            spec["rapl_prefer_psys"] = getattr(intel, "rapl_prefer_psys", False)
            spec["rapl_dir"] = getattr(intel, "_lin_rapl_dir", DEFAULT_RAPL_DIR)
        return spec

    if kind == HardwareKind.APPLE_CHIP:
        return {
            "kind": kind.value,
            "model": hw._model,
            "chip_part": hw.chip_part,
        }

    if kind == HardwareKind.GPU:
        gpu_ids = _canonical_gpu_ids(hw.gpu_ids)
        return {"kind": kind.value, "gpu_ids": list(gpu_ids) if gpu_ids else None}

    raise TypeError(f"Unsupported hardware type for cache: {type(hw)}")
148+
149+
150+
def _hardware_from_spec(spec: Dict[str, Any], output_dir: str):
    """Build a hardware object from a spec made by ``_spec_from_hardware``.

    Args:
        spec: dict with a ``"kind"`` entry holding a ``HardwareKind`` value
            and the settings recorded for that kind.
        output_dir: passed to the ``CPU`` and ``AppleSiliconChip``
            constructors; not used for RAM or GPU.

    Returns:
        A ``RAM``, ``CPU``, ``AppleSiliconChip`` or ``GPU`` object.

    Raises:
        ValueError: if ``spec["kind"]`` is not a known ``HardwareKind`` value.
    """
    # Imported inside the function rather than at module level
    # (NOTE(review): presumably to avoid an import cycle - confirm).
    from codecarbon.external.hardware import CPU, GPU, AppleSiliconChip
    from codecarbon.external.ram import RAM

    try:
        kind = HardwareKind(spec["kind"])
    except ValueError as exc:
        raise ValueError(f"Unknown hardware spec kind: {spec['kind']}") from exc

    if kind == HardwareKind.RAM:
        return RAM(
            tracking_mode=spec["tracking_mode"],
            force_ram_power=spec.get("force_ram_power"),
        )

    if kind == HardwareKind.CPU:
        return CPU(
            output_dir=output_dir,
            mode=spec["mode"],
            model=spec["model"],
            tdp=spec["tdp"],
            tracking_mode=spec["tracking_mode"],
            rapl_dir=spec.get("rapl_dir", DEFAULT_RAPL_DIR),
            rapl_include_dram=spec.get("rapl_include_dram", False),
            rapl_prefer_psys=spec.get("rapl_prefer_psys", False),
        )

    if kind == HardwareKind.APPLE_CHIP:
        return AppleSiliconChip(
            output_dir=output_dir,
            model=spec["model"],
            chip_part=spec["chip_part"],
        )

    if kind == HardwareKind.GPU:
        gpu_ids = _canonical_gpu_ids(spec.get("gpu_ids"))
        return GPU.from_utils(gpu_ids=list(gpu_ids) if gpu_ids else None)

    raise ValueError(f"Unknown hardware spec kind: {kind}")
185+
186+
187+
def capture(resource_tracker: "ResourceTracker") -> _HardwarePlan:
    """Snapshot the hardware setup of *resource_tracker* as a plan.

    The plan records the resource tracker's ``ram_tracker``, ``cpu_tracker``
    and ``gpu_tracker`` values, the entries of the tracker's ``_conf`` whose
    keys are listed in ``CONF_KEYS``, and one spec per object in the
    tracker's ``_hardware`` list.

    Raises:
        TypeError: propagated from ``_spec_from_hardware`` for an unsupported
            hardware object.
    """
    tracker = resource_tracker.tracker
    # Configuration values are stored by reference, not copied.
    conf = {k: tracker._conf[k] for k in CONF_KEYS if k in tracker._conf}
    hardware_specs = [_spec_from_hardware(hw) for hw in tracker._hardware]
    return _HardwarePlan(
        ram_tracker=resource_tracker.ram_tracker,
        cpu_tracker=resource_tracker.cpu_tracker,
        gpu_tracker=resource_tracker.gpu_tracker,
        conf=conf,
        hardware_specs=hardware_specs,
    )
197+
198+
199+
def apply(resource_tracker: "ResourceTracker", plan: _HardwarePlan) -> None:
    """Write a cached *plan* onto *resource_tracker* and its tracker.

    Sets the three ``*_tracker`` attributes, merges ``plan.conf`` into the
    tracker's ``_conf``, mirrors ``gpu_ids`` onto ``tracker._gpu_ids`` when
    the plan has that key, and replaces ``tracker._hardware`` with objects
    built from ``plan.hardware_specs``.
    """
    tracker = resource_tracker.tracker

    resource_tracker.ram_tracker = plan.ram_tracker
    resource_tracker.cpu_tracker = plan.cpu_tracker
    resource_tracker.gpu_tracker = plan.gpu_tracker

    tracker._conf.update(plan.conf)
    if "gpu_ids" in plan.conf:
        # The value is assigned as-is, so it is the same object the plan holds.
        tracker._gpu_ids = plan.conf["gpu_ids"]

    tracker._hardware = [
        _hardware_from_spec(spec, tracker._output_dir) for spec in plan.hardware_specs
    ]
210+
211+
212+
def get_or_run_setup(
    resource_tracker: "ResourceTracker",
    setup_fn,
) -> None:
    """Apply cached hardware plan or run full setup once per cache key.

    Args:
        resource_tracker: object whose ``tracker`` supplies the cache key and
            which receives the cached plan.
        setup_fn: zero-argument callable that performs the full hardware
            setup on *resource_tracker*; only called on a cache miss.

    The module lock is held for the whole lookup, including the call to
    *setup_fn*. The lock is a plain ``threading.Lock``, so *setup_fn* must
    not call ``get_or_run_setup`` or ``clear_cache`` itself. If *setup_fn*
    raises, nothing is stored for the key.
    """
    key = make_key(resource_tracker.tracker)
    with _cache_lock:
        cached_plan = _plans.get(key)
        if cached_plan is not None:
            apply(resource_tracker, cached_plan)
            return
        # Cache miss: run the real setup, then remember what it produced.
        setup_fn()
        _plans[key] = capture(resource_tracker)
225+
226+
227+
def clear_cache() -> None:
    """Clear cached plans (for tests).

    Also drops the shared TDP instance and calls the cache-clearing hook of
    each probe module listed below, but only for modules that are already
    present in ``sys.modules``; nothing is imported just to be cleared.
    """
    global _tdp
    import sys

    with _cache_lock:
        _plans.clear()
        _tdp = None

    # (module name, name of its cache-clearing function)
    probe_cache_hooks = (
        ("codecarbon.core.gpu_nvidia", "clear_nvidia_system_cache"),
        ("codecarbon.core.gpu_amd", "clear_rocm_system_cache"),
        ("codecarbon.core.cpu", "clear_powergadget_cache"),
        ("codecarbon.core.powermetrics", "clear_powermetrics_cache"),
    )
    for mod_name, clear_fn in probe_cache_hooks:
        mod = sys.modules.get(mod_name)
        if mod is not None:
            getattr(mod, clear_fn)()

    if "codecarbon.external.hardware" in sys.modules:
        from codecarbon.external.hardware import clear_cpu_load_prime_cache

        clear_cpu_load_prime_cache()

0 commit comments

Comments
 (0)