Skip to content

Commit 70e95ef

Browse files
authored
feat: first version of telemetry (#1171)
* feat: first version of telemetry * tests: add a test so that client and server do not differ
1 parent 987fbdf commit 70e95ef

14 files changed

Lines changed: 859 additions & 1 deletion

File tree

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import abc
2+
from uuid import UUID
3+
4+
from carbonserver.api import schemas_telemetry
5+
6+
7+
class Telemetry(abc.ABC):
8+
@abc.abstractmethod
9+
def add_telemetry(self, telemetry: schemas_telemetry.TelemetryCreate) -> UUID:
10+
raise NotImplementedError
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""SQLAlchemy models for telemetry data in the CarbonServer API."""
2+
3+
import uuid
4+
5+
from sqlalchemy import JSON, Boolean, Column, DateTime, Float, Integer, String
6+
from sqlalchemy.dialects.postgresql import UUID
7+
8+
from carbonserver.database.database import Base
9+
10+
11+
class Telemetry(Base):
12+
__tablename__ = "telemetry"
13+
14+
id = Column(UUID(as_uuid=True), primary_key=True, index=True, default=uuid.uuid4)
15+
timestamp = Column(DateTime, nullable=False)
16+
telemetry_level = Column(String, nullable=False)
17+
18+
os = Column(String, nullable=True)
19+
country_name = Column(String, nullable=True)
20+
country_iso_code = Column(String, nullable=True)
21+
region = Column(String, nullable=True)
22+
cloud_provider = Column(String, nullable=True)
23+
cloud_region = Column(String, nullable=True)
24+
longitude = Column(Float, nullable=True)
25+
latitude = Column(Float, nullable=True)
26+
27+
cpu_count = Column(Integer, nullable=True)
28+
cpu_physical_count = Column(Integer, nullable=True)
29+
cpu_model = Column(String, nullable=True)
30+
cpu_architecture = Column(String, nullable=True)
31+
gpu_count = Column(Integer, nullable=True)
32+
gpu_model = Column(String, nullable=True)
33+
gpu_driver_version = Column(String, nullable=True)
34+
gpu_memory_total_gb = Column(Float, nullable=True)
35+
ram_total_size_gb = Column(Float, nullable=True)
36+
cuda_version = Column(String, nullable=True)
37+
cudnn_version = Column(String, nullable=True)
38+
39+
python_version = Column(String, nullable=True)
40+
python_implementation = Column(String, nullable=True)
41+
python_executable_hash = Column(String, nullable=True)
42+
python_env_type = Column(String, nullable=True)
43+
codecarbon_version = Column(String, nullable=True)
44+
codecarbon_install_method = Column(String, nullable=True)
45+
46+
total_emissions_kg = Column(Float, nullable=True)
47+
emissions_rate_kg_per_sec = Column(Float, nullable=True)
48+
energy_consumed_kwh = Column(Float, nullable=True)
49+
cpu_energy_kwh = Column(Float, nullable=True)
50+
gpu_energy_kwh = Column(Float, nullable=True)
51+
ram_energy_kwh = Column(Float, nullable=True)
52+
duration_seconds = Column(Float, nullable=True)
53+
cpu_utilization_avg = Column(Float, nullable=True)
54+
gpu_utilization_avg = Column(Float, nullable=True)
55+
ram_utilization_avg = Column(Float, nullable=True)
56+
57+
tracking_mode = Column(String, nullable=True)
58+
api_mode = Column(String, nullable=True)
59+
output_methods = Column(JSON, nullable=True)
60+
hardware_tracked = Column(JSON, nullable=True)
61+
task_tracking_used = Column(Boolean, nullable=True)
62+
decorator_vs_context = Column(String, nullable=True)
63+
measure_power_interval_secs = Column(Float, nullable=True)
64+
65+
hardware_detection_success = Column(Boolean, nullable=True)
66+
rapl_available = Column(Boolean, nullable=True)
67+
gpu_detection_method = Column(String, nullable=True)
68+
first_measurement_time_ms = Column(Float, nullable=True)
69+
tracking_overhead_percent = Column(Float, nullable=True)
70+
errors_encountered = Column(JSON, nullable=True)
71+
warning_count = Column(Integer, nullable=True)
72+
73+
ide_used = Column(String, nullable=True)
74+
notebook_environment = Column(String, nullable=True)
75+
ci_environment = Column(String, nullable=True)
76+
python_package_manager = Column(String, nullable=True)
77+
framework_detected = Column(String, nullable=True)
78+
79+
has_torch = Column(Boolean, nullable=True)
80+
torch_version = Column(String, nullable=True)
81+
has_transformers = Column(Boolean, nullable=True)
82+
transformers_version = Column(String, nullable=True)
83+
has_diffusers = Column(Boolean, nullable=True)
84+
diffusers_version = Column(String, nullable=True)
85+
has_tensorflow = Column(Boolean, nullable=True)
86+
tensorflow_version = Column(String, nullable=True)
87+
has_keras = Column(Boolean, nullable=True)
88+
keras_version = Column(String, nullable=True)
89+
has_pytorch_lightning = Column(Boolean, nullable=True)
90+
pytorch_lightning_version = Column(String, nullable=True)
91+
has_fastai = Column(Boolean, nullable=True)
92+
fastai_version = Column(String, nullable=True)
93+
ml_framework_primary = Column(String, nullable=True)
94+
95+
container_runtime = Column(String, nullable=True)
96+
in_container = Column(Boolean, nullable=True)
97+
host_machine_hash = Column(String, nullable=True)
98+
99+
def __repr__(self):
100+
return (
101+
f'<Telemetry(id="{self.id}", '
102+
f'timestamp="{self.timestamp}", '
103+
f'telemetry_level="{self.telemetry_level}")>'
104+
)
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""Repository implementation for telemetry data using SQLAlchemy."""
2+
3+
import uuid
4+
from contextlib import AbstractContextManager
5+
from uuid import UUID
6+
7+
from dependency_injector.providers import Callable
8+
9+
from carbonserver.api.domain.telemetry import Telemetry
10+
from carbonserver.api.infra.database.telemetry_sql_models import (
11+
Telemetry as SqlModelTelemetry,
12+
)
13+
from carbonserver.api.schemas_telemetry import TelemetryCreate
14+
15+
16+
class SqlAlchemyRepository(Telemetry):
17+
def __init__(self, session_factory) -> Callable[..., AbstractContextManager]:
18+
self.session_factory = session_factory
19+
20+
def add_telemetry(self, telemetry: TelemetryCreate) -> UUID:
21+
with self.session_factory() as session:
22+
db_telemetry = SqlModelTelemetry(
23+
id=uuid.uuid4(),
24+
**telemetry.model_dump(),
25+
)
26+
session.add(db_telemetry)
27+
session.commit()
28+
return db_telemetry.id
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""API router for handling telemetry data in the CarbonServer API."""
2+
3+
from uuid import UUID
4+
5+
from dependency_injector.wiring import Provide, inject
6+
from fastapi import APIRouter, Depends
7+
from starlette import status
8+
9+
from carbonserver.api.schemas_telemetry import TelemetryCreate
10+
from carbonserver.api.services.telemetry_service import TelemetryService
11+
from carbonserver.container import ServerContainer
12+
13+
TELEMETRY_ROUTER_TAGS = ["Telemetry"]
14+
15+
router = APIRouter()
16+
17+
18+
@router.post(
19+
"/telemetry",
20+
tags=TELEMETRY_ROUTER_TAGS,
21+
status_code=status.HTTP_201_CREATED,
22+
response_model=UUID,
23+
)
24+
@inject
25+
def add_telemetry(
26+
telemetry: TelemetryCreate,
27+
telemetry_service: TelemetryService = Depends(
28+
Provide[ServerContainer.telemetry_service]
29+
),
30+
) -> UUID:
31+
return telemetry_service.add_telemetry(telemetry)
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
"""Schemas for telemetry data submitted to the CarbonServer API."""
2+
3+
from datetime import datetime
4+
from enum import Enum
5+
from typing import List, Optional
6+
7+
from pydantic import BaseModel, ConfigDict, Field, model_validator
8+
9+
10+
class TelemetryLevel(str, Enum):
11+
disabled = "disabled"
12+
minimal = "minimal"
13+
extensive = "extensive"
14+
15+
16+
class TelemetryBase(BaseModel):
17+
model_config = ConfigDict(
18+
extra="forbid",
19+
use_enum_values=True,
20+
json_schema_extra={
21+
"example": {
22+
"timestamp": "2026-05-03T12:00:00+00:00",
23+
"telemetry_level": "minimal",
24+
"os": "Linux-5.10.0-x86_64",
25+
"country_name": "France",
26+
"country_iso_code": "FRA",
27+
"cpu_count": 12,
28+
"cpu_model": "Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz",
29+
"python_version": "3.11.5",
30+
"codecarbon_version": "3.0.0",
31+
}
32+
},
33+
)
34+
35+
timestamp: datetime
36+
telemetry_level: TelemetryLevel
37+
38+
os: Optional[str] = None
39+
country_name: Optional[str] = None
40+
country_iso_code: Optional[str] = Field(default=None, min_length=2, max_length=3)
41+
region: Optional[str] = None
42+
cloud_provider: Optional[str] = None
43+
cloud_region: Optional[str] = None
44+
longitude: Optional[float] = Field(default=None, ge=-180, le=180)
45+
latitude: Optional[float] = Field(default=None, ge=-90, le=90)
46+
47+
cpu_count: Optional[int] = Field(default=None, ge=0)
48+
cpu_physical_count: Optional[int] = Field(default=None, ge=0)
49+
cpu_model: Optional[str] = None
50+
cpu_architecture: Optional[str] = None
51+
gpu_count: Optional[int] = Field(default=None, ge=0)
52+
gpu_model: Optional[str] = None
53+
gpu_driver_version: Optional[str] = None
54+
gpu_memory_total_gb: Optional[float] = Field(default=None, ge=0)
55+
ram_total_size_gb: Optional[float] = Field(default=None, ge=0)
56+
cuda_version: Optional[str] = None
57+
cudnn_version: Optional[str] = None
58+
59+
python_version: Optional[str] = None
60+
python_implementation: Optional[str] = None
61+
python_executable_hash: Optional[str] = Field(
62+
default=None, min_length=64, max_length=64
63+
)
64+
python_env_type: Optional[str] = None
65+
codecarbon_version: Optional[str] = None
66+
codecarbon_install_method: Optional[str] = None
67+
68+
total_emissions_kg: Optional[float] = Field(default=None, ge=0)
69+
emissions_rate_kg_per_sec: Optional[float] = Field(default=None, ge=0)
70+
energy_consumed_kwh: Optional[float] = Field(default=None, ge=0)
71+
cpu_energy_kwh: Optional[float] = Field(default=None, ge=0)
72+
gpu_energy_kwh: Optional[float] = Field(default=None, ge=0)
73+
ram_energy_kwh: Optional[float] = Field(default=None, ge=0)
74+
duration_seconds: Optional[float] = Field(default=None, ge=0)
75+
cpu_utilization_avg: Optional[float] = Field(default=None, ge=0, le=100)
76+
gpu_utilization_avg: Optional[float] = Field(default=None, ge=0, le=100)
77+
ram_utilization_avg: Optional[float] = Field(default=None, ge=0, le=100)
78+
79+
tracking_mode: Optional[str] = None
80+
api_mode: Optional[str] = None
81+
output_methods: Optional[List[str]] = None
82+
hardware_tracked: Optional[List[str]] = None
83+
task_tracking_used: Optional[bool] = None
84+
decorator_vs_context: Optional[str] = None
85+
measure_power_interval_secs: Optional[float] = Field(default=None, ge=0)
86+
87+
hardware_detection_success: Optional[bool] = None
88+
rapl_available: Optional[bool] = None
89+
gpu_detection_method: Optional[str] = None
90+
first_measurement_time_ms: Optional[float] = Field(default=None, ge=0)
91+
tracking_overhead_percent: Optional[float] = Field(default=None, ge=0)
92+
errors_encountered: Optional[List[str]] = None
93+
warning_count: Optional[int] = Field(default=None, ge=0)
94+
95+
ide_used: Optional[str] = None
96+
notebook_environment: Optional[str] = None
97+
ci_environment: Optional[str] = None
98+
python_package_manager: Optional[str] = None
99+
framework_detected: Optional[str] = None
100+
101+
has_torch: Optional[bool] = None
102+
torch_version: Optional[str] = None
103+
has_transformers: Optional[bool] = None
104+
transformers_version: Optional[str] = None
105+
has_diffusers: Optional[bool] = None
106+
diffusers_version: Optional[str] = None
107+
has_tensorflow: Optional[bool] = None
108+
tensorflow_version: Optional[str] = None
109+
has_keras: Optional[bool] = None
110+
keras_version: Optional[str] = None
111+
has_pytorch_lightning: Optional[bool] = None
112+
pytorch_lightning_version: Optional[str] = None
113+
has_fastai: Optional[bool] = None
114+
fastai_version: Optional[str] = None
115+
ml_framework_primary: Optional[str] = None
116+
117+
container_runtime: Optional[str] = None
118+
in_container: Optional[bool] = None
119+
host_machine_hash: Optional[str] = None
120+
121+
@model_validator(mode="after")
122+
def validate_telemetry_level(self):
123+
if self.telemetry_level == TelemetryLevel.disabled:
124+
raise ValueError("Disabled telemetry must not be submitted")
125+
126+
if self.telemetry_level == TelemetryLevel.minimal:
127+
extensive_fields = set(type(self).model_fields) - MINIMAL_TELEMETRY_FIELDS
128+
submitted_extensive_fields = [
129+
field
130+
for field in extensive_fields
131+
if getattr(self, field) not in (None, [], {})
132+
]
133+
if submitted_extensive_fields:
134+
fields = ", ".join(sorted(submitted_extensive_fields))
135+
raise ValueError(
136+
f"Minimal telemetry cannot include extensive fields: {fields}"
137+
)
138+
139+
return self
140+
141+
142+
MINIMAL_TELEMETRY_FIELDS = {
143+
"timestamp",
144+
"telemetry_level",
145+
"os",
146+
"country_name",
147+
"country_iso_code",
148+
"region",
149+
"cloud_provider",
150+
"cloud_region",
151+
"longitude",
152+
"latitude",
153+
"cpu_count",
154+
"cpu_physical_count",
155+
"cpu_model",
156+
"cpu_architecture",
157+
"gpu_count",
158+
"gpu_model",
159+
"gpu_driver_version",
160+
"gpu_memory_total_gb",
161+
"ram_total_size_gb",
162+
"cuda_version",
163+
"cudnn_version",
164+
"python_version",
165+
"python_implementation",
166+
"python_executable_hash",
167+
"python_env_type",
168+
"codecarbon_version",
169+
"codecarbon_install_method",
170+
}
171+
172+
173+
class TelemetryCreate(TelemetryBase):
174+
pass
175+
176+
177+
class Telemetry(TelemetryBase):
178+
id: str
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""Service layer for handling telemetry data in the CarbonServer API."""
2+
3+
from uuid import UUID
4+
5+
from carbonserver.api.infra.repositories.repository_telemetry import (
6+
SqlAlchemyRepository as TelemetrySqlRepository,
7+
)
8+
from carbonserver.api.schemas_telemetry import TelemetryCreate
9+
10+
11+
class TelemetryService:
12+
def __init__(self, telemetry_repository: TelemetrySqlRepository):
13+
self._repository = telemetry_repository
14+
15+
def add_telemetry(self, telemetry: TelemetryCreate) -> UUID:
16+
return self._repository.add_telemetry(telemetry)

0 commit comments

Comments
 (0)