99from fastapi import Depends , FastAPI
1010from fastapi .middleware .cors import CORSMiddleware
1111
12+ from . import storage
1213from .auth import verify_cf_access
13- from .clients import cloudflare , pbs , probes
14+ from .clients import cloudflare , pbs , probes , proxmox , unifi
1415from .config import get_settings
1516from .notify import NotificationCenter , run_notification_loop
16- from .routers import audit , backups , certs , network , services , system , tunnel
17+ from .routers import (
18+ audit ,
19+ backups ,
20+ certs ,
21+ cloudflare as cloudflare_router ,
22+ network ,
23+ services ,
24+ system ,
25+ tunnel ,
26+ )
1727
1828_settings = get_settings ()
1929logging .basicConfig (
@@ -70,12 +80,76 @@ async def _gather_notify_snapshot() -> dict:
7080 }
7181
7282
83+ async def _history_cleanup_loop () -> None :
84+ """Periodically drop samples older than the retention window."""
85+ while True :
86+ try :
87+ await asyncio .sleep (_settings .history_cleanup_interval_s )
88+ await storage .cleanup_old (_settings .history_retention_days )
89+ except asyncio .CancelledError :
90+ raise
91+ except Exception as e : # noqa: BLE001 - never let the loop die
92+ structlog .get_logger ().error (
93+ "history.cleanup_error" , error = str (e ), error_type = type (e ).__name__
94+ )
95+
96+
97+ async def _metrics_sample_loop () -> None :
98+ """Pull guest CPU/RAM and WAN throughput every tick and persist them.
99+
100+ Lets the dashboard draw 24h CPU/RAM-per-guest and 1h WAN-throughput
101+ charts without leaning on each upstream's (missing) history endpoints.
102+ Best-effort: if either upstream is unreachable on this tick we skip
103+ writing for that side rather than killing the loop.
104+ """
105+ interval = max (15 , _settings .metrics_sample_interval_s )
106+ log_ = structlog .get_logger ("metrics" )
107+ while True :
108+ try :
109+ await asyncio .sleep (interval )
110+ guests_task = asyncio .create_task (proxmox .fetch_guests (_settings ))
111+ net_task = asyncio .create_task (unifi .fetch_network_snapshot (_settings ))
112+ guests , net = await asyncio .gather (
113+ guests_task , net_task , return_exceptions = True
114+ )
115+ if isinstance (guests , list ):
116+ rows = [
117+ (g .id , float (g .cpu_pct ), int (g .ram_used_b ), int (g .ram_total_b ))
118+ for g in guests
119+ if g .running and g .type != "HOST"
120+ ]
121+ await storage .record_guest_metrics (rows )
122+ else :
123+ log_ .info ("metrics.guests_skip" , error = str (guests ))
124+ if not isinstance (net , BaseException ) and net .reachable :
125+ await storage .record_network_metrics (
126+ float (net .throughput_down_mbit ), float (net .throughput_up_mbit )
127+ )
128+ elif isinstance (net , BaseException ):
129+ log_ .info ("metrics.network_skip" , error = str (net ))
130+ except asyncio .CancelledError :
131+ raise
132+ except Exception as e : # noqa: BLE001 - never let the loop die
133+ log_ .error (
134+ "metrics.sample_error" , error = str (e ), error_type = type (e ).__name__
135+ )
136+
137+
73138@asynccontextmanager
74139async def lifespan (app : FastAPI ):
75- task : asyncio .Task | None = None
140+ notify_task : asyncio .Task | None = None
141+ cleanup_task : asyncio .Task | None = None
142+ metrics_task : asyncio .Task | None = None
143+
144+ await storage .ensure_schema (_settings )
145+ if storage .is_enabled ():
146+ cleanup_task = asyncio .create_task (_history_cleanup_loop ())
147+ if _settings .metrics_sample_interval_s > 0 :
148+ metrics_task = asyncio .create_task (_metrics_sample_loop ())
149+
76150 if _settings .notify_webhook_url :
77151 center = NotificationCenter (settings = _settings )
78- task = asyncio .create_task (
152+ notify_task = asyncio .create_task (
79153 run_notification_loop (
80154 center , _gather_notify_snapshot , interval_s = _settings .notify_interval_s
81155 )
@@ -86,7 +160,9 @@ async def lifespan(app: FastAPI):
86160 try :
87161 yield
88162 finally :
89- if task :
163+ for task in (notify_task , cleanup_task , metrics_task ):
164+ if task is None :
165+ continue
90166 task .cancel ()
91167 try :
92168 await task
@@ -135,3 +211,5 @@ async def me(claims: dict = Depends(verify_cf_access)) -> dict:
135211app .include_router (network .router )
136212app .include_router (certs .router )
137213app .include_router (audit .router )
214+ app .include_router (audit .events_router )
215+ app .include_router (cloudflare_router .router )
0 commit comments