Skip to content

Commit e4be65d

Browse files
authored
Merge pull request #23 from rxf-sys/claude/auth-accounts
fix: address audit findings — restart 500, drawer a11y, polling, erro…
2 parents 9088cc2 + 6b20969 commit e4be65d

19 files changed

Lines changed: 381 additions & 180 deletions

backend/app/clients/cloudflare.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,10 @@ async def _get_paginated(
9494

9595
async def fetch_tunnel_status(settings: Settings) -> TunnelStatus:
9696
if not (settings.cf_api_token and settings.cf_account_id and settings.cf_tunnel_id):
97-
return TunnelStatus()
97+
return TunnelStatus(
98+
reachable=False,
99+
error="CF_API_TOKEN, CF_ACCOUNT_ID oder CF_TUNNEL_ID nicht konfiguriert",
100+
)
98101
async with httpx.AsyncClient(timeout=8.0) as client:
99102
try:
100103
tunnel = await _get(
@@ -114,7 +117,12 @@ async def fetch_tunnel_status(settings: Settings) -> TunnelStatus:
114117
error=str(e),
115118
error_type=type(e).__name__,
116119
)
117-
return TunnelStatus(id=settings.cf_tunnel_id, status="unknown")
120+
return TunnelStatus(
121+
id=settings.cf_tunnel_id,
122+
status="unknown",
123+
reachable=False,
124+
error=f"Cloudflare-API nicht erreichbar: {type(e).__name__}",
125+
)
118126

119127
raw_status = (tunnel.get("status") if isinstance(tunnel, dict) else None) or "unknown"
120128
mapped = {"healthy": "healthy", "degraded": "degraded", "down": "down", "inactive": "down"}.get(
@@ -150,15 +158,17 @@ async def fetch_wan_ip() -> str | None:
150158
return None
151159

152160

153-
async def fetch_certs(settings: Settings) -> list[CertInfo]:
161+
async def fetch_certs(settings: Settings) -> tuple[list[CertInfo], str | None]:
162+
"""Return ``(certs, error)``. ``error`` is None on success, otherwise a
163+
human-readable reason (unconfigured / API unreachable)."""
154164
if not (settings.cf_api_token and settings.cf_zone_id):
155-
return []
165+
return [], "CF_API_TOKEN oder CF_ZONE_ID nicht konfiguriert"
156166
async with httpx.AsyncClient(timeout=8.0) as client:
157167
try:
158168
packs = await _get(client, settings, f"/zones/{settings.cf_zone_id}/ssl/certificate_packs")
159169
except httpx.HTTPError as e:
160170
log.warning("cloudflare.certs_failed", zone=settings.cf_zone_id, error=str(e))
161-
return []
171+
return [], f"Cloudflare-API nicht erreichbar: {type(e).__name__}"
162172
out: list[CertInfo] = []
163173
now = datetime.now(timezone.utc)
164174
if isinstance(packs, list):
@@ -188,7 +198,7 @@ async def fetch_certs(settings: Settings) -> list[CertInfo]:
188198
key = (c.domain, c.issuer)
189199
if key not in dedup or dedup[key].days_left > c.days_left:
190200
dedup[key] = c
191-
return sorted(dedup.values(), key=lambda c: c.days_left)
201+
return sorted(dedup.values(), key=lambda c: c.days_left), None
192202

193203

194204
async def fetch_access_sessions(settings: Settings, hours: int = 24, limit: int = 100) -> dict:

backend/app/clients/proxmox.py

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -367,21 +367,29 @@ async def restart_guest(settings: Settings, vmid: int, gtype: str) -> bool:
367367
the reboot call alone just means the task was queued — for VMs/CTs that
368368
fail to shut down cleanly the actual reboot can still error out, and we
369369
want the dashboard's success toast to reflect that.
370+
371+
Any transport-level failure (PVE unreachable, timeout) is swallowed and
372+
reported as ``False`` so the API never 500s on a restart request.
370373
"""
371374
path_type = "lxc" if gtype.lower() in ("lxc", "ct") else "qemu"
372-
async with httpx.AsyncClient(verify=settings.proxmox_verify_tls, timeout=10.0) as client:
373-
r = await client.post(
374-
f"{_base_url(settings)}/nodes/{settings.proxmox_node}/{path_type}/{vmid}/status/reboot",
375-
headers=_auth_header(settings),
376-
)
377-
if r.status_code not in (200, 202):
378-
return False
379-
# PVE returns the UPID as the `data` field of a plain JSON envelope.
380-
try:
381-
upid = r.json().get("data")
382-
except ValueError:
383-
upid = None
384-
if not isinstance(upid, str) or not upid.startswith("UPID:"):
385-
# No UPID — fall back to the legacy "accepted == success" semantics.
386-
return True
387-
return await _wait_for_task(client, settings, upid)
375+
try:
376+
async with httpx.AsyncClient(verify=settings.proxmox_verify_tls, timeout=10.0) as client:
377+
r = await client.post(
378+
f"{_base_url(settings)}/nodes/{settings.proxmox_node}/{path_type}/{vmid}/status/reboot",
379+
headers=_auth_header(settings),
380+
)
381+
if r.status_code not in (200, 202):
382+
log.info("proxmox.restart_rejected", vmid=vmid, status=r.status_code)
383+
return False
384+
# PVE returns the UPID as the `data` field of a plain JSON envelope.
385+
try:
386+
upid = r.json().get("data")
387+
except ValueError:
388+
upid = None
389+
if not isinstance(upid, str) or not upid.startswith("UPID:"):
390+
# No UPID — fall back to the legacy "accepted == success" semantics.
391+
return True
392+
return await _wait_for_task(client, settings, upid)
393+
except httpx.HTTPError as e:
394+
log.warning("proxmox.restart_failed", vmid=vmid, error=str(e), error_type=type(e).__name__)
395+
return False

backend/app/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,10 @@ async def _gather_notify_snapshot() -> dict:
7676
if not isinstance(backup_summary, BaseException)
7777
else None
7878
),
79+
# fetch_certs returns a (certs, error) tuple; unwrap the list half.
7980
"certs": [
8081
{"domain": c.domain, "days_left": c.days_left}
81-
for c in (certs_list if isinstance(certs_list, list) else [])
82+
for c in (certs_list[0] if isinstance(certs_list, tuple) else [])
8283
],
8384
}
8485

backend/app/models.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,11 @@ class TunnelStatus(BaseModel):
9191
regions: list[str] = Field(default_factory=list)
9292
cloudflared_version: str | None = None
9393
wan_ip: str | None = None
94+
# ``reachable`` is False both when Cloudflare credentials are missing and
95+
# when the API call fails — ``error`` carries the reason so the UI can
96+
# tell "not configured" apart from a real outage.
97+
reachable: bool = True
98+
error: str | None = None
9499

95100

96101
# ---------- Backups ----------
@@ -178,3 +183,7 @@ class DNSRecordCheck(BaseModel):
178183
class CertsSnapshot(BaseModel):
179184
certs: list[CertInfo]
180185
dns: list[DNSRecordCheck]
186+
# False when the Cloudflare API is unconfigured or unreachable, so the UI
187+
# can tell "no certs" apart from "couldn't ask Cloudflare".
188+
reachable: bool = True
189+
error: str | None = None

backend/app/routers/certs.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,16 @@
1616
@router.get("", response_model=CertsSnapshot)
1717
async def get_certs(settings: Settings = Depends(get_settings)) -> CertsSnapshot:
1818
async def loader() -> CertsSnapshot:
19-
certs, dns = await asyncio.gather(
19+
certs_result, dns = await asyncio.gather(
2020
cloudflare.fetch_certs(settings),
2121
cloudflare.fetch_dns_consistency(settings),
2222
)
23-
return CertsSnapshot(certs=certs, dns=dns)
23+
certs, cert_error = certs_result
24+
return CertsSnapshot(
25+
certs=certs,
26+
dns=dns,
27+
reachable=cert_error is None,
28+
error=cert_error,
29+
)
2430

2531
return await cache.get_or_set("certs", settings.cache_ttl_certs, loader)

backend/tests/test_cloudflare.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,9 @@ async def test_certs_dedupes_and_sorts(settings):
107107
},
108108
)
109109

110-
certs = await cloudflare.fetch_certs(settings)
110+
certs, error = await cloudflare.fetch_certs(settings)
111111

112+
assert error is None
112113
assert len(certs) == 1
113114
assert certs[0].domain == "example.test"
114115
assert certs[0].days_left <= 10

backend/tests/test_proxmox.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,3 +294,17 @@ async def test_restart_guest_lxc_without_upid_falls_back_to_http_status(settings
294294
ok = await proxmox.restart_guest(settings, 101, "lxc")
295295

296296
assert ok is True
297+
298+
299+
@pytest.mark.asyncio
300+
@respx.mock
301+
async def test_restart_guest_returns_false_when_pve_unreachable(settings):
302+
"""A transport error must degrade to False, not bubble up as a 500."""
303+
base = f"https://{settings.proxmox_host}:{settings.proxmox_port}/api2/json"
304+
respx.post(f"{base}/nodes/{settings.proxmox_node}/lxc/101/status/reboot").mock(
305+
side_effect=httpx.ConnectError("connection refused")
306+
)
307+
308+
ok = await proxmox.restart_guest(settings, 101, "lxc")
309+
310+
assert ok is False

frontend/src/App.tsx

Lines changed: 82 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,17 @@ import type { Account, BackupSnapshot, Guest } from './types';
2828

2929
const SECTION_KEYS: Section[] = ['overview', 'server', 'network', 'backup', 'cloudflare', 'settings'];
3030

31+
// Human-readable section names for the tabpanel's aria-label.
32+
const SECTION_LABELS: Record<Section, string> = {
33+
overview: 'Übersicht',
34+
server: 'Server',
35+
network: 'Netzwerk',
36+
backup: 'Backup',
37+
cloudflare: 'Cloudflare',
38+
admin: 'Konten',
39+
settings: 'Einstellungen',
40+
};
41+
3142
export function App() {
3243
const auth = useAuth();
3344

@@ -54,6 +65,7 @@ function Dashboard({ user, onLogout }: DashboardProps) {
5465
const [selectedSvc, setSelectedSvc] = useState<string | null>(null);
5566
const [logsGuest, setLogsGuest] = useState<Guest | null>(null);
5667
const [confirmGuest, setConfirmGuest] = useState<Guest | null>(null);
68+
const [verifyTarget, setVerifyTarget] = useState<BackupSnapshot | null>(null);
5769
const [toasts, setToasts] = useState<Toast[]>([]);
5870
const [paletteOpen, setPaletteOpen] = useState(false);
5971
const [helpOpen, setHelpOpen] = useState(false);
@@ -191,22 +203,19 @@ function Dashboard({ user, onLogout }: DashboardProps) {
191203
}
192204
};
193205

194-
const onVerifyBackup = useCallback(
195-
async (snap: BackupSnapshot) => {
196-
const ok = window.confirm(
197-
`Verifikation für ${snap.target} (${new Date(snap.backup_time * 1000).toLocaleString()}) starten?`,
198-
);
199-
if (!ok) return;
200-
try {
201-
const r = await api.verifyBackup(snap.backup_type, snap.backup_id, snap.backup_time);
202-
pushToast({ level: 'ok', title: `Verify gestartet · ${snap.target}`, body: `UPID: ${r.upid.slice(0, 32)}…` });
203-
setTimeout(bkp.refresh, 2000);
204-
} catch (e) {
205-
pushToast({ level: 'err', title: 'Verify fehlgeschlagen', body: (e as Error).message });
206-
}
207-
},
208-
[pushToast, bkp],
209-
);
206+
// Backup verification routes through the ConfirmModal (no native window.confirm).
207+
const confirmVerify = useCallback(async () => {
208+
const snap = verifyTarget;
209+
if (!snap) return;
210+
setVerifyTarget(null);
211+
try {
212+
const r = await api.verifyBackup(snap.backup_type, snap.backup_id, snap.backup_time);
213+
pushToast({ level: 'ok', title: `Verify gestartet · ${snap.target}`, body: `UPID: ${r.upid.slice(0, 32)}…` });
214+
setTimeout(bkp.refresh, 2000);
215+
} catch (e) {
216+
pushToast({ level: 'err', title: 'Verify fehlgeschlagen', body: (e as Error).message });
217+
}
218+
}, [verifyTarget, pushToast, bkp]);
210219

211220
const onSnapshot = useCallback(async () => {
212221
const snapshot = {
@@ -234,15 +243,28 @@ function Dashboard({ user, onLogout }: DashboardProps) {
234243
useEffect(() => {
235244
if (errSig === lastErrSig.current) return;
236245
lastErrSig.current = errSig;
237-
const errs: { name: string; err: Error | null }[] = [
246+
const failed = [
238247
{ name: 'System', err: sys.error },
239248
{ name: 'Services', err: svc.error },
240249
{ name: 'Tunnel', err: tun.error },
241250
{ name: 'Backups', err: bkp.error },
242251
{ name: 'Netzwerk', err: net.error },
243-
];
244-
for (const { name, err } of errs) {
245-
if (err) pushToast({ level: 'err', title: `${name}-API Fehler`, body: err.message });
252+
].filter((e) => e.err);
253+
if (failed.length === 0) return;
254+
// Bundle simultaneous failures into a single toast instead of flooding
255+
// the user with one per endpoint.
256+
if (failed.length === 1) {
257+
pushToast({
258+
level: 'err',
259+
title: `${failed[0].name}-API Fehler`,
260+
body: failed[0].err!.message,
261+
});
262+
} else {
263+
pushToast({
264+
level: 'err',
265+
title: `${failed.length} API-Endpunkte nicht erreichbar`,
266+
body: failed.map((e) => e.name).join(', '),
267+
});
246268
}
247269
}, [errSig, sys.error, svc.error, tun.error, bkp.error, net.error, pushToast]);
248270

@@ -297,22 +319,23 @@ function Dashboard({ user, onLogout }: DashboardProps) {
297319
<button className="btn" type="button" onClick={() => setPaused(false)}>Fortsetzen</button>
298320
</div>
299321
)}
300-
<main className="dash-main" id={`section-${section}`} role="tabpanel" aria-label={section}>
322+
<main className="dash-main" id={`section-${section}`} role="tabpanel" aria-label={SECTION_LABELS[section]}>
301323
{section === 'overview' && (
302324
<>
303325
<AttentionHero
304326
guests={guests}
305327
services={services}
306328
certs={cer.data?.certs ?? []}
307329
backups={bkp.data}
330+
tunnel={tun.data}
308331
certWarnDays={ui.certWarnDays}
309332
onInspectService={setSelectedSvc}
310333
onInspectGuest={onInspectGuest}
311334
/>
312335
<HostPanel host={sys.data?.host ?? null} guests={guests} />
313336
<div className="quick-stats">
314337
<KpiStrip guests={guests} services={services} />
315-
<AuditLog />
338+
<AuditLog pollMs={pollFast} />
316339
</div>
317340
<ServiceGrid
318341
services={services}
@@ -327,7 +350,7 @@ function Dashboard({ user, onLogout }: DashboardProps) {
327350
<HostPanel host={sys.data?.host ?? null} guests={guests} />
328351
<div className="quick-stats">
329352
<KpiStrip guests={guests} services={services} />
330-
<AuditLog />
353+
<AuditLog pollMs={pollFast} />
331354
</div>
332355
<VMTable guests={guests} onLogs={onLogs} onRestart={onRestart} />
333356
<ServiceGrid
@@ -338,12 +361,14 @@ function Dashboard({ user, onLogout }: DashboardProps) {
338361
/>
339362
</>
340363
)}
341-
{section === 'network' && <NetworkPanel network={net.data} tunnel={tun.data} />}
364+
{section === 'network' && (
365+
<NetworkPanel network={net.data} tunnel={tun.data} pollMs={pollFast} />
366+
)}
342367
{section === 'backup' && (
343368
<BackupsSection
344369
backups={bkp.data}
345370
guests={guests}
346-
onVerify={onVerifyBackup}
371+
onVerify={setVerifyTarget}
347372
onOpenGuest={onLogs}
348373
/>
349374
)}
@@ -354,6 +379,7 @@ function Dashboard({ user, onLogout }: DashboardProps) {
354379
services={services}
355380
zoneName="rxf-sys.de"
356381
onSelectService={setSelectedSvc}
382+
pollMs={pollCerts}
357383
/>
358384
)}
359385
{section === 'admin' && isAdmin && (
@@ -392,10 +418,40 @@ function Dashboard({ user, onLogout }: DashboardProps) {
392418
/>
393419
<ConfirmModal
394420
open={!!confirmGuest}
395-
guest={confirmGuest}
421+
title="Container neu starten?"
422+
message={
423+
confirmGuest && (
424+
<>
425+
<p style={{ margin: '0 0 8px' }}>
426+
<strong>{confirmGuest.name}</strong> ({confirmGuest.type} {confirmGuest.id}) wird neu gestartet.
427+
</p>
428+
<p style={{ margin: 0, fontSize: 12, color: 'var(--text-3)' }}>
429+
Die zugehörigen Services sind dabei ca. 30–60 Sekunden nicht erreichbar.
430+
</p>
431+
</>
432+
)
433+
}
434+
confirmLabel="Restart"
435+
danger
396436
onConfirm={confirmRestart}
397437
onCancel={() => setConfirmGuest(null)}
398438
/>
439+
<ConfirmModal
440+
open={!!verifyTarget}
441+
title="Backup verifizieren?"
442+
message={
443+
verifyTarget && (
444+
<p style={{ margin: 0 }}>
445+
Verifikation für <strong>{verifyTarget.target}</strong> (
446+
{new Date(verifyTarget.backup_time * 1000).toLocaleString()}) starten? Der Job läuft
447+
asynchron auf dem PBS.
448+
</p>
449+
)
450+
}
451+
confirmLabel="Verify starten"
452+
onConfirm={confirmVerify}
453+
onCancel={() => setVerifyTarget(null)}
454+
/>
399455
<CommandPalette
400456
open={paletteOpen}
401457
onClose={() => setPaletteOpen(false)}

0 commit comments

Comments
 (0)