Skip to content

Commit a78e9e4

Browse files
SecAI-Hub and claude committed
Add update verification with cosign checks, greenboot health checks, and auto-rollback (M24)
Staged update workflow (check → stage → apply) with cosign signature verification. Greenboot health check validates critical services, firewall rules, and integrity scripts on every boot — auto-rollback via rpm-ostree if checks fail (max 2 attempts). UI endpoints for update management and health monitoring. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9196040 commit a78e9e4

10 files changed

Lines changed: 1135 additions & 0 deletions

File tree

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Secure AI Appliance — Greenboot Health Check (M24)
4+
#
5+
# Runs on every boot via greenboot. If this script exits non-zero,
6+
# greenboot triggers an automatic rpm-ostree rollback + reboot.
7+
#
8+
# Checks:
9+
# 1. Critical systemd services are active
10+
# 2. Registry API is reachable
11+
# 3. Firewall rules are loaded
12+
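# 4. Integrity scripts are present and executable (securectl, verify-boot-chain.sh, canary-check.sh)
13+
# 5. Vault entry in /etc/crypttab is logged if present (the mapper device itself is not checked)
14+
# 6. securectl is executable (checked together with the integrity scripts in step 4)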
15+
#
16+
# Timeout: 5 minutes (configured in greenboot.conf)
17+
#
18+
19+
# Abort on command failure, on use of unset variables, and on a failure
# anywhere inside a pipeline.
set -euo pipefail
20+
21+
# File that receives the JSON record of the most recent health-check result.
HEALTH_LOG="/var/lib/secure-ai/logs/health-check.json"
22+
# Counter of failed health-check runs; read and incremented by fail(),
# removed again when every check passes.
# NOTE(review): /run is normally a tmpfs that is emptied on reboot — confirm
# this counter can really accumulate across rollback reboots.
ROLLBACK_COUNTER="/run/secure-ai/rollback-count"
23+
# Once the failure counter reaches this value, fail() exits 0 instead of 1.
MAX_ROLLBACKS=2
24+
# Kernel-provided identifier of the current boot; "unknown" if unreadable.
BOOT_ID=$(cat /proc/sys/kernel/random/boot_id 2>/dev/null || echo "unknown")
25+
26+
# Print a message on stdout with the "[health-check]" prefix and mirror it
# to syslog under the "secure-ai-health" tag.
# Arguments: all arguments are joined into one message.
# Syslog delivery is best-effort: a missing or failing logger is ignored.
log() {
  local message="$*"
  printf '%s\n' "[health-check] ${message}"
  logger -t secure-ai-health "${message}" 2>/dev/null || true
}
30+
31+
# Record a failed health check and terminate the script.
#
# Globals:   ROLLBACK_COUNTER (read/written), MAX_ROLLBACKS (read)
# Arguments: all arguments form the human-readable failure reason
# Exits:     1 while the failure counter is below MAX_ROLLBACKS (a non-zero
#            exit is what asks greenboot to roll back, per the file header);
#            0 once the counter reaches MAX_ROLLBACKS, so the system stays on
#            the current deployment for manual intervention.
fail() {
  local reason="$*"
  local count=0
  local stored=""

  log "FAIL: $reason"

  # Track rollback attempts. Only a plain decimal number is trusted; anything
  # else (empty file, garbage, partial write) restarts the count instead of
  # aborting the script with an arithmetic error before the limit is checked.
  if [ -f "$ROLLBACK_COUNTER" ]; then
    stored=$(cat "$ROLLBACK_COUNTER" 2>/dev/null || echo "0")
    if [[ "$stored" =~ ^[0-9]{1,9}$ ]]; then
      # Force base 10 so a zero-padded value such as "08" is not read as octal.
      count=$((10#$stored))
    else
      log "WARNING: ignoring malformed rollback counter: $stored"
    fi
  fi
  count=$((count + 1))

  # Persisting the counter is best-effort: under `set -e` an unguarded failed
  # write would kill the script before the max-rollback guard below runs.
  mkdir -p "$(dirname "$ROLLBACK_COUNTER")" 2>/dev/null || true
  if ! { echo "$count" > "$ROLLBACK_COUNTER"; } 2>/dev/null; then
    log "WARNING: could not persist rollback counter to $ROLLBACK_COUNTER"
  fi

  if [ "$count" -ge "$MAX_ROLLBACKS" ]; then
    log "ERROR: max rollback attempts ($MAX_ROLLBACKS) reached — halting"
    write_result "fail" "max rollbacks reached: $reason"
    # Don't exit non-zero here to prevent infinite rollback loop
    # System stays on current (broken) deployment for manual intervention
    exit 0
  fi

  write_result "fail" "$reason"
  exit 1
}
55+
56+
# Write a single JSON record describing the health-check outcome to
# HEALTH_LOG, replacing any previous content.
#
# Globals:   HEALTH_LOG (read), BOOT_ID (read; "unknown" when unset)
# Arguments: $1 - status string (the script passes "pass" or "fail")
#            $2 - optional free-form detail text
# Output:    JSON object with timestamp, event, status, detail, boot_id and a
#            SHA-256 hash of the other fields (serialised with sorted keys).
# Failures (missing python3, unwritable path) are deliberately ignored so that
# result logging can never change the outcome of the health check itself.
write_result() {
  local status="$1"
  local detail="${2:-}"
  mkdir -p "$(dirname "$HEALTH_LOG")" 2>/dev/null || true
  # Values are handed to Python as arguments instead of being spliced into the
  # program text: a quote, backslash or newline in the detail can then neither
  # break the snippet nor be executed as code.
  python3 -c '
import hashlib
import json
import sys
from datetime import datetime

status, detail, boot_id = sys.argv[1:4]
entry = {
    "timestamp": datetime.now().isoformat(),
    "event": "health_check",
    "status": status,
    "detail": detail,
    "boot_id": boot_id,
}
entry["hash"] = hashlib.sha256(json.dumps(entry, sort_keys=True).encode()).hexdigest()
print(json.dumps(entry))
' "$status" "$detail" "${BOOT_ID:-unknown}" > "$HEALTH_LOG" 2>/dev/null || true
}
74+
75+
# ── Check 1: Critical services ──
log "Checking critical systemd services..."

# Units that must be active unconditionally.
CRITICAL_SERVICES=(
  "nftables.service"
)

# These services should be active if they were enabled
OPTIONAL_SERVICES=(
  "secure-ai-registry.service"
  "secure-ai-tool-firewall.service"
  "secure-ai-ui.service"
)

for unit in "${CRITICAL_SERVICES[@]}"; do
  # fail() terminates the script, so the log line below only runs on success.
  systemctl is-active --quiet "$unit" 2>/dev/null \
    || fail "critical service not active: $unit"
  log " $unit: active"
done

for unit in "${OPTIONAL_SERVICES[@]}"; do
  # Units that are not enabled are skipped without any check.
  systemctl is-enabled --quiet "$unit" 2>/dev/null || continue

  # Give services up to 60 seconds to start (12 polls, 5 seconds apart)
  attempt=0
  while [ "$attempt" -lt 12 ]; do
    if systemctl is-active --quiet "$unit" 2>/dev/null; then
      break
    fi
    sleep 5
    attempt=$((attempt + 1))
  done

  # Final verdict after the polling window.
  systemctl is-active --quiet "$unit" 2>/dev/null \
    || fail "enabled service failed to start: $unit"
  log " $unit: active"
done
110+
111+
# ── Check 2: Registry API ──
# Probed only when the registry unit is enabled: up to 6 attempts, 5 seconds
# apart, against the local health endpoint.
log "Checking registry API..."
if systemctl is-enabled --quiet secure-ai-registry.service 2>/dev/null; then
  for ((i = 1; i <= 6; i++)); do
    # --max-time bounds each probe; without it a listener that accepts the
    # connection but never answers would stall the whole health check.
    if curl -sf --max-time 5 http://127.0.0.1:8470/health >/dev/null 2>&1; then
      log " registry API: reachable"
      break
    fi
    if [ "$i" -eq 6 ]; then
      fail "registry API unreachable after 30s"
    fi
    sleep 5
  done
fi
125+
126+
# ── Check 3: Firewall rules ──
# Passes when the nft ruleset mentions "secure_ai"; fails when it does not or
# when the nft binary is missing.
log "Checking firewall rules..."
if command -v nft &>/dev/null; then
  # Capture the ruleset before searching it. Piping nft straight into
  # `grep -q` is unsafe under `set -o pipefail`: grep exits on the first match,
  # nft can then die from SIGPIPE, and the pipeline reports failure even though
  # the table is loaded — which would trigger a needless rollback.
  nft_ruleset="$(nft list ruleset 2>/dev/null)" || nft_ruleset=""
  if ! grep -q "secure_ai" <<<"$nft_ruleset"; then
    fail "nftables secure_ai table not loaded"
  fi
  log " nftables: secure_ai table loaded"
else
  fail "nft command not found"
fi
136+
137+
# ── Check 4: Integrity scripts ──
# Each listed helper must exist and carry the executable bit.
log "Checking integrity scripts..."
integrity_scripts=(
  /usr/libexec/secure-ai/securectl
  /usr/libexec/secure-ai/verify-boot-chain.sh
  /usr/libexec/secure-ai/canary-check.sh
)
for candidate in "${integrity_scripts[@]}"; do
  [ -x "$candidate" ] \
    || fail "integrity script missing or not executable: $candidate"
done
log " integrity scripts: present"
148+
149+
# ── Check 5: Vault device ──
# Informational only: a crypttab entry mentioning the vault is logged, and
# nothing in this step can fail the health check.
log "Checking vault configuration..."
if [ -f /etc/crypttab ] \
  && grep -q "secure-ai-vault" /etc/crypttab 2>/dev/null; then
  log " vault: configured in crypttab"
fi

# ── All checks passed ──
# Reaching this point means no earlier check called fail().
log "All health checks passed"
write_result "pass" "all checks passed"

# A healthy boot resets the failure counter (best-effort).
rm -f "$ROLLBACK_COUNTER" 2>/dev/null || true

exit 0

files/system/etc/secure-ai/config/appliance.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,31 @@ canary:
165165
- "/var/lib/secure-ai/keys/.canary"
166166
- "/etc/secure-ai/.canary"
167167

168+
emergency:
169+
# Emergency wipe / panic system (M23).
170+
# Three severity levels:
171+
# Level 1 — Lock: stop services, lock vault, invalidate sessions (reversible)
172+
# Level 2 — Wipe Keys: shred LUKS header, cosign keys, TPM2 keys, MOK key
173+
# Level 3 — Full Wipe: re-encrypt vault with random key (data unrecoverable),
174+
# clear memory, delete logs, registry, auth data
175+
# Levels 2+ require passphrase confirmation.
176+
# Countdown gives 5 seconds to cancel (skippable with --no-countdown).
177+
countdown_seconds: 5
178+
# NOTE(review): the key name mentions level 2 only, while the comment above
# says levels 2+ need a passphrase — confirm level 3 is covered by this flag.
require_passphrase_level2: true
179+
# File that receives the audit trail of panic actions (JSON Lines, judging by
# the .jsonl extension — confirm against the writer).
audit_log: "/var/lib/secure-ai/logs/panic-audit.jsonl"
180+
181+
updates:
182+
# Update verification + auto-rollback (M24).
183+
# Cosign signature verification before applying rpm-ostree upgrades.
184+
# Staged updates: check → stage → apply workflow via UI.
185+
# Greenboot health checks: auto-rollback if post-update boot fails.
186+
# Max 2 rollback attempts before halting for manual intervention.
# NOTE(review): the greenboot health-check script in this change hard-codes
# its own rollback limit (MAX_ROLLBACKS=2) and does not read this file —
# confirm which component consumes max_rollback_attempts and
# health_check_timeout, and keep the values in sync.
187+
cosign_verify: true
188+
auto_check_interval: 6 # hours
189+
max_rollback_attempts: 2
190+
health_check_timeout: 300 # seconds
191+
staged_updates: true # require explicit user confirmation before applying
192+
168193
logging:
169194
level: "info"
170195
store_raw_prompts: false
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Runs the Secure AI health-check script once as a oneshot service, ordered
# after first-boot setup and multi-user.target.
# NOTE(review): the script lives in greenboot's required.d directory, so
# greenboot presumably runs it as well; every failing run increments the
# script's rollback counter — confirm a second run from this unit is intended.
[Unit]
2+
Description=Secure AI Post-Update Health Check
3+
After=secure-ai-firstboot.service multi-user.target
4+
# Wants= pulls network-online.target in but adds no ordering on its own.
# NOTE(review): add After=network-online.target if the checks need the network.
Wants=network-online.target
5+
6+
[Service]
7+
Type=oneshot
8+
ExecStart=/etc/greenboot/check/required.d/01-secure-ai-health.sh
9+
# 300 seconds, matching the 5-minute timeout stated in the script header.
TimeoutStartSec=300
10+
11+
# Sandboxing for the check process.
ProtectHome=yes
12+
PrivateTmp=yes
13+
ProtectKernelModules=yes
14+
ProtectControlGroups=yes
15+
# Disable core dumps.
LimitCORE=0
16+
17+
# Directories the script writes to (result log and rollback counter).
# NOTE(review): no ProtectSystem= is set in this unit; ReadWritePaths= is
# normally used to open exceptions in a read-only view — confirm the intent.
ReadWritePaths=/var/lib/secure-ai/logs /run/secure-ai
18+
19+
[Install]
20+
WantedBy=multi-user.target
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Oneshot service that runs `update-verify.sh check` once the network is
# online. No [Install] section is present, so it is presumably started by a
# timer or on demand — confirm against the companion timer unit.
[Unit]
2+
Description=Secure AI Update Check (cosign-verified)
3+
# Both ordering (After=) and activation (Wants=) on network-online.target.
After=network-online.target
4+
Wants=network-online.target
5+
6+
[Service]
7+
Type=oneshot
8+
ExecStart=/usr/libexec/secure-ai/update-verify.sh check
9+
10+
# Sandboxing for the update check process.
ProtectHome=yes
11+
PrivateTmp=yes
12+
ProtectKernelModules=yes
13+
ProtectControlGroups=yes
14+
# Disable core dumps.
LimitCORE=0
15+
16+
# NOTE(review): no ProtectSystem= is set in this unit; ReadWritePaths= is
# normally used to open exceptions in a read-only view — confirm the intent.
ReadWritePaths=/var/lib/secure-ai/logs /run/secure-ai
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Timer for the periodic update check. With no Unit= line it activates the
# service that shares this timer's file name.
[Unit]
2+
Description=Periodic Update Check (every 6 hours)
3+
4+
[Timer]
5+
# First run 15 minutes after boot, then 6 hours after each activation.
OnBootSec=15min
6+
OnUnitActiveSec=6h
7+
# Adds a random delay of up to 30 minutes to each run.
RandomizedDelaySec=30min
8+
# NOTE(review): systemd.timer(5) documents Persistent= as only affecting
# timers configured with OnCalendar=; none is set here, so this line likely
# has no effect — confirm or switch to an OnCalendar= schedule.
Persistent=true
9+
10+
[Install]
11+
WantedBy=timers.target

files/system/usr/libexec/secure-ai/firstboot.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,28 @@ log "Running initial canary verification..."
265265
log "WARNING: initial canary check failed"
266266
}
267267

268+
# --- Emergency wipe verification (M23) ---
# Report whether securectl is usable; when it is, also make sure the runtime
# state directory exists (creation failures are ignored).
if [ ! -x /usr/libexec/secure-ai/securectl ]; then
  log "WARNING: securectl not found or not executable"
else
  log "Emergency wipe tool (securectl) available"
  mkdir -p /run/secure-ai 2>/dev/null || true
fi

# --- Update verification + greenboot (M24) ---
# Both checks are advisory: a missing tool is logged as a warning only.
if [ ! -x /usr/libexec/secure-ai/update-verify.sh ]; then
  log "WARNING: update-verify.sh not found or not executable"
else
  log "Update verification tool available"
fi

if [ ! -x /etc/greenboot/check/required.d/01-secure-ai-health.sh ]; then
  log "WARNING: greenboot health check not found"
else
  log "Greenboot health check script available"
fi
289+
268290
# Write marker (read-only to prevent tampering)
269291
date -Iseconds > "$MARKER"
270292
chmod 444 "$MARKER"

0 commit comments

Comments
 (0)