Skip to content

Commit 747c4ec

Browse files
sjmiller609 and claude committed
Split wrapper into identity-free and identity-bound phases
Boot now layers as:

- Phase A: xorg/xvfb, dbus, chromedriver + envoy cert install (cert generation/CA trust/NSS DB - no per-instance envs read)
- Phase B: mutter, chromium, neko (X/dbus consumers, identity-free)
- Phase C: envoy template render + envoy and kernel-images-api restart (gated on INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT)

init-envoy.sh now takes a phase argument (certs|config|all). The wrapper calls certs early so chromium can boot with the proxy cert in trust, and defers config (template render + supervisorctl start) until Phase C.

A FORK HOOK comment marks the Phase B/C boundary: post-snapshot restore, fresh identity envs need to be received from the host before Phase C runs, and `supervisorctl restart` makes the same Phase C code path safe on both boot (start cold service) and fork (stop+start to drop stale identity).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent bcdbc28 commit 747c4ec

2 files changed

Lines changed: 159 additions & 84 deletions

File tree

server/cmd/wrapper/main.go

Lines changed: 60 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -128,24 +128,24 @@ func main() {
128128
}
129129
waitForSocket(supervisorSock, 10*time.Second)
130130

131-
// Envoy bootstrap: cert generation, NSS DB, template render, and
132-
// `supervisorctl start envoy`. Run concurrently with Phase A so the
133-
// shell-out work (openssl, certutil, update-ca-certificates) overlaps
134-
// xorg/dbus/chromedriver bring-up. Phase B (chromium) gates on this
135-
// because chromium reads the system CA trust store at process start
136-
// and needs the envoy self-signed cert in place. The envoy listener
137-
// itself (port 3128) is probed in waitAllReady, not here.
138-
envoyDone := make(chan struct{})
131+
// Envoy cert work (openssl, update-ca-certificates, certutil) is the
132+
// only piece of Envoy bring-up that's identity-free, and it has to land
133+
// before chromium starts because chromium reads the system CA trust
134+
// store at process start. Run it concurrently with Phase A so the
135+
// shell-out cost overlaps xorg/dbus/chromedriver bring-up. Template
136+
// render and `supervisorctl start envoy` happen later in Phase C —
137+
// those depend on INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT.
138+
envoyCertsDone := make(chan struct{})
139139
if isExecutable("/usr/local/bin/init-envoy.sh") {
140140
go func() {
141-
defer close(envoyDone)
142-
runStream("envoy-init", "/usr/local/bin/init-envoy.sh")
141+
defer close(envoyCertsDone)
142+
runStream("envoy-init", "/usr/local/bin/init-envoy.sh", "certs")
143143
}()
144144
} else {
145-
close(envoyDone)
145+
close(envoyCertsDone)
146146
}
147147

148-
// Phase A: services with no X/dbus/chromium dependency. chromedriver
148+
// Phase A: identity-free services with no X/dbus dependency. chromedriver
149149
// listens on 9225 immediately and only attaches to chromium on session
150150
// creation, so it can come up alongside the display stack.
151151
xServer := "xorg"
@@ -161,25 +161,52 @@ func main() {
161161
// parallel with chromium.
162162
_ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644)
163163

164-
// Gate chromium on envoy cert/template work being done.
165-
<-envoyDone
164+
// Gate chromium on the envoy cert being installed in the trust store.
165+
<-envoyCertsDone
166166

167-
// Phase B: everything that needs X+dbus, started in a single supervisorctl
168-
// invocation. On headful, mutter is the compositor and neko/api come up
169-
// alongside chromium so their bring-up overlaps with chromium boot rather
170-
// than trailing CDP. Headless has no compositor and no neko.
167+
// Phase B: identity-free X/dbus consumers. Chromium itself doesn't read
168+
// any per-instance identity envs — it just needs the envoy cert (Phase A)
169+
// in trust. mutter is the compositor on headful; neko is the WebRTC
170+
// streamer when ENABLE_WEBRTC=true.
171171
webrtc := prof == profileHeadful && os.Getenv("ENABLE_WEBRTC") == "true"
172172
var phaseB []string
173173
if prof == profileHeadful {
174-
phaseB = []string{"mutter", "chromium", "kernel-images-api"}
174+
phaseB = []string{"mutter", "chromium"}
175175
if webrtc {
176176
phaseB = append(phaseB, "neko")
177177
}
178178
} else {
179-
phaseB = []string{"chromium", "kernel-images-api"}
179+
phaseB = []string{"chromium"}
180180
}
181181
startAll(phaseB...)
182182

183+
// FORK HOOK:
184+
// When this binary runs as a forked snapshot restore, the per-fork
185+
// identity envs (INST_NAME, METRO_NAME, XDS_SERVER, KERNEL_INSTANCE_JWT,
186+
// plus any future per-tenant secrets) won't be set yet at this point —
187+
// the snapshot was taken from a different instance. Insert the
188+
// following sequence here once the env-delivery channel exists:
189+
// 1. Block on the host-pushed env bundle (vsock socket, virtio-fs
190+
// drop file, or whatever transport the control plane settles on).
191+
// 2. Apply the bundle to this process's environ via os.Setenv so
192+
// Phase C below picks them up via the existing $VAR expansion in
193+
// init-envoy.sh and the supervisorctl-spawned services inherit
194+
// them.
195+
// 3. Phase C uses `supervisorctl restart envoy` (idempotent — start
196+
// on first boot, stop+start on a re-render after fork) so a
197+
// restored snapshot drops its stale identity cleanly.
198+
// Boot path keeps running through unchanged: the wait simply no-ops
199+
// when there's no fork bundle to receive.
200+
201+
// Phase C: identity-bound. Render envoy bootstrap with INST_NAME/JWT/etc
202+
// and (re)start envoy + kernel-images-api. Both services use `restart`
203+
// so the same code path works for boot (start a stopped service) and
204+
// post-fork (stop+start to force a re-read of refreshed envs).
205+
if isExecutable("/usr/local/bin/init-envoy.sh") {
206+
runStream("envoy-init", "/usr/local/bin/init-envoy.sh", "config")
207+
}
208+
restartAll("kernel-images-api")
209+
183210
// Wait for the union of caller-visible ready signals. Each probe runs
184211
// concurrently and logs as soon as its target is reachable.
185212
waitAllReady(t0, webrtc)
@@ -219,10 +246,22 @@ func main() {
219246
// supervisorctl once (it accepts multiple args) so we don't pay python
220247
// cold-start costs per service.
221248
func startAll(progs ...string) {
249+
supervisorctl("start", progs...)
250+
}
251+
252+
// restartAll is the start-or-stop+start variant. It's used for services
253+
// that may already be running from a snapshot restore (post-fork, see the
254+
// FORK HOOK in main) so they pick up refreshed envs cleanly. supervisorctl
255+
// `restart` is a no-op stop on cold programs followed by a normal start.
256+
func restartAll(progs ...string) {
257+
supervisorctl("restart", progs...)
258+
}
259+
260+
func supervisorctl(verb string, progs ...string) {
222261
if len(progs) == 0 {
223262
return
224263
}
225-
args := append([]string{"-c", supervisorConf, "start"}, progs...)
264+
args := append([]string{"-c", supervisorConf, verb}, progs...)
226265
cmd := exec.Command("supervisorctl", args...)
227266
cmd.Stdout = os.Stdout
228267
cmd.Stderr = os.Stderr

shared/envoy/init-envoy.sh

Lines changed: 99 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,37 @@
22

33
set -o pipefail -o errexit -o nounset
44

5-
# The browser instance JWT is the sole token contract for xDS and host-local
6-
# services in the image runtime.
7-
INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}"
8-
9-
# Check for required environment variables, to see if envoy is enabled
10-
if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then
11-
echo "[envoy-init] Required environment variables not set. Skipping Envoy initialization."
12-
exit 0
13-
fi
14-
15-
# Also check for template file
16-
if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then
17-
echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy initialization."
18-
exit 0
19-
fi
20-
21-
echo "[envoy-init] Preparing Envoy bootstrap configuration"
22-
mkdir -p /etc/envoy
23-
24-
# Generate self-signed certificates for TLS forward proxy
25-
echo "[envoy-init] Generating self-signed certificates for TLS forward proxy"
26-
mkdir -p /etc/envoy/certs
27-
28-
if [[ ! -f /etc/envoy/certs/proxy.crt || ! -f /etc/envoy/certs/proxy.key ]]; then
5+
# Phase argument lets the Go wrapper split the script into an identity-free
6+
# stage (certs/CA trust/NSS DB — runs early so chromium boots with the cert
7+
# already trusted) and an identity-bound stage (template render with
8+
# INST_NAME/METRO_NAME/XDS_SERVER/KERNEL_INSTANCE_JWT, then envoy start).
9+
# certs — generate self-signed cert and install it in trust stores
10+
# config — render bootstrap template and start envoy via supervisord
11+
# all — both phases (default; preserves legacy single-call behavior)
12+
PHASE="${1:-all}"
13+
14+
case "$PHASE" in
15+
certs|config|all) ;;
16+
*)
17+
echo "[envoy-init] Unknown phase: $PHASE (expected certs|config|all)" >&2
18+
exit 2
19+
;;
20+
esac
21+
22+
run_certs() {
23+
if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then
24+
echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping cert generation."
25+
return 0
26+
fi
27+
28+
echo "[envoy-init] Generating self-signed certificates for TLS forward proxy"
29+
mkdir -p /etc/envoy/certs
30+
31+
if [[ -f /etc/envoy/certs/proxy.crt && -f /etc/envoy/certs/proxy.key ]]; then
32+
echo "[envoy-init] Certificates already exist, skipping generation"
33+
return 0
34+
fi
35+
2936
echo "[envoy-init] Creating new self-signed certificate"
3037
openssl req -x509 -nodes -days 3650 -newkey rsa:2048 \
3138
-keyout /etc/envoy/certs/proxy.key \
@@ -34,46 +41,75 @@ if [[ ! -f /etc/envoy/certs/proxy.crt || ! -f /etc/envoy/certs/proxy.key ]]; the
3441
-addext "subjectAltName = DNS:localhost,IP:127.0.0.1" \
3542
2>&1 | sed 's/^/[envoy-init] /'
3643
echo "[envoy-init] Certificate generated successfully"
37-
38-
# Add certificate to system trust store for Chrome/Chromium
39-
echo "[envoy-init] Adding certificate to system trust store"
40-
cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt
41-
cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt
42-
update-ca-certificates 2>&1 | sed 's/^/[envoy-init] /'
43-
echo "[envoy-init] Certificate added to system trust store"
44-
if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then
44+
45+
echo "[envoy-init] Adding certificate to system trust store"
46+
cp /etc/envoy/certs/proxy.crt /usr/local/share/ca-certificates/kernel-envoy-proxy.crt
47+
cp /etc/envoy/certs/proxy.crt /kernel-envoy-proxy.crt
48+
update-ca-certificates 2>&1 | sed 's/^/[envoy-init] /'
49+
echo "[envoy-init] Certificate added to system trust store"
50+
51+
if [[ "${RUN_AS_ROOT:-}" == "true" ]]; then
4552
mkdir -p /root/.pki/nssdb
4653
certutil -d /root/.pki/nssdb -N --empty-password 2>/dev/null || true
4754
certutil -d /root/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt
4855
echo "[envoy-init] Certificate added to nssdb as root"
49-
else
50-
mkdir -p /home/kernel/.pki/nssdb
51-
certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true
52-
certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt
53-
chown -R kernel:kernel /home/kernel/.pki
54-
echo "[envoy-init] Certificate added to nssdb as kernel"
55-
fi
56-
echo "[envoy-init] Certificate added to nssdb"
57-
else
58-
echo "[envoy-init] Certificates already exist, skipping generation"
59-
fi
60-
61-
# Render template with provided environment variables
62-
echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***"
63-
inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g')
64-
metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g')
65-
xds_esc=$(printf '%s' "$XDS_SERVER" | sed -e 's/[\/&]/\\&/g')
66-
jwt_esc=$(printf '%s' "$INSTANCE_JWT" | sed -e 's/[\/&]/\\&/g')
67-
sed -e "s|{INST_NAME}|$inst_esc|g" \
68-
-e "s|{METRO_NAME}|$metro_esc|g" \
69-
-e "s|{XDS_SERVER}|$xds_esc|g" \
70-
-e "s|{KERNEL_INSTANCE_JWT}|$jwt_esc|g" \
71-
/etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml
72-
73-
echo "[envoy-init] Starting Envoy via supervisord"
74-
supervisorctl -c /etc/supervisor/supervisord.conf start envoy
75-
76-
# Readiness (port 3128 reachable) is now probed by the Go wrapper's
77-
# waitAllReady alongside CDP/chromedriver, so this script returns as soon
78-
# as the start request has been issued. Removing the in-script poll lets
79-
# init-envoy.sh run concurrently with Phase A bring-up.
56+
else
57+
mkdir -p /home/kernel/.pki/nssdb
58+
certutil -d /home/kernel/.pki/nssdb -N --empty-password 2>/dev/null || true
59+
certutil -d /home/kernel/.pki/nssdb -A -t "C,," -n "Kernel Envoy Proxy" -i /etc/envoy/certs/proxy.crt
60+
chown -R kernel:kernel /home/kernel/.pki
61+
echo "[envoy-init] Certificate added to nssdb as kernel"
62+
fi
63+
}
64+
65+
run_config() {
66+
# Identity envs gate the config phase: without them xDS can't bind, so
67+
# render+start is a no-op on images that don't run with a JWT.
68+
INSTANCE_JWT="${KERNEL_INSTANCE_JWT:-}"
69+
if [[ -z "${INST_NAME:-}" || -z "${METRO_NAME:-}" || -z "${XDS_SERVER:-}" || -z "${INSTANCE_JWT:-}" ]]; then
70+
echo "[envoy-init] Required environment variables not set. Skipping Envoy config/start."
71+
return 0
72+
fi
73+
74+
if [[ ! -f /etc/envoy/templates/bootstrap.yaml ]]; then
75+
echo "[envoy-init] Template file /etc/envoy/templates/bootstrap.yaml not found. Skipping Envoy config/start."
76+
return 0
77+
fi
78+
79+
echo "[envoy-init] Preparing Envoy bootstrap configuration"
80+
mkdir -p /etc/envoy
81+
82+
echo "[envoy-init] Rendering template with INST_NAME=${INST_NAME}, METRO_NAME=${METRO_NAME}, XDS_SERVER=${XDS_SERVER}, KERNEL_INSTANCE_JWT=***"
83+
inst_esc=$(printf '%s' "$INST_NAME" | sed -e 's/[\/&]/\\&/g')
84+
metro_esc=$(printf '%s' "$METRO_NAME" | sed -e 's/[\/&]/\\&/g')
85+
xds_esc=$(printf '%s' "$XDS_SERVER" | sed -e 's/[\/&]/\\&/g')
86+
jwt_esc=$(printf '%s' "$INSTANCE_JWT" | sed -e 's/[\/&]/\\&/g')
87+
sed -e "s|{INST_NAME}|$inst_esc|g" \
88+
-e "s|{METRO_NAME}|$metro_esc|g" \
89+
-e "s|{XDS_SERVER}|$xds_esc|g" \
90+
-e "s|{KERNEL_INSTANCE_JWT}|$jwt_esc|g" \
91+
/etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml
92+
93+
echo "[envoy-init] Starting Envoy via supervisord"
94+
# `restart` is start-or-stop+start: on first boot this just starts envoy,
95+
# on a re-render (e.g. post-fork env refresh) it forces a clean re-read
96+
# of the rendered bootstrap. Either way no callers see stale identity.
97+
supervisorctl -c /etc/supervisor/supervisord.conf restart envoy
98+
99+
# Readiness (port 3128 reachable) is probed by the Go wrapper's
100+
# waitAllReady alongside CDP/chromedriver, so this script returns as soon
101+
# as the start request has been issued.
102+
}
103+
104+
case "$PHASE" in
105+
certs)
106+
run_certs
107+
;;
108+
config)
109+
run_config
110+
;;
111+
all)
112+
run_certs
113+
run_config
114+
;;
115+
esac

0 commit comments

Comments
 (0)