Skip to content

Commit 7ee7312

Browse files
committed
wrapper: ENABLE_STZ gate, rename phases, drop unused forward-proxy probe
- Add ENABLE_STZ env var. Default is the prior behavior (disable STZ during boot, restore after hot path is up). ENABLE_STZ=false or 0 keeps STZ disabled for the lifetime of the container.
- Rename the boot phases by purpose: browser (X server, dbus, chromedriver, chromium, optional mutter/neko) and identity (envoy bootstrap render + kernel-images-api restart). Ready log line is now 'ready in T (browser=… identity=…; cdp=… …)'.
- Drop the forward-proxy probe. The 8888 listener was a private Chromium-fork feature; stock Chromium never binds it, so the probe was always timing out at 60s on envoy-disabled boots.
- Fix shared/envoy/init-envoy.sh: branch on supervisor status so cold boots 'start' envoy instead of 'restart' (which fails under errexit on a stopped service); re-renders still 'restart'.
- Replace the handwritten atoi with strconv.Atoi and promote the anonymous probe struct to a named type.
1 parent 3f47183 commit 7ee7312

4 files changed

Lines changed: 84 additions & 69 deletions

File tree

server/cmd/wrapper/chromium.go

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"os"
66
"os/exec"
7+
"strconv"
78
"strings"
89
"time"
910
)
@@ -78,8 +79,8 @@ func dismissNoSandboxWarning() {
7879
}
7980
width := parts[0]
8081
x := width
81-
if w := atoi(width); w > 30 {
82-
x = fmt.Sprintf("%d", w-30)
82+
if w, err := strconv.Atoi(width); err == nil && w > 30 {
83+
x = strconv.Itoa(w - 30)
8384
}
8485
target := "New Tab - Chromium"
8586
deadline := time.Now().Add(30 * time.Second)
@@ -109,13 +110,3 @@ func dismissNoSandboxWarning() {
109110
"-d", body).Run()
110111
}
111112

112-
func atoi(s string) int {
113-
n := 0
114-
for _, c := range s {
115-
if c < '0' || c > '9' {
116-
return 0
117-
}
118-
n = n*10 + int(c-'0')
119-
}
120-
return n
121-
}

server/cmd/wrapper/main.go

Lines changed: 54 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ func profileName(p profile) string {
5656
func main() {
5757
t0 := time.Now()
5858
prof := detectProfile()
59-
logf("starting wrapper (profile=%s)", profileName(prof))
59+
stzManaged := scaleToZeroManaged()
60+
logf("starting wrapper (profile=%s stz=%s)", profileName(prof), stzMode(stzManaged))
6061

6162
// Register signal handling early so a SIGTERM/SIGINT during the
6263
// seconds-long startup window queues into the channel instead of
@@ -72,9 +73,13 @@ func main() {
7273
_ = exec.Command("mount", "-t", "tmpfs", "tmpfs", "/dev/shm").Run()
7374
}
7475

75-
// Disable scale-to-zero for the duration of startup; restored on exit.
76+
// Disable scale-to-zero for the duration of startup. When ENABLE_STZ is
77+
// false/0 the caller wants STZ off permanently, so we don't re-enable on
78+
// exit or once the hot path is up.
7679
disableScaleToZero()
77-
defer enableScaleToZero()
80+
if stzManaged {
81+
defer enableScaleToZero()
82+
}
7883

7984
// Headless ships a default CHROMIUM_FLAGS list (headless+stealth flags)
8085
// when callers don't set one. Headful's defaults are caller-supplied.
@@ -141,13 +146,13 @@ func main() {
141146
}()
142147
waitForSocket(supervisorSock, 10*time.Second)
143148

144-
// Phase A: identity-free services. Chromium itself doesn't read any
145-
// per-instance identity envs — it just needs the envoy CA cert (baked
146-
// into the image at build time, see shared/envoy/bake-certs.sh) so it
147-
// trusts the forward proxy on first start with no runtime cert work to
148-
// wait on. chromium-launcher internally waits for the X server before
149-
// exec'ing chromium, so we start it in parallel with the X server to
150-
// overlap chromium-launcher's preamble with display startup.
149+
// Browser phase: identity-free services. Chromium itself doesn't read
150+
// any per-instance identity envs — it just needs the envoy CA cert
151+
// (baked into the image at build time, see shared/envoy/bake-certs.sh)
152+
// so it trusts the forward proxy on first start with no runtime cert
153+
// work to wait on. chromium-launcher internally waits for the X server
154+
// before exec'ing chromium, so we start it in parallel with the X
155+
// server to overlap chromium-launcher's preamble with display startup.
151156
// chromedriver listens on 9225 immediately and only attaches to
152157
// chromium on session creation, so it can come up alongside everything.
153158
// mutter has no internal X-wait, so it's started after the X server is
@@ -163,7 +168,7 @@ func main() {
163168
// parallel with chromium.
164169
_ = os.WriteFile(filepath.Join(supervisordLogD, "chromium"), nil, 0o644)
165170

166-
phaseAStart := time.Now()
171+
browserStart := time.Now()
167172
startAll(xServer, "dbus", "chromedriver", "chromium")
168173
waitForX(defaultDisplay, 20*time.Second)
169174
waitForSocket(dbusSocket, 10*time.Second)
@@ -174,7 +179,7 @@ func main() {
174179
}
175180
startAll(post...)
176181
}
177-
phaseADone := time.Now()
182+
browserDone := time.Now()
178183

179184
// FORK HOOK:
180185
// When this binary runs as a forked snapshot restore, the per-fork
@@ -185,33 +190,35 @@ func main() {
185190
// 1. Block on the host-pushed env bundle (vsock socket, virtio-fs
186191
// drop file, or whatever transport the control plane settles on).
187192
// 2. Apply the bundle to this process's environ via os.Setenv so
188-
// Phase C below picks them up via the existing $VAR expansion in
189-
// init-envoy.sh and the supervisorctl-spawned services inherit
190-
// them.
191-
// 3. Phase C uses `supervisorctl restart envoy` (idempotent — start
192-
// on first boot, stop+start on a re-render after fork) so a
193-
// restored snapshot drops its stale identity cleanly.
193+
// the identity phase below picks them up via the existing $VAR
194+
// expansion in init-envoy.sh and the supervisorctl-spawned
195+
// services inherit them.
196+
// 3. The identity phase runs init-envoy.sh, which checks envoy's
197+
// supervisor status: `start` on a cold boot (envoy is STOPPED),
198+
// `restart` on a re-render after fork, so a restored snapshot
199+
// drops its stale identity cleanly.
194200
// Boot path keeps running through unchanged: the wait simply no-ops
195201
// when there's no fork bundle to receive.
196202

197-
// Phase C: identity-bound. Render envoy bootstrap with INST_NAME/JWT/etc
198-
// and (re)start envoy + kernel-images-api. Both services use `restart`
199-
// so the same code path works for boot (start a stopped service) and
200-
// post-fork (stop+start to force a re-read of refreshed envs).
201-
phaseCStart := time.Now()
203+
// Identity phase: identity-bound services. Render envoy bootstrap with
204+
// INST_NAME/JWT/etc and (re)start envoy + kernel-images-api.
205+
// init-envoy.sh picks `start` for envoy on a cold boot (stopped
206+
// service) and `restart` post-fork; kernel-images-api goes through
207+
// restartAll to force a re-read of refreshed envs.
208+
identityStart := time.Now()
202209
if isExecutable("/usr/local/bin/init-envoy.sh") {
203210
runStreamFatal("envoy-init", "/usr/local/bin/init-envoy.sh")
204211
}
205212
restartAll("kernel-images-api")
206-
phaseCDone := time.Now()
213+
identityDone := time.Now()
207214

208215
// Wait for the union of caller-visible ready signals. Each probe runs
209216
// concurrently and logs as soon as its target is reachable.
210217
probeDurations := waitAllReady(t0, webrtc)
211-
logf("ready in %s (phaseA=%s phaseC=%s; %s)",
218+
logf("ready in %s (browser=%s identity=%s; %s)",
212219
since(t0),
213-
phaseADone.Sub(phaseAStart).Truncate(time.Millisecond),
214-
phaseCDone.Sub(phaseCStart).Truncate(time.Millisecond),
220+
browserDone.Sub(browserStart).Truncate(time.Millisecond),
221+
identityDone.Sub(identityStart).Truncate(time.Millisecond),
215222
formatProbeDurations(probeDurations))
216223

217224
// Cosmetic + non-critical services come up off the hot path. Headless has
@@ -225,8 +232,11 @@ func main() {
225232
}()
226233
}
227234

228-
// Re-enable scale-to-zero now that the hot path is up.
229-
enableScaleToZero()
235+
// Re-enable scale-to-zero now that the hot path is up — unless the caller
236+
// asked to keep it disabled via ENABLE_STZ=false/0.
237+
if stzManaged {
238+
enableScaleToZero()
239+
}
230240

231241
// Block on supervisord; container exits when it does.
232242
if err := supCmd.Wait(); err != nil {
@@ -235,37 +245,23 @@ func main() {
235245
}
236246

237247
// waitAllReady gates on all caller-visible ready signals concurrently:
238-
// - CDP : HTTP /json/version on the public CDP port (proves api proxy is wired
239-
// through to chromium's DevTools server)
240-
// - cd : TCP on chromedriver's internal port 9225 (api on 9224 is bound when
241-
// api itself is up, which CDP readiness already implies)
242-
// - proxy : TCP on chromium's --forward-proxy-port (8888)
243-
// - neko : TCP on neko's HTTP port (8080), only when ENABLE_WEBRTC=true
244-
// - envoy : TCP on envoy's listener (3128), only when envoy is enabled
248+
// - cdp : HTTP /json/version on the public CDP port (proves api proxy is
249+
// wired through to chromium's DevTools server)
250+
// - chromedriver : TCP on chromedriver's internal port 9225 (api on 9224 is bound
251+
// when api itself is up, which CDP readiness already implies)
252+
// - neko : TCP on neko's HTTP port (8080), only when ENABLE_WEBRTC=true
253+
// - envoy : TCP on envoy's listener (3128), only when envoy is enabled
245254
func waitAllReady(t0 time.Time, webrtc bool) map[string]time.Duration {
246255
chromePort := os.Getenv("CHROME_PORT")
247-
if chromePort == "" {
248-
chromePort = "9222"
249-
}
250-
probes := []struct {
251-
name string
252-
fn func() bool
253-
}{
256+
probes := []probe{
254257
{"cdp", func() bool { return httpProbeOK("http://127.0.0.1:" + chromePort + "/json/version") }},
255258
{"chromedriver", func() bool { return tcpOK("127.0.0.1", "9225") }},
256-
{"forward-proxy", func() bool { return tcpOK("127.0.0.1", "8888") }},
257259
}
258260
if webrtc {
259-
probes = append(probes, struct {
260-
name string
261-
fn func() bool
262-
}{"neko", func() bool { return tcpOK("127.0.0.1", "8080") }})
261+
probes = append(probes, probe{"neko", func() bool { return tcpOK("127.0.0.1", "8080") }})
263262
}
264263
if envoyEnabled() {
265-
probes = append(probes, struct {
266-
name string
267-
fn func() bool
268-
}{"envoy", func() bool { return tcpOK("127.0.0.1", "3128") }})
264+
probes = append(probes, probe{"envoy", func() bool { return tcpOK("127.0.0.1", "3128") }})
269265
}
270266

271267
type result struct {
@@ -301,11 +297,16 @@ func waitAllReady(t0 time.Time, webrtc bool) map[string]time.Duration {
301297
return durations
302298
}
303299

300+
type probe struct {
301+
name string
302+
fn func() bool
303+
}
304+
304305
// formatProbeDurations renders waitAllReady's per-probe ready times in a stable
305306
// order so log lines diff cleanly across runs. Probes that never succeeded are
306307
// omitted (they'd already have logged a WARNING separately).
307308
func formatProbeDurations(d map[string]time.Duration) string {
308-
order := []string{"cdp", "chromedriver", "forward-proxy", "neko", "envoy"}
309+
order := []string{"cdp", "chromedriver", "neko", "envoy"}
309310
parts := make([]string, 0, len(d))
310311
for _, name := range order {
311312
if v, ok := d[name]; ok {

server/cmd/wrapper/system.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ func writeScaleToZero(c string) {
1717
_ = os.WriteFile(scaleToZeroFile, []byte(c), 0o644)
1818
}
1919

20+
// scaleToZeroManaged reports whether the wrapper should re-enable scale-to-zero
21+
// once boot completes. Default is true (preserves the previous behavior); set
22+
// ENABLE_STZ=false or 0 to keep STZ disabled for the lifetime of the container.
23+
func scaleToZeroManaged() bool {
24+
v := os.Getenv("ENABLE_STZ")
25+
return v != "false" && v != "0"
26+
}
27+
28+
func stzMode(managed bool) string {
29+
if managed {
30+
return "managed"
31+
}
32+
return "off"
33+
}
34+
2035
func prepareUserDirs(asRoot bool) {
2136
if asRoot {
2237
for _, d := range []string{"/tmp", "/var/log", supervisordLogD, "/home/kernel", "/home/kernel/user-data"} {

shared/envoy/init-envoy.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,18 @@ sed -e "s|{INST_NAME}|$inst_esc|g" \
3535
/etc/envoy/templates/bootstrap.yaml > /etc/envoy/bootstrap.yaml
3636

3737
echo "[envoy-init] Starting Envoy via supervisord"
38-
# `restart` is start-or-stop+start: on first boot this just starts envoy,
39-
# on a re-render (e.g. post-fork env refresh) it forces a clean re-read
40-
# of the rendered bootstrap. Either way no callers see stale identity.
41-
supervisorctl -c /etc/supervisor/supervisord.conf restart envoy
38+
# Envoy's supervisor program has autostart=false, so on cold boot it's in
39+
# the STOPPED state. supervisorctl's `restart` is implemented as stop+start
40+
# and reports a non-zero exit when the stop sees a service that isn't
41+
# running — which under `set -o errexit` would abort the boot path. Branch
42+
# on the current state so cold boots only `start`, while re-renders (e.g.
43+
# post-fork env refresh) `restart` to force a clean re-read of the
44+
# rendered bootstrap.
45+
if supervisorctl -c /etc/supervisor/supervisord.conf status envoy | grep -q RUNNING; then
46+
supervisorctl -c /etc/supervisor/supervisord.conf restart envoy
47+
else
48+
supervisorctl -c /etc/supervisor/supervisord.conf start envoy
49+
fi
4250

4351
# Readiness (port 3128 reachable) is probed by the Go wrapper's
4452
# waitAllReady alongside CDP/chromedriver, so this script returns as soon

0 commit comments

Comments
 (0)