Skip to content

Commit 3f47183

Browse files
committed
wrapper: handle early signals, fail-fast on init-envoy, share waitForX
- Register SIGTERM/SIGINT handling at startup and install the shutdown goroutine right after supervisord starts, so a signal during the readiness window queues rather than triggering Go's default immediate exit.
- runStream now returns the command error; init-envoy fatals on non-zero exit, matching the old wrapper.sh under `set -o errexit`.
- Extract waitForX to lib/x11.WaitForDisplay so the wrapper and chromium-launcher share one implementation.
1 parent 40f565c commit 3f47183

5 files changed

Lines changed: 74 additions & 59 deletions

File tree

server/cmd/chromium-launcher/main.go

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"time"
1414

1515
"github.com/kernel/kernel-images/server/lib/chromiumflags"
16+
"github.com/kernel/kernel-images/server/lib/x11"
1617
)
1718

1819
func main() {
@@ -45,7 +46,9 @@ func main() {
4546
// Wait for the X server. The wrapper starts chromium in parallel with
4647
// xorg/xvfb, so the display socket may not be ready yet — without this
4748
// gate chromium would fail on connect and supervisord would restart us.
48-
waitForX(":1", 20*time.Second)
49+
if d := x11.WaitForDisplay(":1", 20*time.Second); d >= 20*time.Second {
50+
fmt.Fprintf(os.Stderr, "warning: X display :1 not responsive after %s\n", d)
51+
}
4952

5053
baseFlags := os.Getenv("CHROMIUM_FLAGS")
5154
runtimeTokens, err := chromiumflags.ReadOptionalFlagFile(*runtimeFlagsPath)
@@ -130,30 +133,6 @@ func execLookPath(file string) (string, error) {
130133
return exec.LookPath(file)
131134
}
132135

133-
// waitForX blocks until the X server is reachable on display :N. We try
134-
// both the named unix socket (Xorg, headful) and the abstract namespace
135-
// socket (Xvfb runs with -nolisten unix, which disables the named socket
136-
// but leaves the abstract one). Mirrors the wrapper's check so chromium
137-
// can be started in parallel with the X server without failing on connect.
138-
func waitForX(display string, timeout time.Duration) {
139-
num := strings.TrimPrefix(display, ":")
140-
named := "/tmp/.X11-unix/X" + num
141-
abstract := "@/tmp/.X11-unix/X" + num
142-
deadline := time.Now().Add(timeout)
143-
for time.Now().Before(deadline) {
144-
if c, err := net.DialTimeout("unix", named, 200*time.Millisecond); err == nil {
145-
_ = c.Close()
146-
return
147-
}
148-
if c, err := net.DialTimeout("unix", abstract, 200*time.Millisecond); err == nil {
149-
_ = c.Close()
150-
return
151-
}
152-
time.Sleep(20 * time.Millisecond)
153-
}
154-
fmt.Fprintf(os.Stderr, "warning: X display %s not responsive after %s\n", display, timeout)
155-
}
156-
157136
// waitForPort waits until the given port is available for binding on IPv4.
158137
// This handles the delay after SIGKILL before the kernel releases the socket.
159138
// We disable SO_REUSEADDR to get an accurate check matching chromium's bind behavior.

server/cmd/wrapper/display.go

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,15 @@
11
package main
22

33
import (
4-
"net"
5-
"strings"
64
"time"
5+
6+
"github.com/kernel/kernel-images/server/lib/x11"
77
)
88

9-
// waitForX waits until the X server is reachable on display :N. We try both
10-
// the named unix socket (Xorg, headful) and the abstract namespace socket
11-
// (Xvfb runs with -nolisten unix, which disables the named socket but leaves
12-
// the abstract one). Cheaper than spawning xdpyinfo in a loop.
9+
// waitForX blocks until the X server is reachable on display :N. Logs a
10+
// warning if the wait times out.
1311
func waitForX(display string, timeout time.Duration) {
14-
num := strings.TrimPrefix(display, ":")
15-
named := "/tmp/.X11-unix/X" + num
16-
abstract := "@/tmp/.X11-unix/X" + num // Linux abstract namespace
17-
deadline := time.Now().Add(timeout)
18-
for time.Now().Before(deadline) {
19-
if c, err := net.DialTimeout("unix", named, 200*time.Millisecond); err == nil {
20-
_ = c.Close()
21-
return
22-
}
23-
if c, err := net.DialTimeout("unix", abstract, 200*time.Millisecond); err == nil {
24-
_ = c.Close()
25-
return
26-
}
27-
time.Sleep(20 * time.Millisecond)
12+
if d := x11.WaitForDisplay(display, timeout); d >= timeout {
13+
logf("WARNING: X display %s not responsive after %s", display, timeout)
2814
}
29-
logf("WARNING: X display %s not responsive after %s", display, timeout)
3015
}

server/cmd/wrapper/main.go

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,13 @@ func main() {
5858
prof := detectProfile()
5959
logf("starting wrapper (profile=%s)", profileName(prof))
6060

61+
// Register signal handling early so a SIGTERM/SIGINT during the
62+
// seconds-long startup window queues into the channel instead of
63+
// triggering Go's default exit-immediately behavior. The handler
64+
// goroutine is installed below, once supervisord is running.
65+
sigs := make(chan os.Signal, 1)
66+
signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)
67+
6168
// /dev/shm: only mount when not running under Docker (Docker manages it).
6269
if os.Getenv("WITHDOCKER") == "" {
6370
_ = os.MkdirAll("/dev/shm", 0o1777)
@@ -123,6 +130,15 @@ func main() {
123130
if err := supCmd.Start(); err != nil {
124131
fatalf("supervisord start: %v", err)
125132
}
133+
// Install the shutdown goroutine now so it can clean up if a signal
134+
// arrives during the readiness window. Any signal queued in `sigs`
135+
// before this point is picked up as soon as the goroutine starts receiving.
136+
go func() {
137+
<-sigs
138+
logf("shutdown: stopping services")
139+
_ = exec.Command("supervisorctl", "-c", supervisorConf, "stop", "all").Run()
140+
_ = supCmd.Process.Signal(syscall.SIGTERM)
141+
}()
126142
waitForSocket(supervisorSock, 10*time.Second)
127143

128144
// Phase A: identity-free services. Chromium itself doesn't read any
@@ -184,7 +200,7 @@ func main() {
184200
// post-fork (stop+start to force a re-read of refreshed envs).
185201
phaseCStart := time.Now()
186202
if isExecutable("/usr/local/bin/init-envoy.sh") {
187-
runStream("envoy-init", "/usr/local/bin/init-envoy.sh")
203+
runStreamFatal("envoy-init", "/usr/local/bin/init-envoy.sh")
188204
}
189205
restartAll("kernel-images-api")
190206
phaseCDone := time.Now()
@@ -212,16 +228,6 @@ func main() {
212228
// Re-enable scale-to-zero now that the hot path is up.
213229
enableScaleToZero()
214230

215-
// Forward signals so cleanup runs and supervisord is taken down cleanly.
216-
sigs := make(chan os.Signal, 1)
217-
signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)
218-
go func() {
219-
<-sigs
220-
logf("shutdown: stopping services")
221-
_ = exec.Command("supervisorctl", "-c", supervisorConf, "stop", "all").Run()
222-
_ = supCmd.Process.Signal(syscall.SIGTERM)
223-
}()
224-
225231
// Block on supervisord; container exits when it does.
226232
if err := supCmd.Wait(); err != nil {
227233
logf("supervisord exited: %v", err)

server/cmd/wrapper/supervisord.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,20 @@ func tailFile(path string) {
9696
}
9797
}
9898

99-
func runStream(label, name string, args ...string) {
99+
func runStream(label, name string, args ...string) error {
100100
cmd := exec.Command(name, args...)
101101
cmd.Stdout = prefixWriter{label: label, w: os.Stdout}
102102
cmd.Stderr = prefixWriter{label: label, w: os.Stderr}
103-
_ = cmd.Run()
103+
return cmd.Run()
104+
}
105+
106+
// runStreamFatal is runStream + fatalf on non-zero exit. Use for scripts the
107+
// boot path cannot proceed without (init-envoy). The old wrapper.sh ran under
108+
// `set -o errexit`, so these were already fatal there.
109+
func runStreamFatal(label, name string, args ...string) {
110+
if err := runStream(label, name, args...); err != nil {
111+
fatalf("%s failed: %v", label, err)
112+
}
104113
}
105114

106115
type prefixWriter struct {

server/lib/x11/x11.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Package x11 provides helpers for talking to a local X server.
2+
package x11
3+
4+
import (
5+
"net"
6+
"strings"
7+
"time"
8+
)
9+
10+
// WaitForDisplay blocks until the X server is reachable on display :N, returning
11+
// the time spent waiting. It tries both the named unix socket (Xorg, headful)
12+
// and the abstract namespace socket (Xvfb runs with -nolisten unix, which
13+
// disables the named socket but leaves the abstract one). Cheaper than spawning
14+
// xdpyinfo in a loop.
15+
//
16+
// If the deadline elapses, WaitForDisplay still returns; callers can compare
17+
// the returned duration against timeout to detect a miss.
18+
func WaitForDisplay(display string, timeout time.Duration) time.Duration {
19+
start := time.Now()
20+
num := strings.TrimPrefix(display, ":")
21+
named := "/tmp/.X11-unix/X" + num
22+
abstract := "@/tmp/.X11-unix/X" + num // Linux abstract namespace
23+
deadline := start.Add(timeout)
24+
for time.Now().Before(deadline) {
25+
if c, err := net.DialTimeout("unix", named, 200*time.Millisecond); err == nil {
26+
_ = c.Close()
27+
return time.Since(start)
28+
}
29+
if c, err := net.DialTimeout("unix", abstract, 200*time.Millisecond); err == nil {
30+
_ = c.Close()
31+
return time.Since(start)
32+
}
33+
time.Sleep(20 * time.Millisecond)
34+
}
35+
return time.Since(start)
36+
}

0 commit comments

Comments
 (0)