Skip to content

Commit c2e2525

Browse files
authored
backport supervisor/worker reaping (#452)
* backport supervisor/worker reaping * add more context to test_coprocess failures * make sure reaper can't block SIGHUP/SIGTERM handler
1 parent 02f0552 commit c2e2525

3 files changed

Lines changed: 94 additions & 8 deletions

File tree

integration_tests/tests/test_coprocess/run.sh

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,33 @@ if [ ! $? -eq 0 ] ; then exit 1 ; fi
2121
ID=$(docker ps -l -f "ancestor=cpfix2_app" --format="{{.ID}}")
2222

2323
# verify the coprocess is running
24-
docker exec -it "$ID" ps -ef | grep coprocess
24+
docker exec -it "$ID" ps -ef | grep coprocess \
25+
|| ( echo 'coprocess not running' && exit 1 )
2526

2627
# kill the coprocess and verify it restarts
2728
docker exec -it "$ID" pkill coprocess
2829
sleep 1
29-
docker exec -it "$ID" ps -ef | grep coprocess
30+
docker exec -it "$ID" ps -ef | grep coprocess \
31+
|| ( echo 'coprocess not restarted' && exit 1 )
3032

3133
# kill the coprocess and verify it doesn't restart again
3234
docker exec -it "$ID" pkill coprocess
3335
sleep 1
34-
35-
set +e
36-
docker exec -it "$ID" ps -ef | grep coprocess && exit 1
37-
set +e
36+
docker exec -it "$ID" ps -ef | grep coprocess \
37+
&& ( echo 'coprocess should be stopped' && exit 1 )
3838

3939
# update the ContainerPilot config and verify the coprocess is running
4040
# with the new flags (this resets the restart limit)
4141
docker exec -it "$ID" sed -i 's/arg1/arg2/' /app-with-coprocess.json
4242
docker exec -it "$ID" kill -SIGHUP 1
4343
sleep 1
44-
docker exec -it "$ID" ps -ef | grep coprocess | grep arg2
44+
docker exec -it "$ID" ps -ef | grep coprocess | grep arg2 \
45+
|| ( echo 'coprocess should use new args, got:' \
46+
&& docker exec -it "$ID" ps -ef \
47+
&& exit 1 )
4548

4649
# kill the coprocess and verify it restarts
4750
docker exec -it "$ID" pkill coprocess
4851
sleep 1
49-
docker exec -it "$ID" ps -ef | grep coprocess
52+
docker exec -it "$ID" ps -ef | grep coprocess \
53+
|| ( echo 'coprocess not restarted w/ new args' && exit 1 )

main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package main // import "github.com/joyent/containerpilot"
22

33
import (
4+
"os"
45
"runtime"
56

67
"github.com/joyent/containerpilot/core"
8+
"github.com/joyent/containerpilot/sup"
79

810
log "github.com/sirupsen/logrus"
911

@@ -18,6 +20,15 @@ func main() {
1820
// contention on the main application
1921
runtime.GOMAXPROCS(1)
2022

23+
// If we're running as PID1, we fork and run as a supervisor
24+
// so that we can cleanly handle reaping of child processes.
25+
// We fork before doing *anything* else so we don't have to
26+
// worry about any new threads spawned by the runtime.
27+
if os.Getpid() == 1 {
28+
sup.Run() // blocks forever
29+
return
30+
}
31+
2132
app, configErr := core.LoadApp()
2233
if configErr != nil {
2334
log.Fatal(configErr)

sup/sup.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package sup
2+
3+
import (
4+
"log"
5+
"os"
6+
"os/exec"
7+
"os/signal"
8+
"syscall"
9+
)
10+
11+
// Run forks the ContainerPilot process and then starts signal handlers
12+
// that will reap child processes and pass-thru SIGINT and SIGKILL to
13+
// the ContainerPilot worker process.
14+
func Run() {
15+
self, err := exec.LookPath(os.Args[0])
16+
if err != nil {
17+
log.Fatal("failed to find ContainerPilot binary: ", err)
18+
}
19+
proc, err := os.StartProcess(self, os.Args, &os.ProcAttr{Dir: "", Env: nil,
20+
Files: []*os.File{os.Stdin, os.Stdout, os.Stderr}, Sys: nil})
21+
if err != nil {
22+
log.Fatal("failed to start ContainerPilot worker process:", err)
23+
}
24+
handleSignals(proc.Pid)
25+
proc.Wait()
26+
}
27+
28+
// handleSignals listens for signals used to gracefully shutdown and
29+
// passes them thru to the ContainerPilot worker process.
30+
func handleSignals(pid int) {
31+
sig := make(chan os.Signal, 1)
32+
signal.Notify(sig, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGINT, syscall.SIGCHLD, syscall.SIGUSR1)
33+
go func() {
34+
for signal := range sig {
35+
switch signal {
36+
case syscall.SIGINT:
37+
syscall.Kill(pid, syscall.SIGINT)
38+
case syscall.SIGTERM:
39+
syscall.Kill(pid, syscall.SIGTERM)
40+
case syscall.SIGHUP:
41+
syscall.Kill(pid, syscall.SIGHUP)
42+
case syscall.SIGUSR1:
43+
syscall.Kill(pid, syscall.SIGUSR1)
44+
case syscall.SIGCHLD:
45+
go reap()
46+
}
47+
}
48+
}()
49+
}
50+
51+
// reap collects the exit status of every child process that has
// already terminated, including orphans reparented to PID1, so that
// they do not linger as zombies. It never blocks: WNOHANG makes Wait4
// return a pid of 0 when children exist but none has exited yet.
func reap() {
	for {
		var wstatus syscall.WaitStatus
		pid, err := syscall.Wait4(-1, &wstatus, syscall.WNOHANG, nil)
		switch {
		case err == syscall.EINTR:
			continue // interrupted by a signal; retry
		case err != nil:
			// ECHILD means there are no children at all; any other
			// error is not recoverable here. Either way we're done
			// until the next SIGCHLD.
			return
		case pid <= 0:
			return // children remain but none is waiting to be reaped
		}
		// reaped one child; poll again in case more have exited
	}
}

0 commit comments

Comments
 (0)