Skip to content

Commit 31b5a29

Browse files
committed
feat(ci): run tests on release branches also
1 parent 0f3b3e6 commit 31b5a29

3 files changed

Lines changed: 110 additions & 23 deletions

File tree

cmd/rune/main_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ func TestRunInstall_JSONMissingManifest(t *testing.T) {
176176
t.Errorf("exit = %d, want 2", code)
177177
}
178178

179-
// Emit stdout as JSON
179+
// Emit stdout as JSON
180180
var ev jsonEvent
181181
if err := json.Unmarshal(stdout.Bytes(), &ev); err != nil {
182182
t.Fatalf("stdout should be a JSON event; got %q (err %v)", stdout.String(), err)

internal/supervisor/supervisor.go

Lines changed: 57 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,37 @@ func runWatcher(ctx context.Context, cfg Config) error {
107107
var crashes []time.Time
108108
backoffIdx := 0
109109

110+
recordCrash := func(now time.Time) (int, bool) {
111+
crashes = append(crashes, now)
112+
cutoff := now.Add(-cfg.MaxCrashWindow) // crashes older than now - cfg.MaxCrashWindow are considered expired
113+
i := 0
114+
115+
for i < len(crashes) && crashes[i].Before(cutoff) {
116+
i++
117+
}
118+
crashes = crashes[i:] // sliding-window
119+
120+
return len(crashes), len(crashes) >= cfg.MaxCrashes
121+
}
122+
123+
// sleepBackoff waits the next backoff step; false means shutdown was
124+
// requested while waiting.
125+
sleepBackoff := func(crashCount int) bool {
126+
wait := cfg.BackoffSchedule[min(backoffIdx, len(cfg.BackoffSchedule)-1)]
127+
backoffIdx++
128+
129+
fmt.Fprintf(os.Stderr, "supervisor: backing off %s before restart (crash %d of max %d in %s window)\n", wait, crashCount, cfg.MaxCrashes, cfg.MaxCrashWindow)
130+
131+
select {
132+
case <-time.After(wait): // wait next backoff
133+
return true
134+
case <-ctx.Done(): // shutdown requested
135+
return false
136+
case <-sigCh: // shutdown requested
137+
return false
138+
}
139+
}
140+
110141
for {
111142
cmd := exec.Command(cfg.RunedBinary, cfg.RunedArgs...)
112143
cmd.Stdin = nil
@@ -115,8 +146,20 @@ func runWatcher(ctx context.Context, cfg Config) error {
115146

116147
fmt.Fprintf(os.Stderr, "supervisor: starting %s %v\n", cfg.RunedBinary, cfg.RunedArgs)
117148
started := time.Now()
149+
150+
// A failed start counts against the same crash budget as a runtime crash and is retried with backoff, rather than ending supervision on the first error.
118151
if err := cmd.Start(); err != nil {
119-
return fmt.Errorf("supervisor: start %s: %w", cfg.RunedBinary, err)
152+
fmt.Fprintf(os.Stderr, "supervisor: start %s: %v\n", cfg.RunedBinary, err)
153+
154+
count, giveUp := recordCrash(time.Now())
155+
if giveUp {
156+
return fmt.Errorf("supervisor: start %s: %w (%d failures within %s - giving up)", cfg.RunedBinary, err, count, cfg.MaxCrashWindow)
157+
}
158+
if !sleepBackoff(count) {
159+
return nil
160+
}
161+
162+
continue
120163
}
121164

122165
done := make(chan error, 1)
@@ -146,27 +189,11 @@ func runWatcher(ctx context.Context, cfg Config) error {
146189
backoffIdx = 0
147190
}
148191

149-
crashes = append(crashes, now)
150-
cutoff := now.Add(-cfg.MaxCrashWindow) // crashes older than now - cfg.MaxCrashWindow are considered expired
151-
i := 0
152-
for i < len(crashes) && crashes[i].Before(cutoff) {
153-
i++
192+
count, giveUp := recordCrash(now)
193+
if giveUp {
194+
return fmt.Errorf("supervisor: %d crashes within %s - giving up", count, cfg.MaxCrashWindow)
154195
}
155-
crashes = crashes[i:] // sliding-window
156-
157-
if len(crashes) >= cfg.MaxCrashes {
158-
return fmt.Errorf("supervisor: %d crashes within %s - giving up", len(crashes), cfg.MaxCrashWindow)
159-
}
160-
161-
wait := cfg.BackoffSchedule[min(backoffIdx, len(cfg.BackoffSchedule)-1)]
162-
backoffIdx++
163-
fmt.Fprintf(os.Stderr, "supervisor: backing off %s before restart (crash %d of max %d in %s window)\n", wait, len(crashes), cfg.MaxCrashes, cfg.MaxCrashWindow)
164-
select {
165-
case <-time.After(wait):
166-
continue
167-
case <-ctx.Done():
168-
return nil
169-
case <-sigCh:
196+
if !sleepBackoff(count) {
170197
return nil
171198
}
172199
}
@@ -184,8 +211,16 @@ func shutdownChild(cmd *exec.Cmd, grace time.Duration, done <-chan error) error
184211
return nil
185212
case <-time.After(grace):
186213
fmt.Fprintf(os.Stderr, "supervisor: child didn't exit within %s, sending SIGKILL\n", grace)
214+
187215
_ = cmd.Process.Kill()
188-
<-done
216+
217+
// Bound the wait after SIGKILL: if the child still has not exited after another grace period, log it and stop waiting.
218+
select {
219+
case <-done:
220+
case <-time.After(grace):
221+
fmt.Fprintf(os.Stderr, "supervisor: child unresponsive to SIGKILL after %s, abandoning\n", grace)
222+
}
223+
189224
return nil
190225
}
191226
}

internal/supervisor/supervisor_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@ package supervisor
44

55
import (
66
"context"
7+
"errors"
78
"fmt"
89
"os"
10+
"os/exec"
911
"os/signal"
12+
"path/filepath"
1013
"strings"
1114
"sync/atomic"
1215
"syscall"
@@ -219,3 +222,52 @@ func TestWatcher_ForwardSignalToChild(t *testing.T) {
219222
t.Fatal("runWatcher did not return after SIGTERM - signal not forwarded")
220223
}
221224
}
225+
226+
func TestWatcher_RetriesStartFailure(t *testing.T) {
227+
cfg := testWatcherConfig(t)
228+
cfg.RunedBinary = filepath.Join(t.TempDir(), "no-such-runed")
229+
cfg.MaxCrashes = 3
230+
231+
err := runWatcher(context.Background(), cfg)
232+
if err == nil {
233+
t.Fatal("runWatcher should give up after MaxCrashes start failures")
234+
}
235+
if !strings.Contains(err.Error(), "giving up") {
236+
t.Errorf("error: got %q, want substring 'giving up'", err.Error())
237+
}
238+
if !strings.Contains(err.Error(), fmt.Sprintf("%d failures", cfg.MaxCrashes)) {
239+
t.Errorf("error: got %q, want %d retried attempts before giving up", err.Error(), cfg.MaxCrashes)
240+
}
241+
if !errors.Is(err, os.ErrNotExist) {
242+
t.Errorf("error: got %v, want wrapped os.ErrNotExist from the failed Start", err)
243+
}
244+
}
245+
246+
func TestShutdownChild_BoundedAfterSIGKILL(t *testing.T) {
247+
t.Setenv(fakeRunedEnv, "ignore_sigterm")
248+
cmd := exec.Command(os.Args[0])
249+
250+
if err := cmd.Start(); err != nil {
251+
t.Fatalf("start fake: %v", err)
252+
}
253+
defer func() {
254+
_ = cmd.Process.Kill()
255+
_, _ = cmd.Process.Wait()
256+
}()
257+
258+
done := make(chan error) // nothing ever sends on this channel, so the child's exit is never reported
259+
260+
grace := 200 * time.Millisecond
261+
start := time.Now()
262+
if err := shutdownChild(cmd, grace, done); err != nil {
263+
t.Errorf("shutdownChild = %v, want nil", err)
264+
}
265+
266+
elapsed := time.Since(start)
267+
if elapsed < 2*grace {
268+
t.Errorf("returned in %s; want >= %s (SIGTERM grace + bounded kill-wait)", elapsed, 2*grace)
269+
}
270+
if elapsed > 2*grace+2*time.Second {
271+
t.Errorf("took %s; should wait after SIGKILL sended", elapsed)
272+
}
273+
}

0 commit comments

Comments
 (0)