Skip to content

Commit f6b9ba9

Browse files
committed
fix(services): use refresh channel in Register instead of inline check
Simpler approach: Register() sends a non-blocking signal on a new chRefresh channel; the run() goroutine picks it up and calls update() immediately. This avoids duplicating per-service check logic and removes the lock-ordering complexity from the previous implementation.
1 parent c51ec3e commit f6b9ba9

2 files changed

Lines changed: 50 additions & 53 deletions

File tree

pkg/services/health.go

Lines changed: 25 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ func CopyHealth(dest, src map[string]error) {
4747
// HealthChecker is a services.Service which monitors other services and can be probed for system health.
4848
type HealthChecker struct {
4949
StateMachine
50-
chStop chan struct{}
51-
chDone chan struct{}
50+
chStop chan struct{}
51+
chDone chan struct{}
52+
chRefresh chan struct{}
5253

5354
cfg HealthCheckerConfig
5455

@@ -130,12 +131,13 @@ func (cfg HealthCheckerConfig) New() *HealthChecker {
130131
cfg.initVerSha()
131132
cfg.setNoopHooks()
132133
return &HealthChecker{
133-
cfg: cfg,
134-
services: make(map[string]HealthReporter, 10),
135-
healthy: make(map[string]error, 10),
136-
ready: make(map[string]error, 10),
137-
chStop: make(chan struct{}),
138-
chDone: make(chan struct{}),
134+
cfg: cfg,
135+
services: make(map[string]HealthReporter, 10),
136+
healthy: make(map[string]error, 10),
137+
ready: make(map[string]error, 10),
138+
chStop: make(chan struct{}),
139+
chDone: make(chan struct{}),
140+
chRefresh: make(chan struct{}, 1),
139141
}
140142
}
141143

@@ -171,6 +173,8 @@ func (c *HealthChecker) run() {
171173
select {
172174
case <-ticker.C:
173175
c.update(ctx)
176+
case <-c.chRefresh:
177+
c.update(ctx)
174178
case <-c.chStop:
175179
return
176180
}
@@ -218,38 +222,21 @@ func (c *HealthChecker) Register(service HealthReporter) error {
218222
return fmt.Errorf("misconfigured check %#v for %T", name, service)
219223
}
220224

221-
func() {
222-
c.servicesMu.Lock()
223-
defer c.servicesMu.Unlock()
224-
if testing.Testing() {
225-
if orig, ok := c.services[name]; ok {
226-
panic(fmt.Errorf("duplicate name %q: service names must be unique: types %T & %T", name, service, orig))
227-
}
225+
c.servicesMu.Lock()
226+
defer c.servicesMu.Unlock()
227+
if testing.Testing() {
228+
if orig, ok := c.services[name]; ok {
229+
panic(fmt.Errorf("duplicate name %q: service names must be unique: types %T & %T", name, service, orig))
228230
}
229-
c.services[name] = service
230-
}()
231-
232-
// If already started, immediately populate the health state for this
233-
// service so callers don't wait up to <interval> for the next tick.
234-
// servicesMu must be released before calling IfStarted to avoid a
235-
// deadlock with Start(), which holds the StateMachine lock while
236-
// calling update() (which acquires servicesMu.RLock).
237-
c.IfStarted(func() {
238-
ctx := context.Background()
239-
ready := service.Ready()
231+
}
232+
c.services[name] = service
240233

241-
c.stateMu.Lock()
242-
defer c.stateMu.Unlock()
243-
c.ready[name] = ready
244-
for n, err := range service.HealthReport() {
245-
c.healthy[n] = err
246-
value := 0
247-
if err == nil {
248-
value = 1
249-
}
250-
c.cfg.SetStatus(ctx, name, value)
251-
}
252-
})
234+
// Signal the polling goroutine to refresh immediately so the newly
235+
// registered service is visible without waiting for the next tick.
236+
select {
237+
case c.chRefresh <- struct{}{}:
238+
default: // refresh already pending
239+
}
253240

254241
return nil
255242
}

pkg/services/health_test.go

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"errors"
55
"fmt"
66
"testing"
7+
"time"
78

89
"github.com/stretchr/testify/assert"
910
)
@@ -70,9 +71,9 @@ func (r *testHealthReporter) HealthReport() map[string]error {
7071
return map[string]error{r.name: r.healthy}
7172
}
7273

73-
// TestHealthChecker_Register_ImmediateUpdate verifies that Register() immediately
74-
// populates the health state when the HealthChecker is already running, so
75-
// callers don't wait up to 15 s for the next polling tick.
74+
// TestHealthChecker_Register_ImmediateUpdate verifies that Register() triggers an
75+
// immediate poll when the HealthChecker is already running, so the newly
76+
// registered service is visible well within a polling interval (15 s).
7677
func TestHealthChecker_Register_ImmediateUpdate(t *testing.T) {
7778
hc := HealthCheckerConfig{Ver: "test", Sha: "abc1234"}.New()
7879
if err := hc.Start(); err != nil {
@@ -85,18 +86,23 @@ func TestHealthChecker_Register_ImmediateUpdate(t *testing.T) {
8586
t.Fatalf("Register: %v", err)
8687
}
8788

88-
// No sleep needed — Register() must have synchronously populated the state.
89-
ready, readyErrs := hc.IsReady()
90-
assert.True(t, ready, "expected ready after Register, got errors: %v", readyErrs)
91-
assert.Nil(t, readyErrs["svc"], "expected svc ready, got: %v", readyErrs["svc"])
89+
// The refresh is async (goroutine scheduling); a short poll is sufficient
90+
// and far faster than the 15-second polling interval.
91+
assert.Eventually(t, func() bool {
92+
_, errs := hc.IsReady()
93+
_, ok := errs["svc"]
94+
return ok && errs["svc"] == nil
95+
}, time.Second, time.Millisecond, "service should become ready quickly after Register")
9296

93-
healthy, healthyErrs := hc.IsHealthy()
94-
assert.True(t, healthy, "expected healthy after Register, got errors: %v", healthyErrs)
95-
assert.Nil(t, healthyErrs["svc"], "expected svc healthy, got: %v", healthyErrs["svc"])
97+
assert.Eventually(t, func() bool {
98+
_, errs := hc.IsHealthy()
99+
_, ok := errs["svc"]
100+
return ok && errs["svc"] == nil
101+
}, time.Second, time.Millisecond, "service should become healthy quickly after Register")
96102
}
97103

98104
// TestHealthChecker_Register_ImmediateUpdate_Unhealthy verifies that an unhealthy
99-
// service registered on a running HealthChecker is immediately visible.
105+
// service registered on a running HealthChecker is reflected quickly.
100106
func TestHealthChecker_Register_ImmediateUpdate_Unhealthy(t *testing.T) {
101107
hc := HealthCheckerConfig{Ver: "test", Sha: "abc1234"}.New()
102108
if err := hc.Start(); err != nil {
@@ -110,11 +116,15 @@ func TestHealthChecker_Register_ImmediateUpdate_Unhealthy(t *testing.T) {
110116
t.Fatalf("Register: %v", err)
111117
}
112118

113-
ready, readyErrs := hc.IsReady()
114-
assert.False(t, ready, "expected not ready")
119+
assert.Eventually(t, func() bool {
120+
_, errs := hc.IsReady()
121+
_, ok := errs["svc2"]
122+
return ok
123+
}, time.Second, time.Millisecond, "service should appear in ready map quickly after Register")
124+
125+
_, readyErrs := hc.IsReady()
115126
assert.Equal(t, notReady, readyErrs["svc2"])
116127

117-
healthy, healthyErrs := hc.IsHealthy()
118-
assert.False(t, healthy, "expected not healthy")
128+
_, healthyErrs := hc.IsHealthy()
119129
assert.Equal(t, notReady, healthyErrs["svc2"])
120130
}

0 commit comments

Comments
 (0)