Skip to content

Commit 0e77f30

Browse files
committed
refactor: Improve endpoints_per_pool metric reporting and cleanup
Rename DeleteEndpointsPerPool to UncaptureEndpointsPerPool, add metric cleanup on route pruning and LB algorithm changes, and add tests.
1 parent 94e2f8c commit 0e77f30

9 files changed

Lines changed: 428 additions & 96 deletions

File tree

src/code.cloudfoundry.org/gorouter/metrics/compositereporter.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ type MetricReporter interface {
4848
CaptureNATSDroppedMessages(messages int)
4949
UnmuzzleRouteRegistrationLatency()
5050
CaptureEndpointsPerPool(endpoints int, route string, lbAlgo string)
51-
DeleteEndpointsPerPool(route, lbAlgo string)
51+
UncaptureEndpointsPerPool(route, lbAlgo string)
5252
}
5353

5454
type ComponentTagged interface {
@@ -238,9 +238,9 @@ func (m MultiMetricReporter) CaptureEndpointsPerPool(endpoints int, route string
238238
}
239239
}
240240

241-
func (m MultiMetricReporter) DeleteEndpointsPerPool(route, lbAlgo string) {
241+
func (m MultiMetricReporter) UncaptureEndpointsPerPool(route, lbAlgo string) {
242242
for _, r := range m {
243-
r.DeleteEndpointsPerPool(route, lbAlgo)
243+
r.UncaptureEndpointsPerPool(route, lbAlgo)
244244
}
245245
}
246246

@@ -276,6 +276,6 @@ func (c *CompositeReporter) CaptureEndpointsPerPool(endpoints int, route string,
276276
c.MetricReporter.CaptureEndpointsPerPool(endpoints, route, lbAlgo)
277277
}
278278

279-
func (c *CompositeReporter) DeleteEndpointsPerPool(route, lbAlgo string) {
280-
c.MetricReporter.DeleteEndpointsPerPool(route, lbAlgo)
279+
func (c *CompositeReporter) UncaptureEndpointsPerPool(route, lbAlgo string) {
280+
c.MetricReporter.UncaptureEndpointsPerPool(route, lbAlgo)
281281
}

src/code.cloudfoundry.org/gorouter/metrics/fakes/fake_metricreporter.go

Lines changed: 80 additions & 54 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/code.cloudfoundry.org/gorouter/metrics/fakes/fake_subscriber.go

Lines changed: 0 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/code.cloudfoundry.org/gorouter/metrics/fakes/fake_varzreporter.go

Lines changed: 0 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/code.cloudfoundry.org/gorouter/metrics/metricsreporter.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ func (m *Metrics) CaptureEndpointsPerPool(endpoints int, route string, lbAlgo st
206206
}
207207

208208
// Empty implementation here is to fulfil interface
209-
func (m *Metrics) DeleteEndpointsPerPool(route string, lbAlgo string) {
209+
func (m *Metrics) UncaptureEndpointsPerPool(route string, lbAlgo string) {
210210
}
211211

212212
func getResponseCounterName(statusCode int) string {

src/code.cloudfoundry.org/gorouter/metrics_prometheus/metrics.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ func NewMetrics(registry *mr.Registry, perRequestMetricsReporting bool) *Metrics
9292
NATSDroppedMessages: registry.NewGauge("total_dropped_messages", "number of total dropped messages in NATS"),
9393
HTTPLatency: registry.NewGaugeVec("http_latency_seconds", "the latency of http requests from gorouter and back in sec", []string{"source_id"}),
9494
perRequestMetricsReporting: perRequestMetricsReporting,
95-
EndpointsPerPool: registry.NewGaugeVec("endpoints_per_pool", "number of endpoints per pool", []string{"route", "LB_algorithm"}),
95+
EndpointsPerPool: registry.NewGaugeVec("endpoints_per_pool", "number of endpoints per pool", []string{"route", "lb_algorithm"}),
9696
}
9797
}
9898

@@ -222,12 +222,12 @@ func (metrics *Metrics) CaptureHTTPLatency(d time.Duration, sourceID string) {
222222
metrics.HTTPLatency.Set(float64(d)/float64(time.Second), []string{sourceID})
223223
}
224224

225-
// CaptureEndpoints sets the number of endpoints for a given route and load balancing algorithm
225+
// CaptureEndpointsPerPool sets the number of endpoints for a given route and load balancing algorithm
226226
func (metrics *Metrics) CaptureEndpointsPerPool(count int, route string, loadBalancingAlgo string) {
227227
metrics.EndpointsPerPool.Set(float64(count), []string{route, loadBalancingAlgo})
228228
}
229229

230-
func (metrics *Metrics) DeleteEndpointsPerPool(route string, lbAlgo string) {
230+
func (metrics *Metrics) UncaptureEndpointsPerPool(route string, lbAlgo string) {
231231
metrics.EndpointsPerPool.Delete([]string{route, lbAlgo})
232232
}
233233

src/code.cloudfoundry.org/gorouter/metrics_prometheus/metrics_test.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -461,41 +461,41 @@ var _ = Describe("Metrics", func() {
461461
It("reports the number of endpoints per pool with correct labels", func() {
462462
m.CaptureEndpointsPerPool(5, "routeA", "round_robin")
463463
metricsOutput := getMetrics(r.Port())
464-
expected := "endpoints_per_pool{LB_algorithm=\"round_robin\",route=\"routeA\"} 5"
464+
expected := "endpoints_per_pool{lb_algorithm=\"round_robin\",route=\"routeA\"} 5"
465465
Expect(metricsOutput).To(ContainSubstring(expected))
466466
})
467467

468468
It("updates the value for the same label combination", func() {
469469
m.CaptureEndpointsPerPool(5, "routeA", "round_robin")
470470
m.CaptureEndpointsPerPool(7, "routeA", "round_robin")
471471
metricsOutput := getMetrics(r.Port())
472-
expected := "endpoints_per_pool{LB_algorithm=\"round_robin\",route=\"routeA\"} 7"
472+
expected := "endpoints_per_pool{lb_algorithm=\"round_robin\",route=\"routeA\"} 7"
473473
Expect(metricsOutput).To(ContainSubstring(expected))
474474
})
475475

476476
It("reports multiple values for different label combinations", func() {
477477
m.CaptureEndpointsPerPool(5, "routeA", "round_robin")
478478
m.CaptureEndpointsPerPool(3, "routeB", "least_conn")
479479
metricsOutput := getMetrics(r.Port())
480-
expectedA := "endpoints_per_pool{LB_algorithm=\"round_robin\",route=\"routeA\"} 5"
481-
expectedB := "endpoints_per_pool{LB_algorithm=\"least_conn\",route=\"routeB\"} 3"
480+
expectedA := "endpoints_per_pool{lb_algorithm=\"round_robin\",route=\"routeA\"} 5"
481+
expectedB := "endpoints_per_pool{lb_algorithm=\"least_conn\",route=\"routeB\"} 3"
482482
Expect(metricsOutput).To(ContainSubstring(expectedA))
483483
Expect(metricsOutput).To(ContainSubstring(expectedB))
484484
})
485485

486486
It("deletes the metric for a given route and LB algorithm", func() {
487487
m.CaptureEndpointsPerPool(5, "routeA", "round_robin")
488-
Expect(getMetrics(r.Port())).To(ContainSubstring("endpoints_per_pool{LB_algorithm=\"round_robin\",route=\"routeA\"} 5"))
488+
Expect(getMetrics(r.Port())).To(ContainSubstring("endpoints_per_pool{lb_algorithm=\"round_robin\",route=\"routeA\"} 5"))
489489

490-
m.DeleteEndpointsPerPool("routeA", "round_robin")
491-
Expect(getMetrics(r.Port())).NotTo(ContainSubstring("endpoints_per_pool{LB_algorithm=\"round_robin\",route=\"routeA\"}"))
490+
m.UncaptureEndpointsPerPool("routeA", "round_robin")
491+
Expect(getMetrics(r.Port())).NotTo(ContainSubstring("endpoints_per_pool{lb_algorithm=\"round_robin\",route=\"routeA\"}"))
492492
})
493493

494494
It("does nothing when deleting a non-existent label combination", func() {
495495
m.CaptureEndpointsPerPool(5, "routeA", "round_robin")
496496

497-
m.DeleteEndpointsPerPool("routeX", "round_robin")
498-
Expect(getMetrics(r.Port())).To(ContainSubstring("endpoints_per_pool{LB_algorithm=\"round_robin\",route=\"routeA\"} 5"))
497+
m.UncaptureEndpointsPerPool("routeX", "round_robin")
498+
Expect(getMetrics(r.Port())).To(ContainSubstring("endpoints_per_pool{lb_algorithm=\"round_robin\",route=\"routeA\"} 5"))
499499
})
500500
})
501501
})

src/code.cloudfoundry.org/gorouter/registry/registry.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ func (r *RouteRegistry) Register(uri route.Uri, endpoint *route.Endpoint) {
104104
}
105105

106106
switch poolPutResult {
107-
108107
case route.EndpointAdded:
109108
if r.logger.Enabled(context.Background(), slog.LevelInfo) {
110109
r.logger.Info("endpoint-registered", buildSlogAttrs(uri, endpoint)...)
@@ -424,6 +423,8 @@ func (r *RouteRegistry) pruneStaleDroplets() {
424423

425424
r.byURI.EachNodeWithPool(func(t *container.Trie) {
426425
endpoints := t.Pool.PruneEndpoints()
426+
lbAlgo := t.Pool.LoadBalancingAlgorithm
427+
uri := t.ToPath()
427428
if r.EmptyPoolResponseCode503 && r.EmptyPoolTimeout > 0 {
428429
if time.Since(t.Pool.LastUpdated()) > r.EmptyPoolTimeout {
429430
t.Snip()
@@ -442,11 +443,19 @@ func (r *RouteRegistry) pruneStaleDroplets() {
442443
isolationSegment = "-"
443444
}
444445
r.logger.Info("pruned-route",
445-
slog.String("uri", t.ToPath()),
446+
slog.String("uri", uri),
446447
slog.Any("endpoints", addresses),
447448
slog.String("isolation_segment", isolationSegment),
448449
)
449450
r.reporter.CaptureRoutesPruned(uint64(len(endpoints)))
451+
452+
if lbAlgo == config.LOAD_BALANCE_HB {
453+
if t.Pool == nil || t.Pool.NumEndpoints() == 0 {
454+
r.reporter.UncaptureEndpointsPerPool(uri, config.LOAD_BALANCE_HB)
455+
} else {
456+
r.reporter.CaptureEndpointsPerPool(t.Pool.NumEndpoints(), uri, config.LOAD_BALANCE_HB)
457+
}
458+
}
450459
}
451460
})
452461
}
@@ -465,17 +474,23 @@ func (r *RouteRegistry) freshenRoutes() {
465474
})
466475
}
467476

477+
// reportEndpointsPerPool reports the endpoints_per_pool metric for hash-based (HB) route pools.
478+
// For non-HB endpoints, it deletes any stale HB metric entries (e.g. after switching away from HB).
468479
func (r *RouteRegistry) reportEndpointsPerPool(uri route.Uri, endpoint *route.Endpoint) {
469-
if endpoint.LoadBalancingAlgorithm == config.LOAD_BALANCE_HB {
470-
pool := r.byURI.Find(uri.RouteKey())
471-
if pool == nil || pool.NumEndpoints() == 0 {
472-
r.reporter.DeleteEndpointsPerPool(string(uri), config.LOAD_BALANCE_HB)
473-
return
474-
}
475-
r.reporter.CaptureEndpointsPerPool(pool.NumEndpoints(), string(uri), config.LOAD_BALANCE_HB)
480+
if endpoint.LoadBalancingAlgorithm != config.LOAD_BALANCE_HB {
481+
r.reporter.UncaptureEndpointsPerPool(string(uri), config.LOAD_BALANCE_HB)
482+
return
483+
}
484+
485+
r.RLock()
486+
pool := r.byURI.Find(uri.RouteKey())
487+
r.RUnlock()
488+
489+
if pool == nil || pool.NumEndpoints() == 0 {
490+
r.reporter.UncaptureEndpointsPerPool(string(uri), config.LOAD_BALANCE_HB)
491+
return
476492
}
477-
// Delete stale entries for routes that have switched from HB to another load balancing algorithm.
478-
r.reporter.DeleteEndpointsPerPool(uri.String(), config.LOAD_BALANCE_HB)
493+
r.reporter.CaptureEndpointsPerPool(pool.NumEndpoints(), string(uri), config.LOAD_BALANCE_HB)
479494
}
480495

481496
func splitHostAndContextPath(uri route.Uri) (string, string) {

0 commit comments

Comments
 (0)