Skip to content

Commit 1cd1ad6

Browse files
feat(metrics)!: add "server" label to per-worker series
Workers declared in distinct php_server blocks can share a name now that the per-scope routing landed; without a second label dimension, their metric series collide on the existing "worker" label. This adds a sibling "server" label (resolved via ScopeLabel(w.backgroundScope) at every call site) so each (server, worker) pair stays on its own series. BREAKING CHANGE: every Metrics interface method that took (name string) now takes (server, name string). Embedders implementing frankenphp.Metrics need to widen their signatures; PrometheusMetrics and the null impl are updated in-tree. Mirrors the shape of #1376 (which introduced the "worker" label the same way).
1 parent 0e7fa25 commit 1cd1ad6

7 files changed

Lines changed: 89 additions & 79 deletions

File tree

docs/metrics.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@ When [Caddy metrics](https://caddyserver.com/docs/metrics) are enabled, FrankenP
1313
- `frankenphp_total_threads`: The total number of PHP threads.
1414
- `frankenphp_busy_threads`: The number of PHP threads currently processing a request (running workers always consume a thread).
1515
- `frankenphp_queue_depth`: The number of regular queued requests.
16-
- `frankenphp_total_workers{worker="[worker_name]"}`: The total number of workers.
17-
- `frankenphp_busy_workers{worker="[worker_name]"}`: The number of workers currently processing a request.
18-
- `frankenphp_worker_request_time{worker="[worker_name]"}`: The time spent processing requests by all workers.
19-
- `frankenphp_worker_request_count{worker="[worker_name]"}`: The number of requests processed by all workers.
20-
- `frankenphp_ready_workers{worker="[worker_name]"}`: The number of workers that have called `frankenphp_handle_request` at least once.
21-
- `frankenphp_worker_crashes{worker="[worker_name]"}`: The number of times a worker has unexpectedly terminated.
22-
- `frankenphp_worker_restarts{worker="[worker_name]"}`: The number of times a worker has been deliberately restarted.
23-
- `frankenphp_worker_queue_depth{worker="[worker_name]"}`: The number of queued requests.
16+
- `frankenphp_total_workers{server="[server]",worker="[worker_name]"}`: The total number of workers.
17+
- `frankenphp_busy_workers{server="[server]",worker="[worker_name]"}`: The number of workers currently processing a request.
18+
- `frankenphp_worker_request_time{server="[server]",worker="[worker_name]"}`: The time spent processing requests by all workers.
19+
- `frankenphp_worker_request_count{server="[server]",worker="[worker_name]"}`: The number of requests processed by all workers.
20+
- `frankenphp_ready_workers{server="[server]",worker="[worker_name]"}`: The number of workers that have called `frankenphp_handle_request` at least once.
21+
- `frankenphp_worker_crashes{server="[server]",worker="[worker_name]"}`: The number of times a worker has unexpectedly terminated.
22+
- `frankenphp_worker_restarts{server="[server]",worker="[worker_name]"}`: The number of times a worker has been deliberately restarted.
23+
- `frankenphp_worker_queue_depth{server="[server]",worker="[worker_name]"}`: The number of queued requests.
2424

25-
For worker metrics, the `[worker_name]` placeholder is replaced by the worker name in the Caddyfile, otherwise the absolute path of the worker file will be used.
25+
For worker metrics, the `[worker_name]` placeholder is replaced by the worker name set in the Caddyfile; if no name is set, the absolute path of the worker file is used. The `[server]` placeholder identifies the `php_server` block that declared the worker: the Caddy module resolves it to the first host of the route's host matcher (e.g. `api.example.com`), falling back to the user-set Caddy server name and finally to the first listener address. Same-named workers in distinct `php_server` blocks therefore stay on distinct series.

frankenphp.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,15 +182,15 @@ func calculateMaxThreads(opt *opt) (numWorkers int, _ error) {
182182
// Register the expected worker count for metrics too: without
183183
// this, a bg-worker-only deployment never initialises
184184
// totalWorkers, and StartWorker calls become silent no-ops.
185-
metrics.TotalWorkers(w.name, extra)
185+
metrics.TotalWorkers(ScopeLabel(w.backgroundScope), w.name, extra)
186186
continue
187187
}
188188

189189
if w.num <= 0 {
190190
// https://github.com/php/frankenphp/issues/126
191191
opt.workers[i].num = maxProcs
192192
}
193-
metrics.TotalWorkers(w.name, w.num)
193+
metrics.TotalWorkers(ScopeLabel(w.backgroundScope), w.name, w.num)
194194

195195
numWorkers += opt.workers[i].num
196196

metrics.go

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,44 +16,49 @@ const (
1616

1717
type StopReason int
1818

19+
// Metrics is the worker-level instrumentation surface. Every method that
20+
// identifies a specific worker takes a (server, name) pair: server is the
21+
// per-php_server label resolved via ScopeLabel, name is the worker name.
22+
// The pair is what disambiguates same-named workers declared in distinct
23+
// php_server blocks.
1924
type Metrics interface {
2025
// StartWorker collects started workers
21-
StartWorker(name string)
26+
StartWorker(server, name string)
2227
// ReadyWorker collects ready workers
23-
ReadyWorker(name string)
28+
ReadyWorker(server, name string)
2429
// StopWorker collects stopped workers
25-
StopWorker(name string, reason StopReason)
30+
StopWorker(server, name string, reason StopReason)
2631
// TotalWorkers collects expected workers
27-
TotalWorkers(name string, num int)
32+
TotalWorkers(server, name string, num int)
2833
// TotalThreads collects total threads
2934
TotalThreads(num int)
3035
// StartRequest collects started requests
3136
StartRequest()
3237
// StopRequest collects stopped requests
3338
StopRequest()
3439
// StopWorkerRequest collects stopped worker requests
35-
StopWorkerRequest(name string, duration time.Duration)
40+
StopWorkerRequest(server, name string, duration time.Duration)
3641
// StartWorkerRequest collects started worker requests
37-
StartWorkerRequest(name string)
42+
StartWorkerRequest(server, name string)
3843
Shutdown()
39-
QueuedWorkerRequest(name string)
40-
DequeuedWorkerRequest(name string)
44+
QueuedWorkerRequest(server, name string)
45+
DequeuedWorkerRequest(server, name string)
4146
QueuedRequest()
4247
DequeuedRequest()
4348
}
4449

4550
type nullMetrics struct{}
4651

47-
func (n nullMetrics) StartWorker(string) {
52+
func (n nullMetrics) StartWorker(string, string) {
4853
}
4954

50-
func (n nullMetrics) ReadyWorker(string) {
55+
func (n nullMetrics) ReadyWorker(string, string) {
5156
}
5257

53-
func (n nullMetrics) StopWorker(string, StopReason) {
58+
func (n nullMetrics) StopWorker(string, string, StopReason) {
5459
}
5560

56-
func (n nullMetrics) TotalWorkers(string, int) {
61+
func (n nullMetrics) TotalWorkers(string, string, int) {
5762
}
5863

5964
func (n nullMetrics) TotalThreads(int) {
@@ -65,18 +70,18 @@ func (n nullMetrics) StartRequest() {
6570
func (n nullMetrics) StopRequest() {
6671
}
6772

68-
func (n nullMetrics) StopWorkerRequest(string, time.Duration) {
73+
func (n nullMetrics) StopWorkerRequest(string, string, time.Duration) {
6974
}
7075

71-
func (n nullMetrics) StartWorkerRequest(string) {
76+
func (n nullMetrics) StartWorkerRequest(string, string) {
7277
}
7378

7479
func (n nullMetrics) Shutdown() {
7580
}
7681

77-
func (n nullMetrics) QueuedWorkerRequest(string) {}
82+
func (n nullMetrics) QueuedWorkerRequest(string, string) {}
7883

79-
func (n nullMetrics) DequeuedWorkerRequest(string) {}
84+
func (n nullMetrics) DequeuedWorkerRequest(string, string) {}
8085

8186
func (n nullMetrics) QueuedRequest() {}
8287
func (n nullMetrics) DequeuedRequest() {}
@@ -97,54 +102,54 @@ type PrometheusMetrics struct {
97102
mu sync.Mutex
98103
}
99104

100-
func (m *PrometheusMetrics) StartWorker(name string) {
105+
func (m *PrometheusMetrics) StartWorker(server, name string) {
101106
m.busyThreads.Inc()
102107

103108
// tests do not register workers before starting them
104109
if m.totalWorkers == nil {
105110
return
106111
}
107112

108-
m.totalWorkers.WithLabelValues(name).Inc()
113+
m.totalWorkers.WithLabelValues(server, name).Inc()
109114
}
110115

111-
func (m *PrometheusMetrics) ReadyWorker(name string) {
116+
func (m *PrometheusMetrics) ReadyWorker(server, name string) {
112117
if m.totalWorkers == nil {
113118
return
114119
}
115120

116-
m.readyWorkers.WithLabelValues(name).Inc()
121+
m.readyWorkers.WithLabelValues(server, name).Inc()
117122
}
118123

119-
func (m *PrometheusMetrics) StopWorker(name string, reason StopReason) {
124+
func (m *PrometheusMetrics) StopWorker(server, name string, reason StopReason) {
120125
m.busyThreads.Dec()
121126

122127
// tests do not register workers before starting them
123128
if m.totalWorkers == nil {
124129
return
125130
}
126131

127-
m.totalWorkers.WithLabelValues(name).Dec()
132+
m.totalWorkers.WithLabelValues(server, name).Dec()
128133

129134
// only decrement readyWorkers if the worker actually reached frankenphp_handle_request
130135
if reason != StopReasonBootFailure {
131-
m.readyWorkers.WithLabelValues(name).Dec()
136+
m.readyWorkers.WithLabelValues(server, name).Dec()
132137
}
133138

134139
switch reason {
135140
case StopReasonCrash, StopReasonBootFailure:
136-
m.workerCrashes.WithLabelValues(name).Inc()
141+
m.workerCrashes.WithLabelValues(server, name).Inc()
137142
case StopReasonRestart:
138-
m.workerRestarts.WithLabelValues(name).Inc()
143+
m.workerRestarts.WithLabelValues(server, name).Inc()
139144
}
140145
}
141146

142-
func (m *PrometheusMetrics) TotalWorkers(string, int) {
147+
func (m *PrometheusMetrics) TotalWorkers(string, string, int) {
143148
m.mu.Lock()
144149
defer m.mu.Unlock()
145150

146151
const ns, sub = "frankenphp", "worker"
147-
basicLabels := []string{"worker"}
152+
basicLabels := []string{"server", "worker"}
148153

149154
if m.totalWorkers == nil {
150155
m.totalWorkers = prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -257,35 +262,35 @@ func (m *PrometheusMetrics) StopRequest() {
257262
m.busyThreads.Dec()
258263
}
259264

260-
func (m *PrometheusMetrics) StopWorkerRequest(name string, duration time.Duration) {
265+
func (m *PrometheusMetrics) StopWorkerRequest(server, name string, duration time.Duration) {
261266
if m.workerRequestTime == nil {
262267
return
263268
}
264269

265-
m.workerRequestCount.WithLabelValues(name).Inc()
266-
m.busyWorkers.WithLabelValues(name).Dec()
267-
m.workerRequestTime.WithLabelValues(name).Add(duration.Seconds())
270+
m.workerRequestCount.WithLabelValues(server, name).Inc()
271+
m.busyWorkers.WithLabelValues(server, name).Dec()
272+
m.workerRequestTime.WithLabelValues(server, name).Add(duration.Seconds())
268273
}
269274

270-
func (m *PrometheusMetrics) StartWorkerRequest(name string) {
275+
func (m *PrometheusMetrics) StartWorkerRequest(server, name string) {
271276
if m.busyWorkers == nil {
272277
return
273278
}
274-
m.busyWorkers.WithLabelValues(name).Inc()
279+
m.busyWorkers.WithLabelValues(server, name).Inc()
275280
}
276281

277-
func (m *PrometheusMetrics) QueuedWorkerRequest(name string) {
282+
func (m *PrometheusMetrics) QueuedWorkerRequest(server, name string) {
278283
if m.workerQueueDepth == nil {
279284
return
280285
}
281-
m.workerQueueDepth.WithLabelValues(name).Inc()
286+
m.workerQueueDepth.WithLabelValues(server, name).Inc()
282287
}
283288

284-
func (m *PrometheusMetrics) DequeuedWorkerRequest(name string) {
289+
func (m *PrometheusMetrics) DequeuedWorkerRequest(server, name string) {
285290
if m.workerQueueDepth == nil {
286291
return
287292
}
288-
m.workerQueueDepth.WithLabelValues(name).Dec()
293+
m.workerQueueDepth.WithLabelValues(server, name).Dec()
289294
}
290295

291296
func (m *PrometheusMetrics) QueuedRequest() {

metrics_test.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ func TestPrometheusMetrics_TotalWorkers(t *testing.T) {
3232
require.Nil(t, m.workerRequestTime)
3333
require.Nil(t, m.workerRequestCount)
3434

35-
m.TotalWorkers("test_worker", 2)
35+
m.TotalWorkers("test_server", "test_worker", 2)
3636

3737
require.NotNil(t, m.totalWorkers)
3838
require.NotNil(t, m.busyWorkers)
@@ -45,8 +45,8 @@ func TestPrometheusMetrics_TotalWorkers(t *testing.T) {
4545

4646
func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) {
4747
m := createPrometheusMetrics()
48-
m.TotalWorkers("test_worker", 2)
49-
m.StopWorkerRequest("test_worker", 2*time.Second)
48+
m.TotalWorkers("test_server", "test_worker", 2)
49+
m.StopWorkerRequest("test_server", "test_worker", 2*time.Second)
5050

5151
inputs := []struct {
5252
name string
@@ -62,7 +62,7 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) {
6262
# TYPE frankenphp_worker_request_count counter
6363
`,
6464
expect: `
65-
frankenphp_worker_request_count{worker="test_worker"} 1
65+
frankenphp_worker_request_count{server="test_server",worker="test_worker"} 1
6666
`,
6767
},
6868
{
@@ -73,7 +73,7 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) {
7373
# TYPE frankenphp_busy_workers gauge
7474
`,
7575
expect: `
76-
frankenphp_busy_workers{worker="test_worker"} -1
76+
frankenphp_busy_workers{server="test_server",worker="test_worker"} -1
7777
`,
7878
},
7979
{
@@ -84,7 +84,7 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) {
8484
# TYPE frankenphp_worker_request_time counter
8585
`,
8686
expect: `
87-
frankenphp_worker_request_time{worker="test_worker"} 2
87+
frankenphp_worker_request_time{server="test_server",worker="test_worker"} 2
8888
`,
8989
},
9090
}
@@ -99,8 +99,8 @@ func TestPrometheusMetrics_StopWorkerRequest(t *testing.T) {
9999

100100
func TestPrometheusMetrics_StartWorkerRequest(t *testing.T) {
101101
m := createPrometheusMetrics()
102-
m.TotalWorkers("test_worker", 2)
103-
m.StartWorkerRequest("test_worker")
102+
m.TotalWorkers("test_server", "test_worker", 2)
103+
m.StartWorkerRequest("test_server", "test_worker")
104104

105105
inputs := []struct {
106106
name string
@@ -116,7 +116,7 @@ func TestPrometheusMetrics_StartWorkerRequest(t *testing.T) {
116116
# TYPE frankenphp_busy_workers gauge
117117
`,
118118
expect: `
119-
frankenphp_busy_workers{worker="test_worker"} 1
119+
frankenphp_busy_workers{server="test_server",worker="test_worker"} 1
120120
`,
121121
},
122122
}
@@ -131,8 +131,8 @@ func TestPrometheusMetrics_StartWorkerRequest(t *testing.T) {
131131

132132
func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) {
133133
m := createPrometheusMetrics()
134-
m.TotalWorkers("test_worker", 2)
135-
m.StopWorker("test_worker", StopReasonCrash)
134+
m.TotalWorkers("test_server", "test_worker", 2)
135+
m.StopWorker("test_server", "test_worker", StopReasonCrash)
136136

137137
inputs := []struct {
138138
name string
@@ -159,7 +159,7 @@ func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) {
159159
# TYPE frankenphp_total_workers gauge
160160
`,
161161
expect: `
162-
frankenphp_total_workers{worker="test_worker"} -1
162+
frankenphp_total_workers{server="test_server",worker="test_worker"} -1
163163
`,
164164
},
165165
{
@@ -170,7 +170,7 @@ func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) {
170170
# TYPE frankenphp_ready_workers gauge
171171
`,
172172
expect: `
173-
frankenphp_ready_workers{worker="test_worker"} -1
173+
frankenphp_ready_workers{server="test_server",worker="test_worker"} -1
174174
`,
175175
},
176176
{
@@ -181,7 +181,7 @@ func TestPrometheusMetrics_TestStopReasonCrash(t *testing.T) {
181181
# TYPE frankenphp_worker_crashes counter
182182
`,
183183
expect: `
184-
frankenphp_worker_crashes{worker="test_worker"} 1
184+
frankenphp_worker_crashes{server="test_server",worker="test_worker"} 1
185185
`,
186186
},
187187
}

threadbackgroundworker.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ func (handler *backgroundWorkerThread) setupScript() {
136136
if handler.runtimeName == "" {
137137
handler.runtimeName = handler.worker.name
138138
}
139-
metrics.StartWorker(handler.runtimeName)
139+
metrics.StartWorker(ScopeLabel(handler.worker.backgroundScope), handler.runtimeName)
140140

141141
opts := append([]RequestOption(nil), handler.worker.requestOptions...)
142142
C.frankenphp_set_background_worker(C._Bool(true))
@@ -185,9 +185,11 @@ func (handler *backgroundWorkerThread) afterScriptExecution(exitStatus int) {
185185
return
186186
}
187187

188+
server := ScopeLabel(worker.backgroundScope)
189+
188190
// Cooperative exit: re-run, reset backoff.
189191
if exitStatus == 0 {
190-
metrics.StopWorker(runtimeName, StopReasonRestart)
192+
metrics.StopWorker(server, runtimeName, StopReasonRestart)
191193

192194
if globalLogger.Enabled(globalCtx, slog.LevelDebug) {
193195
globalLogger.LogAttrs(globalCtx, slog.LevelDebug, "restarting background worker", slog.String("worker", runtimeName), slog.Int("thread", handler.thread.threadIndex), slog.Int("exit_status", exitStatus))
@@ -197,7 +199,7 @@ func (handler *backgroundWorkerThread) afterScriptExecution(exitStatus int) {
197199
return
198200
}
199201

200-
metrics.StopWorker(runtimeName, StopReasonCrash)
202+
metrics.StopWorker(server, runtimeName, StopReasonCrash)
201203

202204
// Pre-readiness crash: stash metadata for a timing-out ensure(). Post-
203205
// readiness crashes don't update this (the worker already signalled OK).

0 commit comments

Comments
 (0)