Skip to content

Commit 4c728fe

Browse files
grussorusso and matnar committed
Refactored Prometheus integration
Co-authored-by: Matteo Nardelli <matnar@gmail.com>
1 parent 132a7e6 commit 4c728fe

10 files changed

Lines changed: 196 additions & 120 deletions

File tree

cmd/serverledge/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func main() {
5959
}
6060
node.NodeIdentifier = myKey
6161

62-
go metrics.Init()
62+
metrics.Init()
6363

6464
if config.GetBool(config.TRACING_ENABLED, false) {
6565
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)

docs/metrics.md

Lines changed: 14 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,90 +1,26 @@
11
# Metrics
22

3+
A simple metrics system is available to export/retrieve metrics to/from Prometheus.
4+
35
The metrics system must be enabled via `metrics.enabled`.
4-
If enabled, metrics are exposed at `http://localhost:2112/metrics`.
6+
If enabled, metrics can be scraped by any Prometheus server through the `/metrics`
7+
endpoint of every Serverledge node (served by the node's own API server).
58

6-
You can check that the metrics system is working without starting a Prometheus
7-
server:
9+
If you have a local Serverledge node, you can check that the metrics system is
10+
working without starting a Prometheus server:
811

9-
$ curl 127.0.0.1:2112/metrics
12+
$ curl 127.0.0.1:1323/metrics
1013

14+
If you start a local Prometheus server, you can browse its web interface at `http://127.0.0.1:9090` to inspect the scraped metrics.
1115

1216
## Available metrics
1317

14-
A few metrics are currently exposed (just for demonstration purposes):
15-
16-
- `sedge_completed_total`: number of completed invocations (Counter, per function)
17-
- `sedge_exectime`: execution time for each function (Histogram, per function)
18-
19-
20-
## Prometheus Integration
21-
22-
Various Prometheus configurations can be considered to scrape Serverledge
23-
metrics:
24-
25-
- A centralized Prometheus server in the Cloud (likely not scalable...)
26-
- A Prometheus server in each Edge zone
27-
- A Prometheus server in the Cloud with a Prometheus Agent on each Serverledge
28-
node (details below)
29-
30-
### Example: Prometheus Agent + Cloud
31-
32-
As regards the last option, it requires Prometheus instances to use the
33-
following (minimal) configuration.
34-
35-
In the Serverledge node,
36-
Prometheus must be started with `--enable-feature=agent` and the following
37-
lines in the configuration:
38-
39-
remote_write:
40-
- url: "http://<prometheus_cloud_host>:9091/api/v1/write"
41-
42-
Example configuration:
43-
44-
global:
45-
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
46-
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
47-
# scrape_timeout is set to the global default (10s).
48-
49-
# A scrape configuration containing exactly one endpoint to scrape:
50-
scrape_configs:
51-
- job_name: "serverledge"
52-
# metrics_path defaults to '/metrics'
53-
# scheme defaults to 'http'.
54-
static_configs:
55-
- targets: ["<serverledge_host>:2112"]
56-
57-
remote_write:
58-
- url: "http://<prometheus_cloud_host>:9091/api/v1/write"
59-
60-
In the Cloud, Prometheus must be started with `--web.enable-remote-write-receiver`.
61-
Example configuration:
62-
63-
global:
64-
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
65-
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
66-
67-
Example script to launch both Prometheus instances on the same host (for
68-
testing):
69-
70-
docker run \
71-
--name prom \
72-
-d \
73-
-p 9090:9090 \
74-
-v $(pwd)/prometheus.yml:/etc/prometheus/prometheus.yml \
75-
prom/prometheus --enable-feature=agent \
76-
--config.file=/etc/prometheus/prometheus.yml
18+
A few metrics are currently exported: `completed_total` (counter) and `execution_time` (histogram), both labeled by node and function (see `internal/metrics/metrics.go`).
7719

78-
docker run \
79-
--name promRemote \
80-
-d\
81-
\
82-
-p 9091:9090 \
83-
-v $(pwd)/prometheus_remote.yml:/etc/prometheus/prometheus.yml \
84-
prom/prometheus --web.enable-remote-write-receiver \
85-
--config.file=/etc/prometheus/prometheus.yml
20+
## Configuration
8621

87-
### References
22+
Relevant configuration options:
8823

89-
- [Prometheus Agent Mode](https://prometheus.io/blog/2021/11/16/agent/)
90-
- [Prometheus + Go](https://prometheus.io/docs/guides/go-application/)
24+
- `metrics.prometheus.host`: Prometheus server IP/hostname (for queries)
25+
- `metrics.prometheus.port`: Prometheus server port (for queries)
26+
- `metrics.retriever.interval`: Interval (in seconds) for metrics retrieval from Prometheus

go.mod

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,15 @@ toolchain go1.22.5
77
require (
88
github.com/LK4D4/trylock v0.0.0-20191027065348-ff7e133a5c54
99
github.com/buger/jsonparser v1.1.1
10-
github.com/cornelk/hashmap v1.0.8
1110
github.com/docker/docker v24.0.5+incompatible
1211
github.com/hexablock/vivaldi v0.0.0-20180727225019-07adad3f2b5f
1312
github.com/labstack/echo/v4 v4.6.1
1413
github.com/labstack/gommon v0.3.0
1514
github.com/lithammer/shortuuid v3.0.0+incompatible
1615
github.com/mikoim/go-loadavg v0.0.0-20150917074714-35ece5f6d547
1716
github.com/prometheus/client_golang v1.14.0
17+
github.com/prometheus/common v0.37.0
18+
github.com/spf13/cast v1.3.0
1819
github.com/spf13/cobra v1.9.1
1920
github.com/spf13/viper v1.4.0
2021
go.etcd.io/etcd/client/v3 v3.5.1
@@ -45,24 +46,25 @@ require (
4546
github.com/google/uuid v1.6.0 // indirect
4647
github.com/hashicorp/hcl v1.0.0 // indirect
4748
github.com/inconshreveable/mousetrap v1.1.0 // indirect
49+
github.com/json-iterator/go v1.1.12 // indirect
4850
github.com/kr/pretty v0.3.0 // indirect
4951
github.com/magiconair/properties v1.8.7 // indirect
5052
github.com/mattn/go-colorable v0.1.8 // indirect
5153
github.com/mattn/go-isatty v0.0.14 // indirect
5254
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
5355
github.com/mitchellh/mapstructure v1.1.2 // indirect
5456
github.com/moby/term v0.5.0 // indirect
57+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
58+
github.com/modern-go/reflect2 v1.0.2 // indirect
5559
github.com/morikuni/aec v1.0.0 // indirect
5660
github.com/opencontainers/go-digest v1.0.0 // indirect
5761
github.com/opencontainers/image-spec v1.1.0-rc4 // indirect
5862
github.com/pelletier/go-toml v1.9.5 // indirect
5963
github.com/pkg/errors v0.9.1 // indirect
6064
github.com/prometheus/client_model v0.3.0 // indirect
61-
github.com/prometheus/common v0.37.0 // indirect
6265
github.com/prometheus/procfs v0.8.0 // indirect
6366
github.com/rogpeppe/go-internal v1.8.1 // indirect
6467
github.com/spf13/afero v1.2.2 // indirect
65-
github.com/spf13/cast v1.3.0 // indirect
6668
github.com/spf13/jwalterweatherman v1.0.0 // indirect
6769
github.com/spf13/pflag v1.0.6 // indirect
6870
github.com/valyala/bytebufferpool v1.0.0 // indirect

go.sum

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,6 @@ github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
7575
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
7676
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
7777
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
78-
github.com/cornelk/hashmap v1.0.8 h1:nv0AWgw02n+iDcawr5It4CjQIAcdMMKRrs10HOJYlrc=
79-
github.com/cornelk/hashmap v1.0.8/go.mod h1:RfZb7JO3RviW/rT6emczVuC/oxpdz4UsSB2LJSclR1k=
8078
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
8179
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
8280
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -200,10 +198,12 @@ github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:
200198
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
201199
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
202200
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
201+
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
203202
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
204203
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
205204
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
206205
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
206+
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
207207
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
208208
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
209209
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@@ -250,13 +250,16 @@ github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh
250250
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
251251
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
252252
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
253+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
253254
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
254255
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
255256
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
257+
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
256258
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
257259
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
258260
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
259261
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
262+
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
260263
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
261264
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
262265
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
@@ -475,6 +478,8 @@ golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4Iltr
475478
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
476479
golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
477480
golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc=
481+
golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g=
482+
golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4=
478483
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
479484
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
480485
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -629,6 +634,8 @@ google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
629634
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
630635
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
631636
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
637+
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
638+
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
632639
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
633640
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
634641
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=

internal/api/server.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"github.com/serverledge-faas/serverledge/internal/metrics"
78
"log"
89
"net/http"
910
"os"
@@ -30,6 +31,14 @@ func StartAPIServer(e *echo.Echo) {
3031
e.GET("/function", GetFunctions)
3132
e.GET("/poll/:reqId", PollAsyncResult)
3233
e.GET("/status", GetServerStatus)
34+
35+
if config.GetBool(config.METRICS_ENABLED, false) {
36+
e.GET("/metrics", func(c echo.Context) error {
37+
metrics.ScrapingHandler.ServeHTTP(c.Response(), c.Request())
38+
return nil
39+
})
40+
}
41+
3342
// Workflow routes
3443
e.POST("/workflow/invoke/:workflow", InvokeWorkflow)
3544
e.POST("/workflow/resume/:workflow", ResumeWorkflow)

internal/config/keys.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,15 @@ const LISTEN_UDP_PORT = "registry.udp.port"
5959
// enable metrics system
6060
const METRICS_ENABLED = "metrics.enabled"
6161

62-
const METRICS_PROMETHEUS_HOST = "metrics.prometheus.host"
62+
// Port used by Prometheus server
6363
const METRICS_PROMETHEUS_PORT = "metrics.prometheus.port"
6464

65+
// Prometheus IP address / hostname
66+
const METRICS_PROMETHEUS_HOST = "metrics.prometheus.host"
67+
68+
// Interval (in seconds) for metrics retriever
69+
const METRICS_RETRIEVER_INTERVAL = "metrics.retriever.interval"
70+
6571
// Scheduling policy to use
6672
// Possible values: "qosaware", "default", "cloudonly"
6773
const SCHEDULING_POLICY = "scheduler.policy"

internal/metrics/metrics.go

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,30 @@ import (
1515

1616
var Enabled bool
1717
var registry = prometheus.NewRegistry()
18-
var nodeIdentifier string
18+
var ScrapingHandler http.Handler = nil
19+
var durationBuckets = []float64{0.002, 0.005, 0.010, 0.02, 0.03, 0.05, 0.1, 0.15, 0.3, 0.6, 1.0}
20+
21+
const (
22+
COMPLETIONS = "completed_total"
23+
EXECUTION_TIME = "execution_time"
24+
)
25+
26+
var (
27+
metricCompletions = promauto.NewCounterVec(prometheus.CounterOpts{
28+
Name: COMPLETIONS,
29+
Help: "Number of completed function invocations",
30+
}, []string{"node", "function"})
31+
metricExecutionTime = promauto.NewHistogramVec(prometheus.HistogramOpts{
32+
Name: EXECUTION_TIME,
33+
Help: "Function duration",
34+
Buckets: durationBuckets,
35+
}, []string{"node", "function"})
36+
)
37+
38+
type RetrievedMetrics struct {
39+
Completions map[string]float64
40+
AvgExecutionTime map[string]float64
41+
}
1942

2043
func Init() {
2144
if config.GetBool(config.METRICS_ENABLED, false) {
@@ -26,43 +49,18 @@ func Init() {
2649
return
2750
}
2851

29-
nodeIdentifier = node.NodeIdentifier
30-
registerGlobalMetrics()
52+
registry.MustRegister(metricCompletions)
53+
registry.MustRegister(metricExecutionTime)
3154

32-
handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
55+
ScrapingHandler = promhttp.HandlerFor(registry, promhttp.HandlerOpts{
3356
EnableOpenMetrics: true})
34-
http.Handle("/metrics", handler)
35-
err := http.ListenAndServe(":2112", nil)
36-
if err != nil {
37-
log.Printf("Listen and serve terminated with error: %s\n", err)
38-
return
39-
}
40-
}
41-
42-
// Global metrics
43-
var (
44-
CompletedInvocations = promauto.NewCounterVec(prometheus.CounterOpts{
45-
Name: "sedge_completed_total",
46-
Help: "The total number of completed function invocations",
47-
}, []string{"node", "function"})
48-
ExecutionTimes = promauto.NewHistogramVec(prometheus.HistogramOpts{
49-
Name: "sedge_exectime",
50-
Help: "Function duration",
51-
Buckets: durationBuckets,
52-
},
53-
[]string{"node", "function"})
54-
)
5557

56-
var durationBuckets = []float64{0.002, 0.005, 0.010, 0.02, 0.03, 0.05, 0.1, 0.15, 0.3, 0.6, 1.0}
58+
go MetricsRetriever()
59+
}
5760

5861
func AddCompletedInvocation(funcName string) {
59-
CompletedInvocations.With(prometheus.Labels{"function": funcName, "node": nodeIdentifier}).Inc()
62+
metricCompletions.With(prometheus.Labels{"function": funcName, "node": node.NodeIdentifier}).Inc()
6063
}
6164
func AddFunctionDurationValue(funcName string, duration float64) {
62-
ExecutionTimes.With(prometheus.Labels{"function": funcName, "node": nodeIdentifier}).Observe(duration)
63-
}
64-
65-
func registerGlobalMetrics() {
66-
registry.MustRegister(CompletedInvocations)
67-
registry.MustRegister(ExecutionTimes)
65+
metricExecutionTime.With(prometheus.Labels{"function": funcName, "node": node.NodeIdentifier}).Observe(duration)
6866
}

0 commit comments

Comments
 (0)