Skip to content

Commit 816aab5

Browse files
author
Codex
committed
Rebase metrics endpoint onto upstream runtime metrics
Preserve the new runtime traffic and latency sources from upstream main, then layer the old endpoint/Prometheus surface back on top with portable local tests for the HTTP/auth/config slices. Constraint: Local macOS environment cannot perform Linux/eBPF package builds or generated-bpf compilation Rejected: Re-apply old metrics branch wholesale | it deleted upstream runtime_stats and latency_probe Rejected: Keep endpoint HTTP helpers inside pkg/metrics | prevented portable local tests because collectors pull in Linux-only control dependencies Confidence: medium Scope-risk: moderate Reversibility: clean Directive: Keep PR#968 runtime sources as the single source of truth and extend Prometheus collectors around them instead of duplicating runtime logic Tested: go test ./common Tested: go test ./pkg/metricshttp Tested: (cd cmd && go test endpoint_config.go run_test.go) Not-tested: Full ./control ./pkg/metrics ./cmd build on Linux with generated eBPF artifacts Not-tested: GitHub Actions kernel/eBPF workflows
1 parent 39831d5 commit 816aab5

29 files changed

Lines changed: 1856 additions & 29 deletions

cmd/endpoint_config.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* SPDX-License-Identifier: AGPL-3.0-only
3+
* Copyright (c) 2022-2025, daeuniverse Organization <dae@v2raya.org>
4+
*/
5+
6+
package cmd
7+
8+
import (
9+
"fmt"
10+
"os"
11+
12+
"github.com/daeuniverse/dae/common"
13+
"github.com/daeuniverse/dae/config"
14+
"github.com/daeuniverse/dae/pkg/metricshttp"
15+
"github.com/sirupsen/logrus"
16+
)
17+
18+
func endpointConfigFromGlobal(conf *config.Config, log *logrus.Logger) metricshttp.EndpointConfig {
19+
cfg := metricshttp.EndpointConfig{
20+
ListenAddress: conf.Global.EndpointListenAddress,
21+
Username: conf.Global.EndpointUsername,
22+
Password: conf.Global.EndpointPassword,
23+
TlsCertificate: conf.Global.EndpointTlsCertificate,
24+
TlsKey: conf.Global.EndpointTlsKey,
25+
PrometheusEnabled: conf.Global.EndpointPrometheusEnabled,
26+
PrometheusPath: conf.Global.EndpointPrometheusPath,
27+
PprofEnabled: conf.Global.PprofPort != 0,
28+
}
29+
if cfg.ListenAddress == "" && conf.Global.PprofPort != 0 {
30+
log.Warnln("pprof_port is deprecated, please use endpoint_listen_address instead")
31+
cfg.ListenAddress = fmt.Sprintf("localhost:%d", conf.Global.PprofPort)
32+
}
33+
return cfg
34+
}
35+
36+
func endpointConfigChanged(a, b metricshttp.EndpointConfig) bool {
37+
return a != b
38+
}
39+
40+
func validateEndpointTLSFiles(cfg metricshttp.EndpointConfig) error {
41+
if cfg.TlsCertificate == "" && cfg.TlsKey == "" {
42+
return nil
43+
}
44+
if cfg.TlsCertificate == "" || cfg.TlsKey == "" {
45+
return fmt.Errorf("endpoint_tls_certificate and endpoint_tls_key must be configured together")
46+
}
47+
48+
certFile, err := os.Open(cfg.TlsCertificate)
49+
if err != nil {
50+
return fmt.Errorf("cannot open endpoint_tls_certificate '%s': %w", cfg.TlsCertificate, err)
51+
}
52+
certFi, err := certFile.Stat()
53+
certFile.Close()
54+
if err != nil {
55+
return fmt.Errorf("cannot stat endpoint_tls_certificate '%s': %w", cfg.TlsCertificate, err)
56+
}
57+
if err = common.ValidateFilePermissionAllowed(cfg.TlsCertificate, certFi, 0o640, 0o644); err != nil {
58+
return fmt.Errorf("invalid endpoint_tls_certificate: %w", err)
59+
}
60+
61+
keyFile, err := os.Open(cfg.TlsKey)
62+
if err != nil {
63+
return fmt.Errorf("cannot open endpoint_tls_key '%s': %w", cfg.TlsKey, err)
64+
}
65+
keyFi, err := keyFile.Stat()
66+
keyFile.Close()
67+
if err != nil {
68+
return fmt.Errorf("cannot stat endpoint_tls_key '%s': %w", cfg.TlsKey, err)
69+
}
70+
if err = common.ValidateFilePermissionAllowed(cfg.TlsKey, keyFi, 0o600); err != nil {
71+
return fmt.Errorf("invalid endpoint_tls_key: %w", err)
72+
}
73+
return nil
74+
}

cmd/run.go

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ import (
3737
"github.com/daeuniverse/dae/control"
3838
"github.com/daeuniverse/dae/pkg/config_parser"
3939
"github.com/daeuniverse/dae/pkg/logger"
40+
"github.com/daeuniverse/dae/pkg/metrics"
41+
"github.com/daeuniverse/dae/pkg/metricshttp"
4042
"github.com/mohae/deepcopy"
4143
"github.com/okzk/sdnotify"
4244
"github.com/sirupsen/logrus"
@@ -128,18 +130,35 @@ func Run(log *logrus.Logger, conf *config.Config, externGeoDataDirs []string) (e
128130
// Remove AbortFile at beginning.
129131
_ = os.Remove(AbortFile)
130132

133+
endpointCfg := endpointConfigFromGlobal(conf, log)
134+
if err = validateEndpointTLSFiles(endpointCfg); err != nil {
135+
return fmt.Errorf("invalid endpoint tls config: %w", err)
136+
}
137+
131138
// New ControlPlane.
132139
c, err := newControlPlane(log, nil, nil, conf, externGeoDataDirs)
133140
if err != nil {
134141
return err
135142
}
136143

137-
var pprofServer *http.Server
138-
if conf.Global.PprofPort != 0 {
139-
pprofAddr := fmt.Sprintf("localhost:%d", conf.Global.PprofPort)
140-
pprofServer = &http.Server{Addr: pprofAddr, Handler: nil}
141-
go pprofServer.ListenAndServe()
144+
metricsState := metrics.NewState()
145+
metricsState.SetControlPlane(c)
146+
metricsRegistry := metrics.NewRegistry(metricsState)
147+
148+
var endpointServer *http.Server
149+
startEndpointServer := func(cfg metricshttp.EndpointConfig) {
150+
if cfg.ListenAddress == "" {
151+
endpointServer = nil
152+
return
153+
}
154+
endpointServer = metricshttp.NewEndpointServer(cfg, metricsRegistry)
155+
go func(server *http.Server, endpointCfg metricshttp.EndpointConfig) {
156+
if e := metricshttp.StartEndpointServer(server, endpointCfg); e != nil && !errors.Is(e, http.ErrServerClosed) {
157+
log.WithError(e).Errorln("Endpoint server stopped with error")
158+
}
159+
}(endpointServer, cfg)
142160
}
161+
startEndpointServer(endpointCfg)
143162

144163
// Serve tproxy TCP/UDP server util signals.
145164
var listener *control.Listener
@@ -252,6 +271,16 @@ loop:
252271
log.SetOutput(oldLogOutput) // FIXME: THIS IS A HACK.
253272
logrus.SetOutput(oldLogOutput)
254273

274+
newEndpointCfg := endpointConfigFromGlobal(newConf, log)
275+
if err = validateEndpointTLSFiles(newEndpointCfg); err != nil {
276+
log.WithFields(logrus.Fields{
277+
"err": err,
278+
}).Errorln("[Reload] Failed to reload")
279+
sdnotify.Ready()
280+
_ = os.WriteFile(SignalProgressFilePath, append([]byte{consts.ReloadError}, []byte("\n"+err.Error())...), 0644)
281+
continue
282+
}
283+
255284
// New control plane.
256285
obj := c.EjectBpf()
257286
var dnsCache map[string]*control.DnsCache
@@ -263,7 +292,7 @@ loop:
263292
if err := c.StopDNSListener(); err != nil {
264293
log.Warnf("[Reload] Failed to stop old DNS listener: %v", err)
265294
}
266-
295+
267296
log.Warnln("[Reload] Load new control plane")
268297
newC, err := newControlPlane(log, obj, dnsCache, newConf, externGeoDataDirs)
269298
if err != nil {
@@ -295,21 +324,20 @@ loop:
295324
c = newC
296325
conf = newConf
297326
reloading = true
327+
metricsState.SetControlPlane(newC)
298328

299329
// Ready to close.
300330
if abortConnections {
301331
oldC.AbortConnections()
302332
}
303333
oldC.Close()
304334

305-
if pprofServer != nil {
306-
pprofServer.Shutdown(context.Background())
307-
pprofServer = nil
308-
}
309-
if newConf.Global.PprofPort != 0 {
310-
pprofAddr := fmt.Sprintf("localhost:%d", conf.Global.PprofPort)
311-
pprofServer = &http.Server{Addr: pprofAddr, Handler: nil}
312-
go pprofServer.ListenAndServe()
335+
if endpointConfigChanged(endpointCfg, newEndpointCfg) {
336+
if endpointServer != nil {
337+
_ = endpointServer.Shutdown(context.Background())
338+
}
339+
endpointCfg = newEndpointCfg
340+
startEndpointServer(endpointCfg)
313341
}
314342
case syscall.SIGHUP:
315343
// Ignore.
@@ -321,6 +349,9 @@ loop:
321349
}
322350
defer os.Remove(PidFilePath)
323351
defer control.GetDaeNetns().Close()
352+
if endpointServer != nil {
353+
_ = endpointServer.Shutdown(context.Background())
354+
}
324355
if e := c.Close(); e != nil {
325356
return fmt.Errorf("close control plane: %w", e)
326357
}

cmd/run_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* SPDX-License-Identifier: AGPL-3.0-only
3+
* Copyright (c) 2022-2025, daeuniverse Organization <dae@v2raya.org>
4+
*/
5+
6+
package cmd
7+
8+
import (
9+
"os"
10+
"path/filepath"
11+
"testing"
12+
13+
"github.com/daeuniverse/dae/config"
14+
"github.com/daeuniverse/dae/pkg/metricshttp"
15+
"github.com/sirupsen/logrus"
16+
)
17+
18+
func writeEndpointFile(t *testing.T, name string, mode os.FileMode) string {
19+
t.Helper()
20+
path := filepath.Join(t.TempDir(), name)
21+
if err := os.WriteFile(path, []byte("x"), 0o600); err != nil {
22+
t.Fatalf("write endpoint file: %v", err)
23+
}
24+
if err := os.Chmod(path, mode); err != nil {
25+
t.Fatalf("chmod endpoint file: %v", err)
26+
}
27+
return path
28+
}
29+
30+
func TestEndpointConfigFromGlobalUsesEndpointSettings(t *testing.T) {
31+
conf := &config.Config{}
32+
conf.Global.EndpointListenAddress = "127.0.0.1:5556"
33+
conf.Global.EndpointUsername = "dae"
34+
conf.Global.EndpointPassword = "secret"
35+
conf.Global.EndpointTlsCertificate = "/tmp/cert.pem"
36+
conf.Global.EndpointTlsKey = "/tmp/key.pem"
37+
conf.Global.EndpointPrometheusEnabled = true
38+
conf.Global.EndpointPrometheusPath = "/custom-metrics"
39+
conf.Global.PprofPort = 0
40+
41+
cfg := endpointConfigFromGlobal(conf, logrus.New())
42+
if cfg.ListenAddress != "127.0.0.1:5556" {
43+
t.Fatalf("unexpected listen address: got=%q", cfg.ListenAddress)
44+
}
45+
if cfg.Username != "dae" || cfg.Password != "secret" {
46+
t.Fatalf("unexpected credentials: got=%q/%q", cfg.Username, cfg.Password)
47+
}
48+
if !cfg.PrometheusEnabled || cfg.PrometheusPath != "/custom-metrics" {
49+
t.Fatalf("unexpected prometheus settings: enabled=%v path=%q", cfg.PrometheusEnabled, cfg.PrometheusPath)
50+
}
51+
if cfg.PprofEnabled {
52+
t.Fatal("pprof should be disabled when pprof_port is zero")
53+
}
54+
}
55+
56+
func TestEndpointConfigFromGlobalFallsBackToPprofPort(t *testing.T) {
57+
conf := &config.Config{}
58+
conf.Global.PprofPort = 6060
59+
60+
cfg := endpointConfigFromGlobal(conf, logrus.New())
61+
if cfg.ListenAddress != "localhost:6060" {
62+
t.Fatalf("unexpected pprof fallback address: got=%q want=%q", cfg.ListenAddress, "localhost:6060")
63+
}
64+
if !cfg.PprofEnabled {
65+
t.Fatal("pprof should be enabled when pprof_port is non-zero")
66+
}
67+
}
68+
69+
func TestValidateEndpointTLSFilesRequiresPair(t *testing.T) {
70+
err := validateEndpointTLSFiles(endpointConfigFromGlobal(&config.Config{}, logrus.New()))
71+
if err != nil {
72+
t.Fatalf("empty endpoint tls config should pass: %v", err)
73+
}
74+
75+
err = validateEndpointTLSFiles(metricshttp.EndpointConfig{
76+
TlsCertificate: "/tmp/cert.pem",
77+
})
78+
if err == nil {
79+
t.Fatal("expected certificate-only config to fail")
80+
}
81+
}
82+
83+
func TestValidateEndpointTLSFilesChecksPermissions(t *testing.T) {
84+
cert := writeEndpointFile(t, "cert.pem", 0o640)
85+
key := writeEndpointFile(t, "key.pem", 0o600)
86+
87+
if err := validateEndpointTLSFiles(metricshttp.EndpointConfig{
88+
TlsCertificate: cert,
89+
TlsKey: key,
90+
}); err != nil {
91+
t.Fatalf("expected valid certificate/key permissions to pass: %v", err)
92+
}
93+
94+
tooOpenCert := writeEndpointFile(t, "cert-open.pem", 0o666)
95+
err := validateEndpointTLSFiles(metricshttp.EndpointConfig{
96+
TlsCertificate: tooOpenCert,
97+
TlsKey: key,
98+
})
99+
if err == nil {
100+
t.Fatal("expected too-open certificate permissions to fail")
101+
}
102+
}

common/file_permission.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
* SPDX-License-Identifier: AGPL-3.0-only
3+
* Copyright (c) 2022-2025, daeuniverse Organization <dae@v2raya.org>
4+
*/
5+
6+
package common
7+
8+
import (
9+
"fmt"
10+
"os"
11+
"sort"
12+
"strings"
13+
)
14+
15+
func ValidateFilePermissionNotTooOpen(path string, fi os.FileInfo) error {
16+
if fi.IsDir() {
17+
return fmt.Errorf("cannot read a directory: %v", path)
18+
}
19+
if fi.Mode()&0o037 > 0 {
20+
return fmt.Errorf("permissions %04o for '%v' are too open; requires the file is NOT writable by the same group and NOT accessible by others; suggest 0640 or 0600", fi.Mode()&0o777, path)
21+
}
22+
return nil
23+
}
24+
25+
func ValidateFilePermissionAllowed(path string, fi os.FileInfo, allowedModes ...os.FileMode) error {
26+
if fi.IsDir() {
27+
return fmt.Errorf("cannot read a directory: %v", path)
28+
}
29+
perm := fi.Mode().Perm()
30+
for _, mode := range allowedModes {
31+
if perm == mode.Perm() {
32+
return nil
33+
}
34+
}
35+
if len(allowedModes) == 0 {
36+
return fmt.Errorf("permissions %04o for '%v' are invalid", perm, path)
37+
}
38+
allowed := make([]string, 0, len(allowedModes))
39+
for _, mode := range allowedModes {
40+
allowed = append(allowed, fmt.Sprintf("%04o", mode.Perm()))
41+
}
42+
sort.Strings(allowed)
43+
return fmt.Errorf("permissions %04o for '%v' are invalid; allowed: %s", perm, path, strings.Join(allowed, ", "))
44+
}

0 commit comments

Comments
 (0)