From 842b2462cb8e98654e8a43d683c3a00aaddacea2 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Thu, 12 Jun 2025 13:57:16 +0200 Subject: [PATCH 1/7] Adds metrics endpoint --- METRICS.md | 84 ++++++++ README.md | 22 ++ main.go | 12 ++ .../backends/llamacpp/llamacpp_config.go | 2 +- .../backends/llamacpp/llamacpp_config_test.go | 2 + pkg/inference/scheduling/scheduler.go | 68 ++++++ pkg/metrics/aggregated_handler.go | 199 ++++++++++++++++++ pkg/metrics/label_ordering_test.go | 88 ++++++++ pkg/metrics/prometheus_parser.go | 155 ++++++++++++++ pkg/metrics/prometheus_parser_test.go | 177 ++++++++++++++++ pkg/metrics/scheduler_proxy.go | 107 ++++++++++ 11 files changed, 915 insertions(+), 1 deletion(-) create mode 100644 METRICS.md create mode 100644 pkg/metrics/aggregated_handler.go create mode 100644 pkg/metrics/label_ordering_test.go create mode 100644 pkg/metrics/prometheus_parser.go create mode 100644 pkg/metrics/prometheus_parser_test.go create mode 100644 pkg/metrics/scheduler_proxy.go diff --git a/METRICS.md b/METRICS.md new file mode 100644 index 000000000..ab64ea635 --- /dev/null +++ b/METRICS.md @@ -0,0 +1,84 @@ +# Aggregated Metrics Endpoint + +The model-runner now exposes an aggregated `/metrics` endpoint that collects and labels metrics from all active llama.cpp runners. + +## Overview + +When llama.cpp models are running, each server automatically exposes Prometheus-compatible metrics at its `/metrics` endpoint. The model-runner now aggregates these metrics from all active runners, adds identifying labels, and serves them through a unified `/metrics` endpoint. This provides a comprehensive view of all running models with proper Prometheus labeling. + +## Aggregated Metrics Format + +Instead of exposing metrics from a single runner, the endpoint now aggregates metrics from all active runners and adds labels to identify the source: + +### Example Output + +```prometheus +# HELP llama_prompt_tokens_total Total number of prompt tokens processed +# TYPE llama_prompt_tokens_total counter +llama_prompt_tokens_total{backend="llama.cpp",model="llama3.2:latest",mode="completion"} 4934 +llama_prompt_tokens_total{backend="llama.cpp",model="ai/mxbai-embed-large:335M-F16",mode="embedding"} 4525 + +# HELP llama_generation_tokens_total Total number of tokens generated +# TYPE llama_generation_tokens_total counter +llama_generation_tokens_total{backend="llama.cpp",model="llama3.2:latest",mode="completion"} 2156 + +# HELP llama_requests_total Total number of requests processed +# TYPE llama_requests_total counter +llama_requests_total{backend="llama.cpp",model="llama3.2:latest",mode="completion"} 127 +llama_requests_total{backend="llama.cpp",model="ai/mxbai-embed-large:335M-F16",mode="embedding"} 89 +``` + +### Labels Added + +Each metric is automatically labeled with: +- **`backend`**: The inference backend (e.g., "llama.cpp") +- **`model`**: The model name (e.g., "llama3.2:latest") +- **`mode`**: The operation mode ("completion" or "embedding") + +## Usage + +### Enabling Metrics (Default) + +By default, the aggregated metrics endpoint is enabled. When the model-runner starts with active runners, you can access metrics at: + +``` +GET /metrics +``` + +### Disabling Metrics + +To disable the metrics endpoint, set the `DISABLE_METRICS` environment variable: + +```bash +export DISABLE_METRICS=1 +``` + +### TCP Port Access + +If you're running the model-runner with a TCP port (using `MODEL_RUNNER_PORT`), you can access metrics via HTTP: + +```bash +# If MODEL_RUNNER_PORT=8080 +curl http://localhost:8080/metrics +``` + +### Unix Socket Access + +If using Unix sockets (default), you'll need to use a tool that supports Unix socket HTTP requests: + +```bash +# Using curl with Unix socket +curl --unix-socket model-runner.sock http://localhost/metrics +``` + +## Metrics Available + +The aggregated endpoint exposes all metrics from active llama.cpp runners, typically including: + +- **Request metrics**: Total requests, request duration, queue statistics +- **Token metrics**: Prompt tokens, generation tokens, tokens per second +- **Memory metrics**: Memory usage, cache statistics +- **Model metrics**: Model loading status, context usage +- **Performance metrics**: Processing latency, throughput + +All metrics retain their original names and types but gain the additional identifying labels. diff --git a/README.md b/README.md index dcfa54ba8..7de6e1702 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,9 @@ curl http://localhost:8080/engines/llama.cpp/v1/chat/completions -X POST -d '{ # Delete a model curl http://localhost:8080/models/ai/smollm2 -X DELETE + +# Get metrics +curl http://localhost:8080/metrics ``` The response will contain the model's reply: @@ -122,3 +125,22 @@ The response will contain the model's reply: } } ``` + +## Metrics + +The Model Runner exposes [the metrics endpoint](https://github.com/ggml-org/llama.cpp/tree/master/tools/server#get-metrics-prometheus-compatible-metrics-exporter) of llama.cpp server at the `/metrics` endpoint. This allows you to monitor model performance, request statistics, and resource usage. + +### Accessing Metrics + +```sh +# Get metrics in Prometheus format +curl http://localhost:8080/metrics +``` + +### Configuration + +- **Enable metrics (default)**: Metrics are enabled by default +- **Disable metrics**: Set `DISABLE_METRICS=1` environment variable +- **Monitoring integration**: Add the endpoint to your Prometheus configuration + +Check [METRICS.md](./METRICS.md) for more details. diff --git a/main.go b/main.go index 35a165ff5..ccf730e8d 100644 --- a/main.go +++ b/main.go @@ -112,6 +112,18 @@ func main() { router.Handle(route, scheduler) } + // Add metrics endpoint if enabled + if os.Getenv("DISABLE_METRICS") != "1" { + metricsHandler := metrics.NewAggregatedMetricsHandler( + log.WithField("component", "metrics"), + scheduler, + ) + router.Handle("/metrics", metricsHandler) + log.Info("Metrics endpoint enabled at /metrics") + } else { + log.Info("Metrics endpoint disabled") + } + server := &http.Server{Handler: router} serverErrors := make(chan error, 1) diff --git a/pkg/inference/backends/llamacpp/llamacpp_config.go b/pkg/inference/backends/llamacpp/llamacpp_config.go index 767cbb17c..c7990ce97 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config.go @@ -15,7 +15,7 @@ type Config struct { // NewDefaultLlamaCppConfig creates a new LlamaCppConfig with default values. func NewDefaultLlamaCppConfig() *Config { - args := append([]string{"--jinja", "-ngl", "100"}) + args := append([]string{"--jinja", "-ngl", "100", "--metrics"}) // Special case for Windows ARM64 if runtime.GOOS == "windows" && runtime.GOARCH == "arm64" { diff --git a/pkg/inference/backends/llamacpp/llamacpp_config_test.go b/pkg/inference/backends/llamacpp/llamacpp_config_test.go index c427a4e56..d1b937b28 100644 --- a/pkg/inference/backends/llamacpp/llamacpp_config_test.go +++ b/pkg/inference/backends/llamacpp/llamacpp_config_test.go @@ -81,6 +81,7 @@ func TestGetArgs(t *testing.T) { expected: []string{ "--jinja", "-ngl", "100", + "--metrics", "--model", modelPath, "--host", socket, }, @@ -91,6 +92,7 @@ func TestGetArgs(t *testing.T) { expected: []string{ "--jinja", "-ngl", "100", + "--metrics", "--model", modelPath, "--host", socket, "--embeddings", diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go index 40e536ed6..42fb70e7c 100644 --- a/pkg/inference/scheduling/scheduler.go +++ b/pkg/inference/scheduling/scheduler.go @@ -347,6 +347,74 @@ func (s *Scheduler) Unload(w http.ResponseWriter, r *http.Request) { } } +// GetAllActiveRunners returns information about all active runners +func (s *Scheduler) GetAllActiveRunners() []metrics.ActiveRunner { + runningBackends := s.getLoaderStatus(context.Background()) + var activeRunners []metrics.ActiveRunner + + for _, backend := range runningBackends { + // Find the runner slot for this backend/model combination + key := runnerKey{ + backend: backend.BackendName, + model: backend.ModelName, + mode: parseBackendMode(backend.Mode), + } + + if slot, exists := s.loader.runners[key]; exists { + socket, err := RunnerSocketPath(slot) + if err != nil { + s.log.Warnf("Failed to get socket path for runner %s/%s: %v", backend.BackendName, backend.ModelName, err) + continue + } + + activeRunners = append(activeRunners, metrics.ActiveRunner{ + BackendName: backend.BackendName, + ModelName: backend.ModelName, + Mode: backend.Mode, + Socket: socket, + }) + } + } + + return activeRunners +} + +// GetLlamaCppSocket returns the Unix socket path for an active llama.cpp runner +func (s *Scheduler) GetLlamaCppSocket() (string, error) { + runningBackends := s.getLoaderStatus(context.Background()) + + // Look for an active llama.cpp backend + for _, backend := range runningBackends { + if backend.BackendName == "llama.cpp" { + // Find the runner slot for this backend/model combination + key := runnerKey{ + backend: backend.BackendName, + model: backend.ModelName, + mode: parseBackendMode(backend.Mode), + } + + if slot, exists := s.loader.runners[key]; exists { + // Use the RunnerSocketPath function to get the socket path + return RunnerSocketPath(slot) + } + } + } + + return "", fmt.Errorf("no active llama.cpp backend found") +} + +// parseBackendMode converts a string mode to BackendMode +func parseBackendMode(mode string) inference.BackendMode { + switch mode { + case "completion": + return inference.BackendModeCompletion + case "embedding": + return inference.BackendModeEmbedding + default: + return inference.BackendModeCompletion + } +} + // ServeHTTP implements net/http.Handler.ServeHTTP. func (s *Scheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.lock.Lock() diff --git a/pkg/metrics/aggregated_handler.go b/pkg/metrics/aggregated_handler.go new file mode 100644 index 000000000..b4be0ec2d --- /dev/null +++ b/pkg/metrics/aggregated_handler.go @@ -0,0 +1,199 @@ +package metrics + +import ( + "context" + "fmt" + "io" + "net" + "net/http" + "sort" + "sync" + "time" + + "github.com/docker/model-runner/pkg/logging" +) + +// AggregatedMetricsHandler collects metrics from all active runners and aggregates them with labels +type AggregatedMetricsHandler struct { + log logging.Logger + scheduler SchedulerInterface + parser *PrometheusParser +} + +// NewAggregatedMetricsHandler creates a new aggregated metrics handler +func NewAggregatedMetricsHandler(log logging.Logger, scheduler SchedulerInterface) *AggregatedMetricsHandler { + return &AggregatedMetricsHandler{ + log: log, + scheduler: scheduler, + parser: NewPrometheusParser(), + } +} + +// ServeHTTP implements http.Handler for aggregated metrics +func (h *AggregatedMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + runners := h.scheduler.GetAllActiveRunners() + if len(runners) == 0 { + w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, "# No active runners\n") + return + } + + // Collect metrics from all runners concurrently + allMetrics, helpMap, typeMap := h.collectMetricsFromRunners(r.Context(), runners) + + // Generate aggregated response + h.writeAggregatedMetrics(w, allMetrics, helpMap, typeMap) +} + +// collectMetricsFromRunners fetches metrics from all runners concurrently +func (h *AggregatedMetricsHandler) collectMetricsFromRunners(ctx context.Context, runners []ActiveRunner) ([]PrometheusMetric, map[string]string, map[string]string) { + var wg sync.WaitGroup + var mu sync.Mutex + var allMetrics []PrometheusMetric + helpMap := make(map[string]string) + typeMap := make(map[string]string) + + for _, runner := range runners { + wg.Add(1) + go func(runner ActiveRunner) { + defer wg.Done() + + metrics, help, types, err := h.fetchRunnerMetrics(ctx, runner) + if err != nil { + h.log.Warnf("Failed to fetch metrics from runner %s/%s: %v", runner.BackendName, runner.ModelName, err) + return + } + + mu.Lock() + allMetrics = append(allMetrics, metrics...) + // Merge help and type maps + for k, v := range help { + helpMap[k] = v + } + for k, v := range types { + typeMap[k] = v + } + mu.Unlock() + }(runner) + } + + wg.Wait() + return allMetrics, helpMap, typeMap +} + +// fetchRunnerMetrics fetches metrics from a single runner +func (h *AggregatedMetricsHandler) fetchRunnerMetrics(ctx context.Context, runner ActiveRunner) ([]PrometheusMetric, map[string]string, map[string]string, error) { + // Create HTTP client for Unix socket communication + client := &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + return net.DialTimeout("unix", runner.Socket, 5*time.Second) + }, + }, + Timeout: 10 * time.Second, + } + + // Create request to the runner's metrics endpoint + req, err := http.NewRequestWithContext(ctx, "GET", "http://unix/metrics", nil) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to create metrics request: %w", err) + } + + // Make the request + resp, err := client.Do(req) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to fetch metrics: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, nil, nil, fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode) + } + + // Read response body + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to read metrics response: %w", err) + } + + // Parse metrics + metrics, err := h.parser.ParseMetrics(string(body)) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to parse metrics: %w", err) + } + + // Add labels to each metric + labels := map[string]string{ + "backend": runner.BackendName, + "model": runner.ModelName, + "mode": runner.Mode, + } + + for i := range metrics { + metrics[i].AddLabels(labels) + } + + // Extract help and type information + helpMap := make(map[string]string) + typeMap := make(map[string]string) + for _, metric := range metrics { + if metric.Help != "" { + helpMap[metric.Name] = metric.Help + } + if metric.Type != "" { + typeMap[metric.Name] = metric.Type + } + } + + return metrics, helpMap, typeMap, nil +} + +// writeAggregatedMetrics writes the aggregated metrics response +func (h *AggregatedMetricsHandler) writeAggregatedMetrics(w http.ResponseWriter, metrics []PrometheusMetric, helpMap map[string]string, typeMap map[string]string) { + w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") + w.WriteHeader(http.StatusOK) + + // Group metrics by name for better organization + metricGroups := make(map[string][]PrometheusMetric) + for _, metric := range metrics { + metricGroups[metric.Name] = append(metricGroups[metric.Name], metric) + } + + // Sort metric names for consistent output + var metricNames []string + for name := range metricGroups { + metricNames = append(metricNames, name) + } + sort.Strings(metricNames) + + // Write metrics grouped by name + for _, name := range metricNames { + group := metricGroups[name] + + // Write HELP comment if available + if help, exists := helpMap[name]; exists { + fmt.Fprintf(w, "# HELP %s %s\n", name, help) + } + + // Write TYPE comment if available + if metricType, exists := typeMap[name]; exists { + fmt.Fprintf(w, "# TYPE %s %s\n", name, metricType) + } + + // Write all metrics with this name + for _, metric := range group { + fmt.Fprintf(w, "%s\n", metric.FormatMetric()) + } + + // Add blank line between metric groups for readability + fmt.Fprintf(w, "\n") + } + + h.log.Debugf("Successfully served aggregated metrics for %d metric groups", len(metricGroups)) +} diff --git a/pkg/metrics/label_ordering_test.go b/pkg/metrics/label_ordering_test.go new file mode 100644 index 000000000..5a832b520 --- /dev/null +++ b/pkg/metrics/label_ordering_test.go @@ -0,0 +1,88 @@ +package metrics + +import ( + "testing" +) + +func TestConsistentLabelOrdering(t *testing.T) { + // Create a metric with labels in different orders + metric := PrometheusMetric{ + Name: "test_metric", + Labels: map[string]string{ + "model": "ai/llama3.2", + "mode": "completion", + "backend": "llama.cpp", + }, + Value: "123", + } + + // Format the metric multiple times + results := make([]string, 10) + for i := 0; i < 10; i++ { + results[i] = metric.FormatMetric() + } + + // All results should be identical + expected := `test_metric{backend="llama.cpp",mode="completion",model="ai/llama3.2"} 123` + for i, result := range results { + if result != expected { + t.Errorf("Iteration %d: Expected '%s', got '%s'", i, expected, result) + } + } + + // Verify all results are the same + for i := 1; i < len(results); i++ { + if results[i] != results[0] { + t.Errorf("Inconsistent ordering: result[0]='%s', result[%d]='%s'", results[0], i, results[i]) + } + } +} + +func TestLabelOrderingWithDifferentKeys(t *testing.T) { + tests := []struct { + name string + labels map[string]string + expected string + }{ + { + name: "backend_model_mode", + labels: map[string]string{ + "backend": "llama.cpp", + "model": "ai/llama3.2", + "mode": "completion", + }, + expected: `test{backend="llama.cpp",mode="completion",model="ai/llama3.2"} 42`, + }, + { + name: "alphabetical_order", + labels: map[string]string{ + "z_last": "last", + "a_first": "first", + "m_mid": "middle", + }, + expected: `test{a_first="first",m_mid="middle",z_last="last"} 42`, + }, + { + name: "single_label", + labels: map[string]string{ + "single": "value", + }, + expected: `test{single="value"} 42`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metric := PrometheusMetric{ + Name: "test", + Labels: tt.labels, + Value: "42", + } + + result := metric.FormatMetric() + if result != tt.expected { + t.Errorf("Expected '%s', got '%s'", tt.expected, result) + } + }) + } +} diff --git a/pkg/metrics/prometheus_parser.go b/pkg/metrics/prometheus_parser.go new file mode 100644 index 000000000..4ec476b5b --- /dev/null +++ b/pkg/metrics/prometheus_parser.go @@ -0,0 +1,155 @@ +package metrics + +import ( + "bufio" + "fmt" + "regexp" + "sort" + "strings" +) + +// PrometheusMetric represents a single Prometheus metric +type PrometheusMetric struct { + Name string + Labels map[string]string + Value string + Help string + Type string +} + +// PrometheusParser parses Prometheus text format metrics +type PrometheusParser struct { + commentRegex *regexp.Regexp + metricRegex *regexp.Regexp +} + +// NewPrometheusParser creates a new Prometheus metrics parser +func NewPrometheusParser() *PrometheusParser { + return &PrometheusParser{ + // Matches # HELP and # TYPE comments + commentRegex: regexp.MustCompile(`^#\s+(HELP|TYPE)\s+(\S+)\s+(.*)$`), + // Matches metric lines with optional labels + metricRegex: regexp.MustCompile(`^([a-zA-Z_:][a-zA-Z0-9_:]*?)(\{[^}]*})?\s+(\S+)(\s+\d+)?$`), + } +} + +// ParseMetrics parses Prometheus text format and returns structured metrics +func (p *PrometheusParser) ParseMetrics(content string) ([]PrometheusMetric, error) { + var metrics []PrometheusMetric + helpMap := make(map[string]string) + typeMap := make(map[string]string) + + scanner := bufio.NewScanner(strings.NewReader(content)) + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + // Skip empty lines + if line == "" { + continue + } + + // Handle comments (HELP and TYPE) + if strings.HasPrefix(line, "#") { + matches := p.commentRegex.FindStringSubmatch(line) + if len(matches) == 4 { + directive := matches[1] + metricName := matches[2] + content := matches[3] + + switch directive { + case "HELP": + helpMap[metricName] = content + case "TYPE": + typeMap[metricName] = content + } + } + continue + } + + // Parse metric lines + matches := p.metricRegex.FindStringSubmatch(line) + if len(matches) >= 4 { + metricName := matches[1] + labelsStr := matches[2] + value := matches[3] + + // Parse labels if present + labels := make(map[string]string) + if labelsStr != "" { + // Remove surrounding braces + labelsStr = strings.Trim(labelsStr, "{}") + if labelsStr != "" { + parsedLabels, err := p.parseLabels(labelsStr) + if err != nil { + continue // Skip malformed metrics + } + labels = parsedLabels + } + } + + metric := PrometheusMetric{ + Name: metricName, + Labels: labels, + Value: value, + Help: helpMap[metricName], + Type: typeMap[metricName], + } + + metrics = append(metrics, metric) + } + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error scanning metrics: %w", err) + } + + return metrics, nil +} + +// parseLabels parses label string like 'key1="value1",key2="value2"' +func (p *PrometheusParser) parseLabels(labelsStr string) (map[string]string, error) { + labels := make(map[string]string) + // Split by comma, then key="value" + re := regexp.MustCompile(`(\w+)="([^"]*)"`) + + matches := re.FindAllStringSubmatch(labelsStr, -1) + for _, match := range matches { + if len(match) == 3 { + labels[match[1]] = match[2] + } + } + return labels, nil +} + +// AddLabels adds additional labels to a metric +func (m *PrometheusMetric) AddLabels(additionalLabels map[string]string) { + if m.Labels == nil { + m.Labels = make(map[string]string) + } + + for key, value := range additionalLabels { + m.Labels[key] = value + } +} + +// FormatMetric formats a metric back to Prometheus text format +func (m *PrometheusMetric) FormatMetric() string { + if len(m.Labels) == 0 { + return fmt.Sprintf("%s %s", m.Name, m.Value) + } + + // Sort label keys to ensure consistent output order + var keys []string + for key := range m.Labels { + keys = append(keys, key) + } + sort.Strings(keys) + + var labelPairs []string + for _, key := range keys { + labelPairs = append(labelPairs, fmt.Sprintf(`%s="%s"`, key, m.Labels[key])) + } + + return fmt.Sprintf("%s{%s} %s", m.Name, strings.Join(labelPairs, ","), m.Value) +} diff --git a/pkg/metrics/prometheus_parser_test.go b/pkg/metrics/prometheus_parser_test.go new file mode 100644 index 000000000..6094df465 --- /dev/null +++ b/pkg/metrics/prometheus_parser_test.go @@ -0,0 +1,177 @@ +package metrics + +import ( + "testing" +) + +func TestPrometheusParser_ParseMetrics(t *testing.T) { + parser := NewPrometheusParser() + + input := `# HELP http_requests_total Total number of HTTP requests +# TYPE http_requests_total counter +http_requests_total{method="get",code="200"} 1027 +http_requests_total{method="post",code="200"} 3 +http_requests_total{method="post",code="400"} 3 + +# HELP memory_usage_bytes Memory usage in bytes +# TYPE memory_usage_bytes gauge +memory_usage_bytes 1234567890 + +# Simple metric without labels +simple_metric 42 +` + + metrics, err := parser.ParseMetrics(input) + if err != nil { + t.Fatalf("Failed to parse metrics: %v", err) + } + + if len(metrics) != 5 { + t.Errorf("Expected 5 metrics, got %d", len(metrics)) + } + + // Test first metric with labels + metric := metrics[0] + if metric.Name != "http_requests_total" { + t.Errorf("Expected name 'http_requests_total', got '%s'", metric.Name) + } + if metric.Value != "1027" { + t.Errorf("Expected value '1027', got '%s'", metric.Value) + } + if metric.Help != "Total number of HTTP requests" { + t.Errorf("Expected help 'Total number of HTTP requests', got '%s'", metric.Help) + } + if metric.Type != "counter" { + t.Errorf("Expected type 'counter', got '%s'", metric.Type) + } + if len(metric.Labels) != 2 { + t.Errorf("Expected 2 labels, got %d", len(metric.Labels)) + } + if metric.Labels["method"] != "get" { + t.Errorf("Expected method='get', got '%s'", metric.Labels["method"]) + } + if metric.Labels["code"] != "200" { + t.Errorf("Expected code='200', got '%s'", metric.Labels["code"]) + } + + // Test simple metric without labels + simpleMetric := metrics[4] + if simpleMetric.Name != "simple_metric" { + t.Errorf("Expected name 'simple_metric', got '%s'", simpleMetric.Name) + } + if simpleMetric.Value != "42" { + t.Errorf("Expected value '42', got '%s'", simpleMetric.Value) + } + if len(simpleMetric.Labels) != 0 { + t.Errorf("Expected 0 labels, got %d", len(simpleMetric.Labels)) + } +} + +func TestPrometheusMetric_AddLabels(t *testing.T) { + metric := PrometheusMetric{ + Name: "test_metric", + Labels: map[string]string{"existing": "value"}, + Value: "123", + } + + additionalLabels := map[string]string{ + "backend": "llama.cpp", + "model": "test-model", + } + + metric.AddLabels(additionalLabels) + + if len(metric.Labels) != 3 { + t.Errorf("Expected 3 labels, got %d", len(metric.Labels)) + } + + if metric.Labels["existing"] != "value" { + t.Errorf("Expected existing='value', got '%s'", metric.Labels["existing"]) + } + if metric.Labels["backend"] != "llama.cpp" { + t.Errorf("Expected backend='llama.cpp', got '%s'", metric.Labels["backend"]) + } + if metric.Labels["model"] != "test-model" { + t.Errorf("Expected model='test-model', got '%s'", metric.Labels["model"]) + } +} + +func TestPrometheusMetric_FormatMetric(t *testing.T) { + // Test metric without labels + metric1 := PrometheusMetric{ + Name: "simple_metric", + Value: "42", + } + + expected1 := "simple_metric 42" + result1 := metric1.FormatMetric() + if result1 != expected1 { + t.Errorf("Expected '%s', got '%s'", expected1, result1) + } + + // Test metric with labels + metric2 := PrometheusMetric{ + Name: "labeled_metric", + Labels: map[string]string{ + "backend": "llama.cpp", + "model": "test-model", + }, + Value: "123", + } + + result2 := metric2.FormatMetric() + // With sorted keys, the order should always be: backend, model (alphabetical) + expected2 := `labeled_metric{backend="llama.cpp",model="test-model"} 123` + + if result2 != expected2 { + t.Errorf("Expected '%s', got '%s'", expected2, result2) + } +} + +func TestPrometheusParser_parseLabels(t *testing.T) { + parser := NewPrometheusParser() + + tests := []struct { + input string + expected map[string]string + }{ + { + input: `method="get",code="200"`, + expected: map[string]string{ + "method": "get", + "code": "200", + }, + }, + { + input: `single="value"`, + expected: map[string]string{ + "single": "value", + }, + }, + { + input: ``, + expected: map[string]string{}, + }, + } + + for _, test := range tests { + result, err := parser.parseLabels(test.input) + if err != nil { + t.Errorf("Failed to parse labels '%s': %v", test.input, err) + continue + } + + if len(result) != len(test.expected) { + t.Errorf("For input '%s', expected %d labels, got %d", test.input, len(test.expected), len(result)) + continue + } + + for key, expectedValue := range test.expected { + if actualValue, exists := result[key]; !exists { + t.Errorf("For input '%s', missing expected key '%s'", test.input, key) + } else if actualValue != expectedValue { + t.Errorf("For input '%s', key '%s': expected '%s', got '%s'", test.input, key, expectedValue, actualValue) + } + } + } +} diff --git a/pkg/metrics/scheduler_proxy.go b/pkg/metrics/scheduler_proxy.go new file mode 100644 index 000000000..f02ffd2bc --- /dev/null +++ b/pkg/metrics/scheduler_proxy.go @@ -0,0 +1,107 @@ +package metrics + +import ( + "io" + "net" + "net/http" + "time" + + "github.com/docker/model-runner/pkg/logging" +) + +// SchedulerMetricsHandler handles metrics requests by finding active llama.cpp runners +type SchedulerMetricsHandler struct { + log logging.Logger + scheduler SchedulerInterface +} + +// SchedulerInterface defines the methods we need from the scheduler +type SchedulerInterface interface { + GetRunningBackends(w http.ResponseWriter, r *http.Request) + GetLlamaCppSocket() (string, error) + GetAllActiveRunners() []ActiveRunner +} + +// ActiveRunner contains information about an active runner +type ActiveRunner struct { + BackendName string + ModelName string + Mode string + Socket string +} + +// NewSchedulerMetricsHandler creates a new scheduler-based metrics handler +func NewSchedulerMetricsHandler(log logging.Logger, scheduler SchedulerInterface) *SchedulerMetricsHandler { + return &SchedulerMetricsHandler{ + log: log, + scheduler: scheduler, + } +} + +// ServeHTTP implements http.Handler for metrics proxying via scheduler +func (h *SchedulerMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + // Get the socket path for the active llama.cpp runner + socket, err := h.scheduler.GetLlamaCppSocket() + if err != nil { + h.log.Errorf("Failed to get llama.cpp socket: %v", err) + http.Error(w, "Metrics endpoint not available", http.StatusServiceUnavailable) + return + } + + // Create HTTP client for Unix socket communication + client := &http.Client{ + Transport: &http.Transport{ + Dial: func(network, addr string) (net.Conn, error) { + return net.DialTimeout("unix", socket, 5*time.Second) + }, + }, + Timeout: 10 * time.Second, + } + + // Create request to the backend metrics endpoint + req, err := http.NewRequestWithContext(r.Context(), "GET", "http://unix/metrics", nil) + if err != nil { + h.log.Errorf("Failed to create metrics request: %v", err) + http.Error(w, "Failed to create metrics request", http.StatusInternalServerError) + return + } + + // Forward relevant headers + for key, values := range r.Header { + for _, value := range values { + req.Header.Add(key, value) + } + } + + // Make the request to the backend + resp, err := client.Do(req) + if err != nil { + h.log.Errorf("Failed to fetch metrics from backend: %v", err) + http.Error(w, "Backend metrics unavailable", http.StatusServiceUnavailable) + return + } + defer resp.Body.Close() + + // Copy response headers + for key, values := range resp.Header { + for _, value := range values { + w.Header().Add(key, value) + } + } + + // Set status code + w.WriteHeader(resp.StatusCode) + + // Copy response body + if _, err := io.Copy(w, resp.Body); err != nil { + h.log.Errorf("Failed to copy metrics response: %v", err) + return + } + + h.log.Debugf("Successfully proxied metrics request") +} From dac689435a3ed42849dee9d45f27a60ed409d3a8 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Fri, 13 Jun 2025 10:56:27 +0200 Subject: [PATCH 2/7] Remove NewSchedulerMetricsHandler, not used --- pkg/metrics/scheduler_proxy.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pkg/metrics/scheduler_proxy.go b/pkg/metrics/scheduler_proxy.go index f02ffd2bc..d462d8430 100644 --- a/pkg/metrics/scheduler_proxy.go +++ b/pkg/metrics/scheduler_proxy.go @@ -30,14 +30,6 @@ type ActiveRunner struct { Socket string } -// NewSchedulerMetricsHandler creates a new scheduler-based metrics handler -func NewSchedulerMetricsHandler(log logging.Logger, scheduler SchedulerInterface) *SchedulerMetricsHandler { - return &SchedulerMetricsHandler{ - log: log, - scheduler: scheduler, - } -} - // ServeHTTP implements http.Handler for metrics proxying via scheduler func (h *SchedulerMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { From d8e2620afe2ca2c172657594167ea48121085065 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Fri, 13 Jun 2025 12:13:43 +0200 Subject: [PATCH 3/7] replace custom parser with official Prometheus libraries - Remove custom prometheus_metrics.go - Use expfmt.TextParser for parsing and expfmt.NewEncoder for output --- go.mod | 26 ++-- go.sum | 66 ++++++---- pkg/metrics/aggregated_handler.go | 150 +++++++++------------- pkg/metrics/label_ordering_test.go | 88 ------------- pkg/metrics/prometheus_parser.go | 155 ---------------------- pkg/metrics/prometheus_parser_test.go | 177 -------------------------- 6 files changed, 118 insertions(+), 544 deletions(-) delete mode 100644 pkg/metrics/label_ordering_test.go delete mode 100644 pkg/metrics/prometheus_parser.go delete mode 100644 pkg/metrics/prometheus_parser_test.go diff --git a/go.mod b/go.mod index 91ce38df1..1dfe2b3cc 100644 --- a/go.mod +++ b/go.mod @@ -10,9 +10,12 @@ require ( github.com/jaypipes/ghw v0.16.0 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 + github.com/prometheus/client_model v0.6.2 + github.com/prometheus/common v0.64.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.10.0 - golang.org/x/sync v0.12.0 + golang.org/x/sync v0.14.0 + google.golang.org/protobuf v1.36.6 ) require ( @@ -21,7 +24,7 @@ require ( github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/distribution/reference v0.6.0 // indirect github.com/docker/cli v27.5.0+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect @@ -34,27 +37,30 @@ require ( github.com/henvic/httpretty v0.1.4 // indirect github.com/jaypipes/pcidb v1.0.1 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/compress v1.18.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/moby/locker v1.0.1 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect github.com/vbatts/tar-split v0.11.6 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 // indirect go.opentelemetry.io/otel v1.35.0 // indirect go.opentelemetry.io/otel/metric v1.35.0 // indirect go.opentelemetry.io/otel/trace v1.35.0 // indirect - golang.org/x/crypto v0.35.0 // indirect - golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect - golang.org/x/mod v0.22.0 // indirect - golang.org/x/sys v0.31.0 // indirect - golang.org/x/tools v0.29.0 // indirect + golang.org/x/crypto v0.37.0 // indirect + golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 // indirect + golang.org/x/mod v0.24.0 // indirect + golang.org/x/sys v0.33.0 // indirect + golang.org/x/tools v0.32.0 // indirect gonum.org/v1/gonum v0.15.1 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e // indirect + google.golang.org/grpc v1.72.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect howett.net/plist v1.0.0 // indirect ) diff --git a/go.sum b/go.sum index 2333131bf..bb62a23d8 100644 --- a/go.sum +++ b/go.sum @@ -27,8 +27,9 @@ github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9 github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++dYSw40= github.com/containerd/typeurl/v2 v2.2.3/go.mod h1:95ljDnPfD3bAbDJRugOiShd/DlAAsxGtUBhJxIn7SCk= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= github.com/docker/cli v27.5.0+incompatible h1:aMphQkcGtpHixwwhAXJT1rrK/detk2JIvDaFkLctbGM= @@ -58,6 +59,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/go-containerregistry v0.20.3 h1:oNx7IdTI936V8CQRveCjaxOiegWwvM7kqkbXTpyiovI= github.com/google/go-containerregistry v0.20.3/go.mod h1:w00pIgBRDVUDFM6bq+Qx8lwNWK+cxgCuX1vd3PIBDNI= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gpustack/gguf-parser-go v0.14.1 h1:tmz2eTnSEFfE52V10FESqo9oAUquZ6JKQFntWC/wrEg= github.com/gpustack/gguf-parser-go v0.14.1/go.mod h1:GvHh1Kvvq5ojCOsJ5UpwiJJmIjFw3Qk5cW7R+CZ3IJo= github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU= @@ -69,8 +72,8 @@ github.com/jaypipes/pcidb v1.0.1/go.mod h1:6xYUz/yYEyOkIkUt2t2J2folIuZ4Yg6uByCGF github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= -github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -88,14 +91,21 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4= +github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 h1:18kd+8ZUlt/ARXhljq+14TwAoKa61q6dX8jtwOf6DH8= @@ -115,39 +125,43 @@ go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0 h1:yd02MEjBdJkG3uabWP9apV+OuWRIXGDuJEUJbOHmCFU= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.58.0/go.mod h1:umTcuxiv1n/s/S6/c2AT/g2CQ7u5C59sHDNmfSwgz7Q= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= go.opentelemetry.io/otel/metric v1.35.0/go.mod h1:nKVFgxBZ2fReX6IlyW28MgZojkoAkJGaE8CpgeAU3oE= +go.opentelemetry.io/otel/sdk v1.35.0 h1:iPctf8iprVySXSKJffSS79eOjl9pvxV9ZqOWT0QejKY= +go.opentelemetry.io/otel/sdk v1.35.0/go.mod h1:+ga1bZliga3DxJ3CQGg3updiaAJoNECOgJREo9KHGQg= +go.opentelemetry.io/otel/sdk/metric v1.35.0 h1:1RriWBmCKgkeHEhM7a2uMjMUfP7MsOF5JpUCaEqEI9o= +go.opentelemetry.io/otel/sdk/metric v1.35.0/go.mod h1:is6XYCUMpcKi+ZsOvfluY5YstFnhW0BidkR+gL+qN+w= go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt/xgMs= go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= -golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= -golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= -golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= -golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/crypto v0.37.0 h1:kJNSjF/Xp7kU0iB2Z+9viTPMW4EqqsrywMXLJOOsXSE= +golang.org/x/crypto v0.37.0/go.mod h1:vg+k43peMZ0pUMhYmVAWysMK35e6ioLh3wB8ZCAfbVc= +golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8 h1:yqrTHse8TCMW1M1ZCP+VAR/l0kKxwaAIqN/il7x4voA= +golang.org/x/exp v0.0.0-20250106191152-7588d65b2ba8/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= +golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= +golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= -golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= +golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= -golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= -golang.org/x/tools v0.29.0 h1:Xx0h3TtM9rzQpQuR4dKLrdglAmCEN5Oi+P74JdhdzXE= -golang.org/x/tools v0.29.0/go.mod h1:KMQVMRsVxU6nHCFXrBPhDB8XncLNLM0lIy/F14RP588= +golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= +golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/term v0.31.0 h1:erwDkOK1Msy6offm1mOgvspSkslFnIGsFnxOKoufg3o= +golang.org/x/term v0.31.0/go.mod h1:R4BeIy7D95HzImkxGkTW1UQTtP54tio2RyHz7PwK0aw= +golang.org/x/tools v0.32.0 h1:Q7N1vhpkQv7ybVzLFtTjvQya2ewbwNDZzUgfXGqtMWU= +golang.org/x/tools v0.32.0/go.mod h1:ZxrU41P/wAbZD8EDa6dDCa6XfpkhJ7HFMjHJXfBDu8s= gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= -google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0= -google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw= -google.golang.org/protobuf v1.36.3 h1:82DV7MYdb8anAVi3qge1wSnMDrnKK7ebr+I0hHRN1BU= -google.golang.org/protobuf v1.36.3/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e h1:ztQaXfzEXTmCBvbtWYRhJxW+0iJcz2qXfd38/e9l7bA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/grpc v1.72.0 h1:S7UkcVa60b5AAQTaO6ZKamFp1zMZSU0fGDK2WZLbBnM= +google.golang.org/grpc v1.72.0/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= +google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/pkg/metrics/aggregated_handler.go b/pkg/metrics/aggregated_handler.go index b4be0ec2d..ccebab6ed 100644 --- a/pkg/metrics/aggregated_handler.go +++ b/pkg/metrics/aggregated_handler.go @@ -6,10 +6,14 @@ import ( "io" "net" "net/http" - "sort" + "strings" "sync" "time" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "google.golang.org/protobuf/proto" + "github.com/docker/model-runner/pkg/logging" ) @@ -17,7 +21,6 @@ import ( type AggregatedMetricsHandler struct { log logging.Logger scheduler SchedulerInterface - parser *PrometheusParser } // NewAggregatedMetricsHandler creates a new aggregated metrics handler @@ -25,7 +28,6 @@ func NewAggregatedMetricsHandler(log logging.Logger, scheduler SchedulerInterfac return &AggregatedMetricsHandler{ log: log, scheduler: scheduler, - parser: NewPrometheusParser(), } } @@ -44,51 +46,49 @@ func (h *AggregatedMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ return } - // Collect metrics from all runners concurrently - allMetrics, helpMap, typeMap := h.collectMetricsFromRunners(r.Context(), runners) + // Collect and aggregate metrics from all runners + allFamilies := h.collectAndAggregateMetrics(r.Context(), runners) - // Generate aggregated response - h.writeAggregatedMetrics(w, allMetrics, helpMap, typeMap) + // Write aggregated response using Prometheus encoder + h.writeAggregatedMetrics(w, allFamilies) } -// collectMetricsFromRunners fetches metrics from all runners concurrently -func (h *AggregatedMetricsHandler) collectMetricsFromRunners(ctx context.Context, runners []ActiveRunner) ([]PrometheusMetric, map[string]string, map[string]string) { +// collectAndAggregateMetrics fetches metrics from all runners and aggregates them +func (h *AggregatedMetricsHandler) collectAndAggregateMetrics(ctx context.Context, runners []ActiveRunner) map[string]*dto.MetricFamily { var wg sync.WaitGroup var mu sync.Mutex - var allMetrics []PrometheusMetric - helpMap := make(map[string]string) - typeMap := make(map[string]string) + allFamilies := make(map[string]*dto.MetricFamily) for _, runner := range runners { wg.Add(1) go func(runner ActiveRunner) { defer wg.Done() - metrics, help, types, err := h.fetchRunnerMetrics(ctx, runner) + families, err := h.fetchRunnerMetrics(ctx, runner) if err != nil { h.log.Warnf("Failed to fetch metrics from runner %s/%s: %v", runner.BackendName, runner.ModelName, err) return } - mu.Lock() - allMetrics = append(allMetrics, metrics...) - // Merge help and type maps - for k, v := range help { - helpMap[k] = v - } - for k, v := range types { - typeMap[k] = v + // Add labels to metrics and merge into allFamilies + labels := map[string]string{ + "backend": runner.BackendName, + "model": runner.ModelName, + "mode": runner.Mode, } + + mu.Lock() + h.addLabelsAndMerge(families, labels, allFamilies) mu.Unlock() }(runner) } wg.Wait() - return allMetrics, helpMap, typeMap + return allFamilies } -// fetchRunnerMetrics fetches metrics from a single runner -func (h *AggregatedMetricsHandler) fetchRunnerMetrics(ctx context.Context, runner ActiveRunner) ([]PrometheusMetric, map[string]string, map[string]string, error) { +// fetchRunnerMetrics fetches and parses metrics from a single runner +func (h *AggregatedMetricsHandler) fetchRunnerMetrics(ctx context.Context, runner ActiveRunner) (map[string]*dto.MetricFamily, error) { // Create HTTP client for Unix socket communication client := &http.Client{ Transport: &http.Transport{ @@ -102,98 +102,72 @@ func (h *AggregatedMetricsHandler) fetchRunnerMetrics(ctx context.Context, runne // Create request to the runner's metrics endpoint req, err := http.NewRequestWithContext(ctx, "GET", "http://unix/metrics", nil) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to create metrics request: %w", err) + return nil, fmt.Errorf("failed to create metrics request: %w", err) } // Make the request resp, err := client.Do(req) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to fetch metrics: %w", err) + return nil, fmt.Errorf("failed to fetch metrics: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, nil, nil, fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode) + return nil, fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode) } // Read response body body, err := io.ReadAll(resp.Body) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to read metrics response: %w", err) + return nil, fmt.Errorf("failed to read metrics response: %w", err) } - // Parse metrics - metrics, err := h.parser.ParseMetrics(string(body)) + // Parse metrics using official Prometheus parser + parser := expfmt.TextParser{} + families, err := parser.TextToMetricFamilies(strings.NewReader(string(body))) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to parse metrics: %w", err) - } - - // Add labels to each metric - labels := map[string]string{ - "backend": runner.BackendName, - "model": runner.ModelName, - "mode": runner.Mode, + return nil, fmt.Errorf("failed to parse metrics: %w", err) } - for i := range metrics { - metrics[i].AddLabels(labels) - } + return families, nil +} - // Extract help and type information - helpMap := make(map[string]string) - typeMap := make(map[string]string) - for _, metric := range metrics { - if metric.Help != "" { - helpMap[metric.Name] = metric.Help +// addLabelsAndMerge adds labels to metrics and merges them into the aggregated families +func (h *AggregatedMetricsHandler) addLabelsAndMerge(families map[string]*dto.MetricFamily, labels map[string]string, allFamilies map[string]*dto.MetricFamily) { + for name, family := range families { + // Add labels to each metric in the family + for _, metric := range family.GetMetric() { + // Add our labels to the existing label pairs + for key, value := range labels { + metric.Label = append(metric.Label, &dto.LabelPair{ + Name: proto.String(key), + Value: proto.String(value), + }) + } } - if metric.Type != "" { - typeMap[metric.Name] = metric.Type + + // Merge into allFamilies + if existingFamily, exists := allFamilies[name]; exists { + // Append metrics to existing family + existingFamily.Metric = append(existingFamily.Metric, family.GetMetric()...) + } else { + // Create new family + allFamilies[name] = family } } - - return metrics, helpMap, typeMap, nil } -// writeAggregatedMetrics writes the aggregated metrics response -func (h *AggregatedMetricsHandler) writeAggregatedMetrics(w http.ResponseWriter, metrics []PrometheusMetric, helpMap map[string]string, typeMap map[string]string) { +// writeAggregatedMetrics writes the aggregated metrics using Prometheus encoder +func (h *AggregatedMetricsHandler) writeAggregatedMetrics(w http.ResponseWriter, families map[string]*dto.MetricFamily) { w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8") w.WriteHeader(http.StatusOK) - // Group metrics by name for better organization - metricGroups := make(map[string][]PrometheusMetric) - for _, metric := range metrics { - metricGroups[metric.Name] = append(metricGroups[metric.Name], metric) - } - - // Sort metric names for consistent output - var metricNames []string - for name := range metricGroups { - metricNames = append(metricNames, name) - } - sort.Strings(metricNames) - - // Write metrics grouped by name - for _, name := range metricNames { - group := metricGroups[name] - - // Write HELP comment if available - if help, exists := helpMap[name]; exists { - fmt.Fprintf(w, "# HELP %s %s\n", name, help) + // Use Prometheus encoder to write metrics + encoder := expfmt.NewEncoder(w, expfmt.NewFormat(expfmt.TypeTextPlain)) + for _, family := range families { + if err := encoder.Encode(family); err != nil { + h.log.Errorf("Failed to encode metric family %s: %v", *family.Name, err) + continue } - - // Write TYPE comment if available - if metricType, exists := typeMap[name]; exists { - fmt.Fprintf(w, "# TYPE %s %s\n", name, metricType) - } - - // Write all metrics with this name - for _, metric := range group { - fmt.Fprintf(w, "%s\n", metric.FormatMetric()) - } - - // Add blank line between metric groups for readability - fmt.Fprintf(w, "\n") } - - h.log.Debugf("Successfully served aggregated metrics for %d metric groups", len(metricGroups)) } diff --git a/pkg/metrics/label_ordering_test.go b/pkg/metrics/label_ordering_test.go deleted file mode 100644 index 5a832b520..000000000 --- a/pkg/metrics/label_ordering_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package metrics - -import ( - "testing" -) - -func TestConsistentLabelOrdering(t *testing.T) { - // Create a metric with labels in different orders - metric := PrometheusMetric{ - Name: "test_metric", - Labels: map[string]string{ - "model": "ai/llama3.2", - "mode": "completion", - "backend": "llama.cpp", - }, - Value: "123", - } - - // Format the metric multiple times - results := make([]string, 10) - for i := 0; i < 10; i++ { - results[i] = metric.FormatMetric() - } - - // All results should be identical - expected := `test_metric{backend="llama.cpp",mode="completion",model="ai/llama3.2"} 123` - for i, result := range results { - if result != expected { - t.Errorf("Iteration %d: Expected '%s', got '%s'", i, expected, result) - } - } - - // Verify all results are the same - for i := 1; i < len(results); i++ { - if results[i] != results[0] { - t.Errorf("Inconsistent ordering: result[0]='%s', result[%d]='%s'", results[0], i, results[i]) - } - } -} - -func TestLabelOrderingWithDifferentKeys(t *testing.T) { - tests := []struct { - name string - labels map[string]string - expected string - }{ - { - name: "backend_model_mode", - labels: map[string]string{ - "backend": "llama.cpp", - "model": "ai/llama3.2", - "mode": "completion", - }, - expected: `test{backend="llama.cpp",mode="completion",model="ai/llama3.2"} 42`, - }, - { - name: "alphabetical_order", - labels: map[string]string{ - "z_last": "last", - "a_first": "first", - "m_mid": "middle", - }, - expected: `test{a_first="first",m_mid="middle",z_last="last"} 42`, - }, - { - name: "single_label", - labels: map[string]string{ - "single": "value", - }, - expected: `test{single="value"} 42`, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - metric := PrometheusMetric{ - Name: "test", - Labels: tt.labels, - Value: "42", - } - - result := metric.FormatMetric() - if result != tt.expected { - t.Errorf("Expected '%s', got '%s'", tt.expected, result) - } - }) - } -} diff --git a/pkg/metrics/prometheus_parser.go b/pkg/metrics/prometheus_parser.go deleted file mode 100644 index 4ec476b5b..000000000 --- a/pkg/metrics/prometheus_parser.go +++ /dev/null @@ -1,155 +0,0 @@ -package metrics - -import ( - "bufio" - "fmt" - "regexp" - "sort" - "strings" -) - -// PrometheusMetric represents a single Prometheus metric -type PrometheusMetric struct { - Name string - Labels map[string]string - Value string - Help string - Type string -} - -// PrometheusParser parses Prometheus text format metrics -type PrometheusParser struct { - commentRegex *regexp.Regexp - metricRegex *regexp.Regexp -} - -// NewPrometheusParser creates a new Prometheus metrics parser -func NewPrometheusParser() *PrometheusParser { - return &PrometheusParser{ - // Matches # HELP and # TYPE comments - commentRegex: regexp.MustCompile(`^#\s+(HELP|TYPE)\s+(\S+)\s+(.*)$`), - // Matches metric lines with optional labels - metricRegex: regexp.MustCompile(`^([a-zA-Z_:][a-zA-Z0-9_:]*?)(\{[^}]*})?\s+(\S+)(\s+\d+)?$`), - } -} - -// ParseMetrics parses Prometheus text format and returns structured metrics -func (p *PrometheusParser) ParseMetrics(content string) ([]PrometheusMetric, error) { - var metrics []PrometheusMetric - helpMap := make(map[string]string) - typeMap := make(map[string]string) - - scanner := bufio.NewScanner(strings.NewReader(content)) - - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - - // Skip empty lines - if line == "" { - continue - } - - // Handle comments (HELP and TYPE) - if strings.HasPrefix(line, "#") { - matches := p.commentRegex.FindStringSubmatch(line) - if len(matches) == 4 { - directive := matches[1] - metricName := matches[2] - content := matches[3] - - switch directive { - case "HELP": - helpMap[metricName] = content - case "TYPE": - typeMap[metricName] = content - } - } - continue - } - - // Parse metric lines - matches := p.metricRegex.FindStringSubmatch(line) - if len(matches) >= 4 { - metricName := matches[1] - labelsStr := matches[2] - value := matches[3] - - // Parse labels if present - labels := make(map[string]string) - if labelsStr != "" { - // Remove surrounding braces - labelsStr = strings.Trim(labelsStr, "{}") - if labelsStr != "" { - parsedLabels, err := p.parseLabels(labelsStr) - if err != nil { - continue // Skip malformed metrics - } - labels = parsedLabels - } - } - - metric := PrometheusMetric{ - Name: metricName, - Labels: labels, - Value: value, - Help: helpMap[metricName], - Type: typeMap[metricName], - } - - metrics = append(metrics, metric) - } - } - - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("error scanning metrics: %w", err) - } - - return metrics, nil -} - -// parseLabels parses label string like 'key1="value1",key2="value2"' -func (p *PrometheusParser) parseLabels(labelsStr string) (map[string]string, error) { - labels := make(map[string]string) - // Split by comma, then key="value" - re := regexp.MustCompile(`(\w+)="([^"]*)"`) - - matches := re.FindAllStringSubmatch(labelsStr, -1) - for _, match := range matches { - if len(match) == 3 { - labels[match[1]] = match[2] - } - } - return labels, nil -} - -// AddLabels adds additional labels to a metric -func (m *PrometheusMetric) AddLabels(additionalLabels map[string]string) { - if m.Labels == nil { - m.Labels = make(map[string]string) - } - - for key, value := range additionalLabels { - m.Labels[key] = value - } -} - -// FormatMetric formats a metric back to Prometheus text format -func (m *PrometheusMetric) FormatMetric() string { - if len(m.Labels) == 0 { - return fmt.Sprintf("%s %s", m.Name, m.Value) - } - - // Sort label keys to ensure consistent output order - var keys []string - for key := range m.Labels { - keys = append(keys, key) - } - sort.Strings(keys) - - var labelPairs []string - for _, key := range keys { - labelPairs = append(labelPairs, fmt.Sprintf(`%s="%s"`, key, m.Labels[key])) - } - - return fmt.Sprintf("%s{%s} %s", m.Name, strings.Join(labelPairs, ","), m.Value) -} diff --git a/pkg/metrics/prometheus_parser_test.go b/pkg/metrics/prometheus_parser_test.go deleted file mode 100644 index 6094df465..000000000 --- a/pkg/metrics/prometheus_parser_test.go +++ /dev/null @@ -1,177 +0,0 @@ -package metrics - -import ( - "testing" -) - -func TestPrometheusParser_ParseMetrics(t *testing.T) { - parser := NewPrometheusParser() - - input := `# HELP http_requests_total Total number of HTTP requests -# TYPE http_requests_total counter -http_requests_total{method="get",code="200"} 1027 -http_requests_total{method="post",code="200"} 3 -http_requests_total{method="post",code="400"} 3 - -# HELP memory_usage_bytes Memory usage in bytes -# TYPE memory_usage_bytes gauge -memory_usage_bytes 1234567890 - -# Simple metric without labels -simple_metric 42 -` - - metrics, err := parser.ParseMetrics(input) - if err != nil { - t.Fatalf("Failed to parse metrics: %v", err) - } - - if len(metrics) != 5 { - t.Errorf("Expected 5 metrics, got %d", len(metrics)) - } - - // Test first metric with labels - metric := metrics[0] - if metric.Name != "http_requests_total" { - t.Errorf("Expected name 'http_requests_total', got '%s'", metric.Name) - } - if metric.Value != "1027" { - t.Errorf("Expected value '1027', got '%s'", metric.Value) - } - if metric.Help != "Total number of HTTP requests" { - t.Errorf("Expected help 'Total number of HTTP requests', got '%s'", metric.Help) - } - if metric.Type != "counter" { - t.Errorf("Expected type 'counter', got '%s'", metric.Type) - } - if len(metric.Labels) != 2 { - t.Errorf("Expected 2 labels, got %d", len(metric.Labels)) - } - if metric.Labels["method"] != "get" { - t.Errorf("Expected method='get', got '%s'", metric.Labels["method"]) - } - if metric.Labels["code"] != "200" { - t.Errorf("Expected code='200', got '%s'", metric.Labels["code"]) - } - - // Test simple metric without labels - simpleMetric := metrics[4] - if simpleMetric.Name != "simple_metric" { - t.Errorf("Expected name 'simple_metric', got '%s'", simpleMetric.Name) - } - if simpleMetric.Value != "42" { - t.Errorf("Expected value '42', got '%s'", simpleMetric.Value) - } - if len(simpleMetric.Labels) != 0 { - t.Errorf("Expected 0 labels, got %d", len(simpleMetric.Labels)) - } -} - -func TestPrometheusMetric_AddLabels(t *testing.T) { - metric := PrometheusMetric{ - Name: "test_metric", - Labels: map[string]string{"existing": "value"}, - Value: "123", - } - - additionalLabels := map[string]string{ - "backend": "llama.cpp", - "model": "test-model", - } - - metric.AddLabels(additionalLabels) - - if len(metric.Labels) != 3 { - t.Errorf("Expected 3 labels, got %d", len(metric.Labels)) - } - - if metric.Labels["existing"] != "value" { - t.Errorf("Expected existing='value', got '%s'", metric.Labels["existing"]) - } - if metric.Labels["backend"] != "llama.cpp" { - t.Errorf("Expected backend='llama.cpp', got '%s'", metric.Labels["backend"]) - } - if metric.Labels["model"] != "test-model" { - t.Errorf("Expected model='test-model', got '%s'", metric.Labels["model"]) - } -} - -func TestPrometheusMetric_FormatMetric(t *testing.T) { - // Test metric without labels - metric1 := PrometheusMetric{ - Name: "simple_metric", - Value: "42", - } - - expected1 := "simple_metric 42" - result1 := metric1.FormatMetric() - if result1 != expected1 { - t.Errorf("Expected '%s', got '%s'", expected1, result1) - } - - // Test metric with labels - metric2 := PrometheusMetric{ - Name: "labeled_metric", - Labels: map[string]string{ - "backend": "llama.cpp", - "model": "test-model", - }, - Value: "123", - } - - result2 := metric2.FormatMetric() - // With sorted keys, the order should always be: backend, model (alphabetical) - expected2 := `labeled_metric{backend="llama.cpp",model="test-model"} 123` - - if result2 != expected2 { - t.Errorf("Expected '%s', got '%s'", expected2, result2) - } -} - -func TestPrometheusParser_parseLabels(t *testing.T) { - parser := NewPrometheusParser() - - tests := []struct { - input string - expected map[string]string - }{ - { - input: `method="get",code="200"`, - expected: map[string]string{ - "method": "get", - "code": "200", - }, - }, - { - input: `single="value"`, - expected: map[string]string{ - "single": "value", - }, - }, - { - input: ``, - expected: map[string]string{}, - }, - } - - for _, test := range tests { - result, err := parser.parseLabels(test.input) - if err != nil { - t.Errorf("Failed to parse labels '%s': %v", test.input, err) - continue - } - - if len(result) != len(test.expected) { - t.Errorf("For input '%s', expected %d labels, got %d", test.input, len(test.expected), len(result)) - continue - } - - for key, expectedValue := range test.expected { - if actualValue, exists := result[key]; !exists { - t.Errorf("For input '%s', missing expected key '%s'", test.input, key) - } else if actualValue != expectedValue { - t.Errorf("For input '%s', key '%s': expected '%s', got '%s'", test.input, key, expectedValue, actualValue) - } - } - } -} From 17047a1413ab29a1853ed90b9ef994d0074e1bad Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Fri, 13 Jun 2025 12:23:59 +0200 Subject: [PATCH 4/7] acquire/release the loader's lock --- pkg/inference/scheduling/scheduler.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go index 42fb70e7c..7f10b6caa 100644 --- a/pkg/inference/scheduling/scheduler.go +++ b/pkg/inference/scheduling/scheduler.go @@ -352,6 +352,11 @@ func (s *Scheduler) GetAllActiveRunners() []metrics.ActiveRunner { runningBackends := s.getLoaderStatus(context.Background()) var activeRunners []metrics.ActiveRunner + if !s.loader.lock(context.Background()) { + return activeRunners + } + defer s.loader.unlock() + for _, backend := range runningBackends { // Find the runner slot for this backend/model combination key := runnerKey{ @@ -383,6 +388,11 @@ func (s *Scheduler) GetAllActiveRunners() []metrics.ActiveRunner { func (s *Scheduler) GetLlamaCppSocket() (string, error) { runningBackends := s.getLoaderStatus(context.Background()) + if !s.loader.lock(context.Background()) { + return "", fmt.Errorf("failed to acquire loader lock") + } + defer s.loader.unlock() + // Look for an active llama.cpp backend for _, backend := range runningBackends { if backend.BackendName == "llama.cpp" { From c304305804f1acb091df819c7e51c783785a8fb3 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Fri, 13 Jun 2025 12:27:39 +0200 Subject: [PATCH 5/7] I missed commiting this --- go.mod | 1 + 1 file changed, 1 insertion(+) diff --git a/go.mod b/go.mod index 1dfe2b3cc..563b964fa 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/docker/model-distribution v0.0.0-20250512190053-b3792c042d57 github.com/google/go-containerregistry v0.20.3 github.com/jaypipes/ghw v0.16.0 + github.com/mattn/go-shellwords v1.0.12 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.1 github.com/prometheus/client_model v0.6.2 From bb394fb4e8eb1ff664cce49ddc901f671042a0a0 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Fri, 13 Jun 2025 12:30:38 +0200 Subject: [PATCH 6/7] remove unneeded dep --- go.mod | 2 +- pkg/metrics/aggregated_handler.go | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index 563b964fa..f122d223a 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,6 @@ require ( github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.10.0 golang.org/x/sync v0.14.0 - google.golang.org/protobuf v1.36.6 ) require ( @@ -62,6 +61,7 @@ require ( gonum.org/v1/gonum v0.15.1 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20250414145226-207652e42e2e // indirect google.golang.org/grpc v1.72.0 // indirect + google.golang.org/protobuf v1.36.6 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect howett.net/plist v1.0.0 // indirect ) diff --git a/pkg/metrics/aggregated_handler.go b/pkg/metrics/aggregated_handler.go index ccebab6ed..7a03eab52 100644 --- a/pkg/metrics/aggregated_handler.go +++ b/pkg/metrics/aggregated_handler.go @@ -10,11 +10,9 @@ import ( "sync" "time" + "github.com/docker/model-runner/pkg/logging" dto "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" - "google.golang.org/protobuf/proto" - - "github.com/docker/model-runner/pkg/logging" ) // AggregatedMetricsHandler collects metrics from all active runners and aggregates them with labels @@ -140,8 +138,8 @@ func (h *AggregatedMetricsHandler) addLabelsAndMerge(families map[string]*dto.Me // Add our labels to the existing label pairs for key, value := range labels { metric.Label = append(metric.Label, &dto.LabelPair{ - Name: proto.String(key), - Value: proto.String(value), + Name: &key, + Value: &value, }) } } From 9c4674edcf045e12581563b0c2028b7bd00ac353 Mon Sep 17 00:00:00 2001 From: ilopezluna Date: Mon, 16 Jun 2025 09:32:42 +0200 Subject: [PATCH 7/7] clean up --- pkg/inference/scheduling/scheduler.go | 4 ++-- pkg/metrics/aggregated_handler.go | 2 +- pkg/metrics/scheduler_proxy.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go index ef7309f9a..9b9e3fc46 100644 --- a/pkg/inference/scheduling/scheduler.go +++ b/pkg/inference/scheduling/scheduler.go @@ -447,7 +447,7 @@ func (s *Scheduler) GetLlamaCppSocket() (string, error) { runningBackends := s.getLoaderStatus(context.Background()) if !s.loader.lock(context.Background()) { - return "", fmt.Errorf("failed to acquire loader lock") + return "", errors.New("failed to acquire loader lock") } defer s.loader.unlock() @@ -468,7 +468,7 @@ func (s *Scheduler) GetLlamaCppSocket() (string, error) { } } - return "", fmt.Errorf("no active llama.cpp backend found") + return "", errors.New("no active llama.cpp backend found") } // parseBackendMode converts a string mode to BackendMode diff --git a/pkg/metrics/aggregated_handler.go b/pkg/metrics/aggregated_handler.go index 7a03eab52..adaa40c41 100644 --- a/pkg/metrics/aggregated_handler.go +++ b/pkg/metrics/aggregated_handler.go @@ -32,7 +32,7 @@ func NewAggregatedMetricsHandler(log logging.Logger, scheduler SchedulerInterfac // ServeHTTP implements http.Handler for aggregated metrics func (h *AggregatedMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { - http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) return } diff --git a/pkg/metrics/scheduler_proxy.go b/pkg/metrics/scheduler_proxy.go index d462d8430..fd5565831 100644 --- a/pkg/metrics/scheduler_proxy.go +++ b/pkg/metrics/scheduler_proxy.go @@ -33,7 +33,7 @@ type ActiveRunner struct { // ServeHTTP implements http.Handler for metrics proxying via scheduler func (h *SchedulerMetricsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { - http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + http.Error(w, http.StatusText(http.StatusMethodNotAllowed), http.StatusMethodNotAllowed) return }