Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/operator/virtualmcpserver-observability.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ the workflow name as an attribute.

## Distributed Tracing

The vMCP creates spans for each individual backend operation as well as workflow executions, enabling the attribution of workflow exection errors or latency to specific tool calls.
The vMCP creates spans for each individual backend operation as well as workflow executions, enabling the attribution of workflow execution errors or latency to specific tool calls.


## Configuration
Expand Down
2 changes: 1 addition & 1 deletion pkg/vmcp/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@
// backend calls are instrumented when they occur during workflow execution.
if cfg.TelemetryProvider != nil {
var err error
backendClient, err = monitorBackends(context.Background(), cfg.TelemetryProvider.MeterProvider(), cfg.TelemetryProvider.TracerProvider(), backends, backendClient)

Check failure on line 207 in pkg/vmcp/server/server.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 164 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 207 in pkg/vmcp/server/server.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 164 characters long, which exceeds the maximum of 130 characters. (lll)
if err != nil {
return nil, fmt.Errorf("failed to monitor backends: %w", err)
}
Expand All @@ -224,7 +224,7 @@

// Decorate workflow executors with telemetry if provider is configured
if cfg.TelemetryProvider != nil {
workflowExecutors, err = monitorWorkflowExecutors(cfg.TelemetryProvider.MeterProvider(), cfg.TelemetryProvider.TracerProvider(), workflowExecutors)

Check failure on line 227 in pkg/vmcp/server/server.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 149 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 227 in pkg/vmcp/server/server.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 149 characters long, which exceeds the maximum of 130 characters. (lll)
if err != nil {
return nil, fmt.Errorf("failed to monitor workflow executors: %w", err)
}
Expand Down Expand Up @@ -375,7 +375,7 @@
logger.Info("RFC 9728 OAuth discovery endpoints enabled at /.well-known/")
}

// MCP endpoint - apply middleware chain: auth → discovery
// MCP endpoint - apply middleware chain: auth → discovery → telemetry
var mcpHandler http.Handler = streamableServer

if s.config.TelemetryProvider != nil {
Expand Down
17 changes: 11 additions & 6 deletions pkg/vmcp/server/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,19 @@
}
backendCount.Record(ctx, int64(len(backends)))

requestsTotal, err := meter.Int64Counter("toolhive_vmcp_backend_requests_total", metric.WithDescription("Total number of requests per backend"))
requestsTotal, err := meter.Int64Counter("toolhive_vmcp_backend_requests", metric.WithDescription("Total number of requests per backend"))

Check failure on line 41 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 139 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 41 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 139 characters long, which exceeds the maximum of 130 characters. (lll)
if err != nil {
return nil, fmt.Errorf("failed to create requests total counter: %w", err)
}
errorsTotal, err := meter.Int64Counter("toolhive_vmcp_backend_errors_total", metric.WithDescription("Total number of errors per backend"))
errorsTotal, err := meter.Int64Counter("toolhive_vmcp_backend_errors", metric.WithDescription("Total number of errors per backend"))

Check failure on line 45 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 133 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 45 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 133 characters long, which exceeds the maximum of 130 characters. (lll)
if err != nil {
return nil, fmt.Errorf("failed to create errors total counter: %w", err)
}
requestsDuration, err := meter.Float64Histogram("toolhive_vmcp_backend_requests_duration", metric.WithDescription("Duration of requests in seconds per backend"))
requestsDuration, err := meter.Float64Histogram(
"toolhive_vmcp_backend_requests_duration",
metric.WithDescription("Duration of requests in seconds per backend"),
metric.WithUnit("s"),
)
if err != nil {
return nil, fmt.Errorf("failed to create requests duration histogram: %w", err)
}
Expand All @@ -73,7 +77,7 @@

// record updates the metrics and creates a span for each method on the BackendClient interface.
// It returns a function that should be deferred to record the duration, error, and end the span.
func (t telemetryBackendClient) record(ctx context.Context, target *vmcp.BackendTarget, action string, err *error) (context.Context, func()) {

Check failure on line 80 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 142 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 80 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 142 characters long, which exceeds the maximum of 130 characters. (lll)
// Create span attributes
commonAttrs := []attribute.KeyValue{
attribute.String("target.workload_id", target.WorkloadID),
Expand Down Expand Up @@ -104,25 +108,25 @@
}
}

func (t telemetryBackendClient) CallTool(ctx context.Context, target *vmcp.BackendTarget, toolName string, arguments map[string]any) (_ map[string]any, retErr error) {

Check failure on line 111 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 167 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 111 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 167 characters long, which exceeds the maximum of 130 characters. (lll)
ctx, done := t.record(ctx, target, "call_tool", &retErr)
defer done()
return t.backendClient.CallTool(ctx, target, toolName, arguments)
}

func (t telemetryBackendClient) ReadResource(ctx context.Context, target *vmcp.BackendTarget, uri string) (_ []byte, retErr error) {

Check failure on line 117 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 132 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 117 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 132 characters long, which exceeds the maximum of 130 characters. (lll)
ctx, done := t.record(ctx, target, "read_resource", &retErr)
defer done()
return t.backendClient.ReadResource(ctx, target, uri)
}

func (t telemetryBackendClient) GetPrompt(ctx context.Context, target *vmcp.BackendTarget, name string, arguments map[string]any) (_ string, retErr error) {

Check failure on line 123 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 156 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 123 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 156 characters long, which exceeds the maximum of 130 characters. (lll)
ctx, done := t.record(ctx, target, "get_prompt", &retErr)
defer done()
return t.backendClient.GetPrompt(ctx, target, name, arguments)
}

func (t telemetryBackendClient) ListCapabilities(ctx context.Context, target *vmcp.BackendTarget) (_ *vmcp.CapabilityList, retErr error) {

Check failure on line 129 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 138 characters long, which exceeds the maximum of 130 characters. (lll)

Check failure on line 129 in pkg/vmcp/server/telemetry.go

View workflow job for this annotation

GitHub Actions / Linting / Lint Go Code

The line is 138 characters long, which exceeds the maximum of 130 characters. (lll)
ctx, done := t.record(ctx, target, "list_capabilities", &retErr)
defer done()
return t.backendClient.ListCapabilities(ctx, target)
Expand All @@ -142,24 +146,25 @@
meter := meterProvider.Meter(instrumentationName)

executionsTotal, err := meter.Int64Counter(
"toolhive_vmcp_workflow_executions_total",
"toolhive_vmcp_workflow_executions",
metric.WithDescription("Total number of workflow executions"),
)
if err != nil {
return nil, fmt.Errorf("failed to create workflow executions counter: %w", err)
}

errorsTotal, err := meter.Int64Counter(
"toolhive_vmcp_workflow_errors_total",
"toolhive_vmcp_workflow_errors",
metric.WithDescription("Total number of workflow execution errors"),
)
if err != nil {
return nil, fmt.Errorf("failed to create workflow errors counter: %w", err)
}

executionDuration, err := meter.Float64Histogram(
"toolhive_vmcp_workflow_duration_seconds",
"toolhive_vmcp_workflow_duration",
metric.WithDescription("Duration of workflow executions in seconds"),
metric.WithUnit("s"),
)
if err != nil {
return nil, fmt.Errorf("failed to create workflow duration histogram: %w", err)
Expand Down
Loading