mattermost · crspeller · May 7, 2026 · Feb 19, 2026 · Mar 3, 2026 · Mar 5, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -13,9 +13,22 @@
 - Run evals with multiple providers: `LLM_PROVIDER=openai,anthropic make evals-ci`
 - Run evals with OpenAI compatible API (e.g., local LLMs): `LLM_PROVIDER=openaicompatible OPENAI_COMPATIBLE_API_URL=http://localhost:8080/v1 OPENAI_COMPATIBLE_MODEL=llama-3 make evals-ci`
 - Run streaming benchmarks: `go test -bench=. -benchmem ./llm/... ./streaming/...`
+- Run telemetry tests: `go test -v ./telemetry/...`
 - Validate e2e CI shard coverage: `cd e2e && node scripts/ci-test-groups.mjs validate`
 - List files assigned to a specific e2e CI shard/group: `cd e2e && node scripts/ci-test-groups.mjs list <group-name>`
 
+## OpenTelemetry / Tracing
+
+The plugin uses OpenTelemetry for distributed tracing. Key architecture points:
+
+- **Telemetry package** (`telemetry/`): Owns OTel initialization, attribute constants, and helpers. Use `telemetry.Tracer()` to get a tracer and `telemetry.SpanFromContext(ctx)` to get the current span.
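+- **context.Context threading**: All functions in the request pipeline accept `ctx context.Context` as the first parameter. Always propagate ctx from entry points (HTTP handlers, plugin hooks) through to LLM calls and external services. Calls that previously used `context.Background()` (e.g. `StreamToNewDM` in the API handlers) take `telemetry.DetachContext(ctx)` instead of the raw request context.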
+- **Span instrumentation**: Spans are created in `bifrost/` (LLM calls), `llm/tools.go` (tool resolution), `conversations/tool_handling.go` (tool call handling), `mcp/` (MCP tool calls), `search/` (semantic search), `websearch/` (Brave/Google), and `streaming/` (post streaming). The `otelgin` middleware auto-creates HTTP spans.
+- **Adding new spans**: Use `ctx, span := telemetry.Tracer().Start(ctx, "span name", trace.WithAttributes(...))` and `defer span.End()`. Record errors with `span.RecordError(err)` and `span.SetStatus(codes.Error, msg)`. Here `trace` is `go.opentelemetry.io/otel/trace` and `codes` is `go.opentelemetry.io/otel/codes` (not the gRPC `codes` package). Use attribute keys from `telemetry/attributes.go`.
+- **Config**: `TelemetryOutput` (string: `off` / `logs` / `otlp`) and `OpenTelemetryEndpoint` (string, e.g. `localhost:4317`) in plugin settings. `logs` mode pipes finished spans through `pluginapi.LogService` via `telemetry.NewLogSpanProcessor` for admins without an OTLP collector. `otlp` mode requires `OpenTelemetryEndpoint`.
+- **Local testing**: `docker compose -f dev/docker-compose.otel.yml up -d` starts Grafana Tempo (OTLP on `localhost:4317`) and Grafana at `http://localhost:3001` (anonymous Admin, Tempo datasource preprovisioned). Open Explore → Tempo to view traces.
+- **Context aliasing**: In files where a `context *llm.Context` parameter shadows the `context` package, use `stdcontext` as the import alias for `"context"`.
+
 ## Code Style Guidelines
 - Go: Follow Go standard formatting conventions according to goimports
 - TypeScript/React: Use 4-space indentation, PascalCase for components, strict typing, always use styled-components, never use style properties

diff --git a/api/api.go b/api/api.go
@@ -13,6 +13,8 @@ import (
 	"time"
 
 	"github.com/gin-gonic/gin"
+	"go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin"
+
 	"github.com/mattermost/mattermost-plugin-agents/bifrost"
 	"github.com/mattermost/mattermost-plugin-agents/bots"
 	"github.com/mattermost/mattermost-plugin-agents/config"
@@ -216,6 +218,7 @@ func (a *API) SetConversationService(svc *conversation.Service) {
 // ServeHTTP handles HTTP requests to the plugin
 func (a *API) ServeHTTP(c *plugin.Context, w http.ResponseWriter, r *http.Request) {
 	router := gin.Default()
+	router.Use(otelgin.Middleware("mattermost-ai-agents"))
 	router.Use(a.ginlogger)
 	router.Use(a.metricsMiddleware)
 

diff --git a/api/api_channel.go b/api/api_channel.go
@@ -4,20 +4,19 @@
 package api
 
 import (
-	stdcontext "context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"net/http"
 
-	"errors"
-
 	"github.com/gin-gonic/gin"
 	"github.com/gin-gonic/gin/render"
 	"github.com/mattermost/mattermost-plugin-agents/bots"
 	"github.com/mattermost/mattermost-plugin-agents/channels"
 	"github.com/mattermost/mattermost-plugin-agents/llm"
 	"github.com/mattermost/mattermost-plugin-agents/prompts"
 	"github.com/mattermost/mattermost-plugin-agents/streaming"
+	"github.com/mattermost/mattermost-plugin-agents/telemetry"
 	"github.com/mattermost/mattermost/server/public/model"
 )
 
@@ -151,7 +150,7 @@ func (a *API) handleChannelAnalysis(c *gin.Context) {
 		"Prompt":       data.Prompt,
 	}
 
-	result, err := analyzer.AnalyzeChannel(llmContext, channel.Id, userID, bot.GetMMBot().UserId, analysisData)
+	result, err := analyzer.AnalyzeChannel(c.Request.Context(), llmContext, channel.Id, userID, bot.GetMMBot().UserId, analysisData)
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, fmt.Errorf("failed to analyze channel: %w", err))
 		return
@@ -160,7 +159,7 @@ func (a *API) handleChannelAnalysis(c *gin.Context) {
 	// Create analysis post with conversation ID for streaming turn persistence
 	analysisPost := a.makeAnalysisPost(user.Locale, "", data.AnalysisType, result.ConversationID)
 
-	if err := a.streamingService.StreamToNewDM(stdcontext.Background(), bot.GetMMBot().UserId, result.Stream, user.Id, analysisPost, ""); err != nil {
+	if err := a.streamingService.StreamToNewDM(telemetry.DetachContext(c.Request.Context()), bot.GetMMBot().UserId, result.Stream, user.Id, analysisPost, ""); err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
 	}
@@ -249,7 +248,7 @@ func (a *API) handleInterval(c *gin.Context) {
 
 	// Call channels interval processing with conversation entity
 	result, err := channels.New(bot.LLM(), a.prompts, a.mmClient, a.dbClient, a.convService).Interval(
-		context, channel.Id, userID, bot.GetMMBot().UserId, data.StartTime, data.EndTime, promptPreset,
+		c.Request.Context(), context, channel.Id, userID, bot.GetMMBot().UserId, data.StartTime, data.EndTime, promptPreset,
 	)
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
@@ -262,7 +261,7 @@ func (a *API) handleInterval(c *gin.Context) {
 	post.AddProp(streaming.ConversationIDProp, result.ConversationID)
 
 	// Stream result to new DM
-	if err := a.streamingService.StreamToNewDM(stdcontext.Background(), bot.GetMMBot().UserId, result.Stream, user.Id, post, ""); err != nil {
+	if err := a.streamingService.StreamToNewDM(telemetry.DetachContext(c.Request.Context()), bot.GetMMBot().UserId, result.Stream, user.Id, post, ""); err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
 	}

diff --git a/api/api_llm_bridge.go b/api/api_llm_bridge.go
@@ -343,7 +343,7 @@ func (a *API) prepareAgentBridgeCompletion(
 			return nil, llm.CompletionRequest{}, nil, nil, nil, http.StatusBadRequest, errors.New("no eligible tools available for this agent")
 		}
 
-		scopedTools := llm.NewToolStore(nil, false)
+		scopedTools := llm.NewToolStore()
 		for _, name := range allowedToolNames {
 			tool := llmRequest.Context.Tools.GetTool(name)
 			if tool == nil {
@@ -502,12 +502,12 @@ func (a *API) streamLLMResponse(c *gin.Context, bot *bots.Bot, llmRequest llm.Co
 	var err error
 	if shouldExecute != nil {
 		var runResult *toolrunner.ToolRunResult
-		runResult, err = toolrunner.New(bot.LLM()).Run(llmRequest, shouldExecute, nil, opts...)
+		runResult, err = toolrunner.New(bot.LLM()).Run(c.Request.Context(), llmRequest, shouldExecute, nil, opts...)
 		if runResult != nil {
 			streamResult = runResult.Stream
 		}
 	} else {
-		streamResult, err = bot.LLM().ChatCompletion(llmRequest, opts...)
+		streamResult, err = bot.LLM().ChatCompletion(c.Request.Context(), llmRequest, opts...)
 	}
 	if err != nil {
 		// If streaming hasn't started, we can still send a JSON error
@@ -542,14 +542,13 @@ func (a *API) streamLLMResponse(c *gin.Context, bot *bots.Bot, llmRequest llm.Co
 	}
 }
 
-// handleNonStreamingLLMResponse handles non-streaming LLM responses.
 // When shouldExecute is non-nil, the call is routed through a toolrunner so
 // allowlisted tool calls are auto-executed; the runner's text stream is
 // drained into a single concatenated string before responding, mirroring
 // what ChatCompletionNoStream would have produced.
 func (a *API) handleNonStreamingLLMResponse(c *gin.Context, bot *bots.Bot, llmRequest llm.CompletionRequest, shouldExecute func(llm.ToolCall) bool, opts ...llm.LanguageModelOption) {
 	if shouldExecute == nil {
-		response, err := bot.LLM().ChatCompletionNoStream(llmRequest, opts...)
+		response, err := bot.LLM().ChatCompletionNoStream(c.Request.Context(), llmRequest, opts...)
 		if err != nil {
 			c.JSON(http.StatusInternalServerError, bridgeclient.ErrorResponse{
 				Error: fmt.Sprintf("failed to complete LLM request: %v", err),
@@ -562,7 +561,7 @@ func (a *API) handleNonStreamingLLMResponse(c *gin.Context, bot *bots.Bot, llmRe
 		return
 	}
 
-	runResult, err := toolrunner.New(bot.LLM()).Run(llmRequest, shouldExecute, nil, opts...)
+	runResult, err := toolrunner.New(bot.LLM()).Run(c.Request.Context(), llmRequest, shouldExecute, nil, opts...)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, bridgeclient.ErrorResponse{
 			Error: fmt.Sprintf("failed to complete LLM request: %v", err),

diff --git a/api/api_no_tools_test.go b/api/api_no_tools_test.go
@@ -45,10 +45,6 @@ func (p *noToolsTestMCPProvider) GetToolsForUser(string) ([]llm.Tool, *mcp.Error
 
 type noToolsTestContextConfigProvider struct{}
 
-func (p *noToolsTestContextConfigProvider) GetEnableLLMTrace() bool {
-	return false
-}
-
 func (p *noToolsTestContextConfigProvider) GetServiceByID(string) (llm.ServiceConfig, bool) {
 	return llm.ServiceConfig{}, false
 }

diff --git a/api/api_post.go b/api/api_post.go
@@ -4,7 +4,6 @@
 package api
 
 import (
-	stdcontext "context"
 	"errors"
 	"fmt"
 	"net/http"
@@ -16,6 +15,7 @@ import (
 	"github.com/mattermost/mattermost-plugin-agents/mmapi"
 	"github.com/mattermost/mattermost-plugin-agents/react"
 	"github.com/mattermost/mattermost-plugin-agents/streaming"
+	"github.com/mattermost/mattermost-plugin-agents/telemetry"
 	"github.com/mattermost/mattermost-plugin-agents/threads"
 	"github.com/mattermost/mattermost/server/public/model"
 )
@@ -82,7 +82,7 @@ func (a *API) handleReact(c *gin.Context) {
 	emojiName, err := react.New(
 		bot.LLM(),
 		a.prompts,
-	).Resolve(post.Message, context)
+	).Resolve(c.Request.Context(), post.Message, context)
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
@@ -154,13 +154,13 @@ func (a *API) handleThreadAnalysis(c *gin.Context) {
 	switch data.AnalysisType {
 	case "summarize_thread":
 		title = TitleThreadSummary
-		analyzeResult, err = analyzer.Summarize(post.Id, llmContext, botUserID, userID)
+		analyzeResult, err = analyzer.Summarize(c.Request.Context(), post.Id, llmContext, botUserID, userID)
 	case "action_items":
 		title = TitleFindActionItems
-		analyzeResult, err = analyzer.FindActionItems(post.Id, llmContext, botUserID, userID)
+		analyzeResult, err = analyzer.FindActionItems(c.Request.Context(), post.Id, llmContext, botUserID, userID)
 	case "open_questions":
 		title = TitleFindOpenQuestions
-		analyzeResult, err = analyzer.FindOpenQuestions(post.Id, llmContext, botUserID, userID)
+		analyzeResult, err = analyzer.FindOpenQuestions(c.Request.Context(), post.Id, llmContext, botUserID, userID)
 	}
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, fmt.Errorf("failed to analyze thread: %w", err))
@@ -169,7 +169,7 @@ func (a *API) handleThreadAnalysis(c *gin.Context) {
 
 	// Create analysis post with conversation ID
 	analysisPost := a.makeAnalysisPost(user.Locale, post.Id, data.AnalysisType, analyzeResult.ConversationID)
-	if err := a.streamingService.StreamToNewDM(stdcontext.Background(), botUserID, analyzeResult.Stream, user.Id, analysisPost, post.Id); err != nil {
+	if err := a.streamingService.StreamToNewDM(telemetry.DetachContext(c.Request.Context()), botUserID, analyzeResult.Stream, user.Id, analysisPost, post.Id); err != nil {
 		c.AbortWithError(http.StatusInternalServerError, err)
 		return
 	}
@@ -277,7 +277,7 @@ func (a *API) handleRegenerate(c *gin.Context) {
 		return
 	}
 
-	err := a.conversationsService.HandleRegenerate(userID, post, channel)
+	err := a.conversationsService.HandleRegenerate(c.Request.Context(), userID, post, channel)
 	if err != nil {
 		c.AbortWithError(http.StatusInternalServerError, fmt.Errorf("unable to regenerate post: %w", err))
 		return
@@ -316,7 +316,7 @@ func (a *API) handleToolCall(c *gin.Context) {
 		return
 	}
 
-	if err := a.conversationsService.HandleToolCall(userID, post, channel, data.AcceptedToolIDs); err != nil {
+	if err := a.conversationsService.HandleToolCall(c.Request.Context(), userID, post, channel, data.AcceptedToolIDs); err != nil {
 		c.AbortWithError(toolApprovalHTTPStatus(err), err)
 		return
 	}
@@ -370,7 +370,7 @@ func (a *API) handleToolResult(c *gin.Context) {
 		return
 	}
 
-	if err := a.conversationsService.HandleToolResult(userID, post, channel, data.AcceptedToolIDs); err != nil {
+	if err := a.conversationsService.HandleToolResult(c.Request.Context(), userID, post, channel, data.AcceptedToolIDs); err != nil {
 		c.AbortWithError(toolApprovalHTTPStatus(err), err)
 		return
 	}

diff --git a/api/fake_llm_test.go b/api/fake_llm_test.go
@@ -4,6 +4,7 @@
 package api
 
 import (
+	"context"
 	"fmt"
 	"sync"
 
@@ -48,7 +49,7 @@ type FakeLLM struct {
 }
 
 // ChatCompletion implements streaming completion
-func (f *FakeLLM) ChatCompletion(conversation llm.CompletionRequest, opts ...llm.LanguageModelOption) (*llm.TextStreamResult, error) {
+func (f *FakeLLM) ChatCompletion(_ context.Context, conversation llm.CompletionRequest, opts ...llm.LanguageModelOption) (*llm.TextStreamResult, error) {
 	var cfg llm.LanguageModelConfig
 	for _, opt := range opts {
 		opt(&cfg)
@@ -109,7 +110,7 @@ func (f *FakeLLM) ChatCompletion(conversation llm.CompletionRequest, opts ...llm
 }
 
 // ChatCompletionNoStream implements non-streaming completion
-func (f *FakeLLM) ChatCompletionNoStream(conversation llm.CompletionRequest, opts ...llm.LanguageModelOption) (string, error) {
+func (f *FakeLLM) ChatCompletionNoStream(_ context.Context, conversation llm.CompletionRequest, opts ...llm.LanguageModelOption) (string, error) {
 	var cfg llm.LanguageModelConfig
 	for _, opt := range opts {
 		opt(&cfg)