Skip to content

Commit fb49c15

Browse files
yroblataskbotclaude
authored
Enable session management v2 by default (#4091)
* Enable session management v2 flag by default * solve bugs and fix tests Regenerate aggregator mock to restore alphabetical method order MergeCapabilities was placed after ProcessPreQueriedCapabilities in the mock file. Running go generate restores the mockgen-expected alphabetical order to pass the codegen verification CI check. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Fix composite tool name collision in SMv2 session registration The v2 session registration path (handleSessionRegistrationV2) was adding composite tools without checking for name conflicts with backend tools, unlike the v1 injectCapabilities path which uses validateNoToolConflicts. - Add GetMultiSession to the SessionManager interface so server.go can access the session's resolved backend tool list for conflict validation - Call validateNoToolConflicts before registering composite SDK tools, skipping them (with an error log) if a collision is detected - Track actual composite tools added for accurate log output Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> Fix composite tool routing failure in SMv2 path Composite tool workflow steps call the shared router which reads DiscoveredCapabilities from the request context, but the discovery middleware was short-circuiting for MultiSession sessions and never injecting them, causing "capabilities not found in context" errors. - Add GetRoutingTable() to the MultiSession interface and implement it on defaultMultiSession (routing table is immutable post-creation) - Update discovery middleware: instead of skipping context injection for MultiSession, build AggregatedCapabilities from the session's routing table and inject it — enabling composite tool workflow steps to route backend tool calls correctly - Guard against nil routing table (session initialisation not yet complete) - Add GetRoutingTable() stub to test fakes in session manager and integration tests Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: taskbot <taskbot@users.noreply.github.com> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6578f21 commit fb49c15

24 files changed

Lines changed: 1128 additions & 130 deletions

cmd/thv-operator/controllers/virtualmcpserver_controller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -681,8 +681,10 @@ func (r *VirtualMCPServerReconciler) ensureHMACSecret(
681681
) error {
682682
ctxLogger := log.FromContext(ctx)
683683

684-
// Only ensure HMAC secret if Session Management V2 is enabled
685-
if vmcp.Spec.Config.Operational == nil || !vmcp.Spec.Config.Operational.SessionManagementV2 {
684+
// Skip HMAC secret creation only when SessionManagementV2 is explicitly set to false.
685+
// nil means "unset" which defaults to true (v2 enabled).
686+
op := vmcp.Spec.Config.Operational
687+
if op != nil && op.SessionManagementV2 != nil && !*op.SessionManagementV2 {
686688
return nil
687689
}
688690

cmd/thv-operator/controllers/virtualmcpserver_deployment.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,8 +277,10 @@ func (r *VirtualMCPServerReconciler) buildEnvVarsForVmcp(
277277
// Mount outgoing auth secrets
278278
env = append(env, r.buildOutgoingAuthEnvVars(ctx, vmcp, typedWorkloads)...)
279279

280-
// Mount HMAC secret for session token binding (Session Management V2)
281-
if vmcp.Spec.Config.Operational != nil && vmcp.Spec.Config.Operational.SessionManagementV2 {
280+
// Mount HMAC secret when SessionManagementV2 is enabled (nil = unset = default true).
281+
// Skip only when explicitly set to false.
282+
op := vmcp.Spec.Config.Operational
283+
if op == nil || op.SessionManagementV2 == nil || *op.SessionManagementV2 {
282284
env = append(env, r.buildHMACSecretEnvVar(vmcp))
283285
}
284286

cmd/vmcp/app/commands.go

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,10 @@ func discoverBackends(
297297
// - Otherwise: logs warning and creates factory with default insecure secret
298298
//
299299
// Returns an error only when running in Kubernetes without a secret (fail-fast for production).
300-
func createSessionFactory(outgoingRegistry vmcpauth.OutgoingAuthRegistry) (vmcpsession.MultiSessionFactory, error) {
300+
func createSessionFactory(
301+
outgoingRegistry vmcpauth.OutgoingAuthRegistry,
302+
agg aggregator.Aggregator,
303+
) (vmcpsession.MultiSessionFactory, error) {
301304
const (
302305
envKey = "VMCP_SESSION_HMAC_SECRET"
303306
minRecommendedSecretLen = 32
@@ -319,6 +322,7 @@ func createSessionFactory(outgoingRegistry vmcpauth.OutgoingAuthRegistry) (vmcps
319322
return vmcpsession.NewSessionFactory(
320323
outgoingRegistry,
321324
vmcpsession.WithHMACSecret([]byte(hmacSecret)),
325+
vmcpsession.WithAggregator(agg),
322326
), nil
323327
}
324328

@@ -332,7 +336,7 @@ func createSessionFactory(outgoingRegistry vmcpauth.OutgoingAuthRegistry) (vmcps
332336

333337
// Development mode: use default insecure secret with warning
334338
slog.Warn("VMCP_SESSION_HMAC_SECRET not set - using default insecure secret (NOT recommended for production)")
335-
return vmcpsession.NewSessionFactory(outgoingRegistry), nil
339+
return vmcpsession.NewSessionFactory(outgoingRegistry, vmcpsession.WithAggregator(agg)), nil
336340
}
337341

338342
// runServe implements the serve command logic
@@ -522,10 +526,12 @@ func runServe(cmd *cobra.Command, _ []string) error {
522526
return fmt.Errorf("failed to validate optimizer config: %w", err)
523527
}
524528

525-
// Create session factory only when SessionManagementV2 is enabled
529+
// Create session factory only when SessionManagementV2 is enabled.
530+
// nil means "unset" which defaults to true; explicit *false opts out.
531+
sessionManagementV2 := cfg.Operational.SessionManagementV2 == nil || *cfg.Operational.SessionManagementV2
526532
var sessionFactory vmcpsession.MultiSessionFactory
527-
if cfg.Operational.SessionManagementV2 {
528-
sessionFactory, err = createSessionFactory(outgoingRegistry)
533+
if sessionManagementV2 {
534+
sessionFactory, err = createSessionFactory(outgoingRegistry, agg)
529535
if err != nil {
530536
return err
531537
}
@@ -547,7 +553,7 @@ func runServe(cmd *cobra.Command, _ []string) error {
547553
Watcher: backendWatcher,
548554
StatusReporter: statusReporter,
549555
OptimizerConfig: optCfg,
550-
SessionManagementV2: cfg.Operational.SessionManagementV2,
556+
SessionManagementV2: sessionManagementV2,
551557
SessionFactory: sessionFactory,
552558
}
553559

deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,11 +692,12 @@ spec:
692692
- debug
693693
type: string
694694
sessionManagementV2:
695+
default: true
695696
description: |-
696697
SessionManagementV2 enables session-scoped backend client lifecycle.
697698
When true, vMCP creates real backend connections per session via MultiSessionFactory
698699
and routes tool calls directly through the session rather than the global router.
699-
Defaults to false; existing behaviour is completely unchanged when disabled.
700+
Defaults to true. Set explicitly to false to opt out.
700701
type: boolean
701702
timeouts:
702703
description: Timeouts configures timeout settings.

deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -695,11 +695,12 @@ spec:
695695
- debug
696696
type: string
697697
sessionManagementV2:
698+
default: true
698699
description: |-
699700
SessionManagementV2 enables session-scoped backend client lifecycle.
700701
When true, vMCP creates real backend connections per session via MultiSessionFactory
701702
and routes tool calls directly through the session rather than the global router.
702-
Defaults to false; existing behaviour is completely unchanged when disabled.
703+
Defaults to true. Set explicitly to false to opt out.
703704
type: boolean
704705
timeouts:
705706
description: Timeouts configures timeout settings.

docs/operator/crd-api.md

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/vmcp/aggregator/aggregator.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,23 @@ type Aggregator interface {
6161
// 2. Resolve conflicts
6262
// 3. Merge into final view
6363
AggregateCapabilities(ctx context.Context, backends []vmcp.Backend) (*AggregatedCapabilities, error)
64+
65+
// ProcessPreQueriedCapabilities applies the same aggregation pipeline (overrides,
66+
// conflict resolution, advertising filter) to tools that have already been fetched
67+
// from live backends. Used by the v2 session management path to reuse aggregator
68+
// logic without re-querying backends over HTTP.
69+
//
70+
// toolsByBackend maps backend WorkloadID → raw tools as returned by the backend.
71+
// targets maps backend WorkloadID → the pre-built BackendTarget for that backend.
72+
//
73+
// Returns the advertised tool list (resolved names, filtered) and a routing table
74+
// keyed by resolved name. Each routing table entry has OriginalCapabilityName set
75+
// so that GetBackendCapabilityName() translates back to the raw backend name.
76+
ProcessPreQueriedCapabilities(
77+
ctx context.Context,
78+
toolsByBackend map[string][]vmcp.Tool,
79+
targets map[string]*vmcp.BackendTarget,
80+
) (advertisedTools []vmcp.Tool, toolsRouting map[string]*vmcp.BackendTarget, err error)
6481
}
6582

6683
// BackendCapabilities contains the raw capabilities from a single backend.

pkg/vmcp/aggregator/default_aggregator.go

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -340,13 +340,15 @@ func (a *defaultAggregator) MergeCapabilities(
340340
"backend", resolvedTool.BackendID, "tool", resolvedTool.ResolvedName)
341341
routingTable.Tools[resolvedTool.ResolvedName] = &vmcp.BackendTarget{
342342
WorkloadID: resolvedTool.BackendID,
343-
OriginalCapabilityName: resolvedTool.OriginalName,
343+
OriginalCapabilityName: actualBackendCapabilityName(a.toolConfigMap, resolvedTool.BackendID, resolvedTool.OriginalName),
344344
}
345345
} else {
346346
// Use the backendToTarget helper from registry package
347347
target := vmcp.BackendToTarget(backend)
348-
// Store the original tool name for forwarding to backend
349-
target.OriginalCapabilityName = resolvedTool.OriginalName
348+
// Store the actual backend capability name for forwarding to backend.
349+
// resolvedTool.OriginalName is the post-override name; reverse the override
350+
// to get the name the backend itself uses.
351+
target.OriginalCapabilityName = actualBackendCapabilityName(a.toolConfigMap, resolvedTool.BackendID, resolvedTool.OriginalName)
350352
routingTable.Tools[resolvedTool.ResolvedName] = target
351353
}
352354
}
@@ -493,6 +495,85 @@ func (a *defaultAggregator) AggregateCapabilities(
493495
return aggregated, nil
494496
}
495497

498+
// ProcessPreQueriedCapabilities implements Aggregator.ProcessPreQueriedCapabilities.
499+
// It reuses processBackendTools, ResolveConflicts, and shouldAdvertiseTool so that
500+
// the v2 session path applies identical transforms to the v1 aggregation path.
501+
func (a *defaultAggregator) ProcessPreQueriedCapabilities(
502+
ctx context.Context,
503+
toolsByBackend map[string][]vmcp.Tool,
504+
targets map[string]*vmcp.BackendTarget,
505+
) ([]vmcp.Tool, map[string]*vmcp.BackendTarget, error) {
506+
// Step 1: Apply per-backend overrides (renames, description changes).
507+
processed := make(map[string]*BackendCapabilities, len(toolsByBackend))
508+
for backendID, rawTools := range toolsByBackend {
509+
processed[backendID] = &BackendCapabilities{
510+
BackendID: backendID,
511+
Tools: processBackendTools(ctx, backendID, rawTools, a.toolConfigMap[backendID]),
512+
}
513+
}
514+
515+
// Step 2: Resolve naming conflicts across backends.
516+
resolved, err := a.ResolveConflicts(ctx, processed)
517+
if err != nil {
518+
return nil, nil, err
519+
}
520+
521+
// Step 3: Build advertised list and routing table, applying advertising filter.
522+
var advertisedTools []vmcp.Tool
523+
routingTable := make(map[string]*vmcp.BackendTarget, len(resolved.Tools))
524+
525+
for _, rt := range resolved.Tools {
526+
target, ok := targets[rt.BackendID]
527+
if !ok {
528+
slog.Warn("ProcessPreQueriedCapabilities: no target for backend, skipping tool",
529+
"backend", rt.BackendID, "tool", rt.ResolvedName)
530+
continue
531+
}
532+
// Clone the target and record the actual backend capability name for call routing.
533+
// rt.OriginalName is the post-override name; reverse the override map to get the
534+
// actual name the backend itself uses.
535+
t := *target
536+
t.OriginalCapabilityName = actualBackendCapabilityName(a.toolConfigMap, rt.BackendID, rt.OriginalName)
537+
routingTable[rt.ResolvedName] = &t
538+
539+
if a.shouldAdvertiseTool(rt.BackendID, rt.OriginalName) {
540+
advertisedTools = append(advertisedTools, vmcp.Tool{
541+
Name: rt.ResolvedName,
542+
Description: rt.Description,
543+
InputSchema: rt.InputSchema,
544+
OutputSchema: rt.OutputSchema,
545+
Annotations: rt.Annotations,
546+
BackendID: rt.BackendID,
547+
})
548+
}
549+
}
550+
551+
return advertisedTools, routingTable, nil
552+
}
553+
554+
// actualBackendCapabilityName returns the real capability name the backend uses,
555+
// reversing any per-backend override rename that processBackendTools may have applied.
556+
//
557+
// processBackendTools renames tools when WorkloadToolConfig.Overrides maps an original
558+
// backend name to a user-visible name. The conflict resolvers receive the post-override
559+
// name and store it as ResolvedTool.OriginalName. Setting OriginalCapabilityName to that
560+
// value would forward the overridden (user-visible) name to the backend, which only knows
561+
// the original name.
562+
//
563+
// Returns postOverrideName unchanged when no matching override is configured.
564+
func actualBackendCapabilityName(toolConfigMap map[string]*config.WorkloadToolConfig, backendID, postOverrideName string) string {
565+
wlConfig, ok := toolConfigMap[backendID]
566+
if !ok || wlConfig == nil {
567+
return postOverrideName
568+
}
569+
for origName, override := range wlConfig.Overrides {
570+
if override != nil && override.Name == postOverrideName {
571+
return origName
572+
}
573+
}
574+
return postOverrideName
575+
}
576+
496577
// shouldAdvertiseTool returns true if a tool from the given backend should be
497578
// advertised to MCP clients (included in tools/list response).
498579
//

0 commit comments

Comments
 (0)