diff --git a/cmd/thv-operator/api/v1alpha1/mcpremoteproxy_types.go b/cmd/thv-operator/api/v1alpha1/mcpremoteproxy_types.go index ba5d278046..4d88361d9a 100644 --- a/cmd/thv-operator/api/v1alpha1/mcpremoteproxy_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcpremoteproxy_types.go @@ -38,6 +38,7 @@ type HeaderFromSecret struct { // MCPRemoteProxySpec defines the desired state of MCPRemoteProxy // // +kubebuilder:validation:XValidation:rule="!(has(self.oidcConfig) && has(self.oidcConfigRef))",message="oidcConfig and oidcConfigRef are mutually exclusive; use oidcConfigRef to reference a shared MCPOIDCConfig" +// +kubebuilder:validation:XValidation:rule="!(has(self.telemetry) && has(self.telemetryConfigRef))",message="telemetry and telemetryConfigRef are mutually exclusive; migrate to telemetryConfigRef" // //nolint:lll // CEL validation rules exceed line length limit type MCPRemoteProxySpec struct { @@ -102,7 +103,17 @@ type MCPRemoteProxySpec struct { // +optional ToolConfigRef *ToolConfigRef `json:"toolConfigRef,omitempty"` - // Telemetry defines observability configuration for the proxy + // TelemetryConfigRef references an MCPTelemetryConfig resource for shared telemetry configuration. + // The referenced MCPTelemetryConfig must exist in the same namespace as this MCPRemoteProxy. + // Cross-namespace references are not supported for security and isolation reasons. + // Mutually exclusive with the deprecated inline Telemetry field. + // +optional + TelemetryConfigRef *MCPTelemetryConfigReference `json:"telemetryConfigRef,omitempty"` + + // Telemetry defines inline observability configuration for the proxy. + // Deprecated: Use TelemetryConfigRef to reference a shared MCPTelemetryConfig resource instead. + // This field will be removed in a future release. Setting both telemetry and telemetryConfigRef + // is rejected by CEL validation. // +optional Telemetry *TelemetryConfig `json:"telemetry,omitempty"` @@ -174,6 +185,10 @@ type MCPRemoteProxyStatus struct { // +optional ToolConfigHash string `json:"toolConfigHash,omitempty"` + // TelemetryConfigHash stores the hash of the referenced MCPTelemetryConfig for change detection + // +optional + TelemetryConfigHash string `json:"telemetryConfigHash,omitempty"` + // ExternalAuthConfigHash is the hash of the referenced MCPExternalAuthConfig spec // +optional ExternalAuthConfigHash string `json:"externalAuthConfigHash,omitempty"` @@ -227,6 +242,9 @@ const ( // ConditionTypeMCPRemoteProxyToolConfigValidated indicates whether the ToolConfigRef is valid ConditionTypeMCPRemoteProxyToolConfigValidated = "ToolConfigValidated" + // ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated indicates whether the TelemetryConfigRef is valid + ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated = "TelemetryConfigRefValidated" + // ConditionTypeMCPRemoteProxyExternalAuthConfigValidated indicates whether the ExternalAuthConfigRef is valid ConditionTypeMCPRemoteProxyExternalAuthConfigValidated = "ExternalAuthConfigValidated" @@ -278,6 +296,18 @@ const ( // ConditionReasonMCPRemoteProxyToolConfigFetchError indicates an error occurred fetching the MCPToolConfig ConditionReasonMCPRemoteProxyToolConfigFetchError = "ToolConfigFetchError" + // ConditionReasonMCPRemoteProxyTelemetryConfigRefValid indicates the TelemetryConfigRef is valid + ConditionReasonMCPRemoteProxyTelemetryConfigRefValid = "TelemetryConfigRefValid" + + // ConditionReasonMCPRemoteProxyTelemetryConfigRefNotFound indicates the referenced MCPTelemetryConfig was not found + ConditionReasonMCPRemoteProxyTelemetryConfigRefNotFound = "TelemetryConfigRefNotFound" + + // ConditionReasonMCPRemoteProxyTelemetryConfigRefInvalid indicates the referenced MCPTelemetryConfig is invalid + ConditionReasonMCPRemoteProxyTelemetryConfigRefInvalid = "TelemetryConfigRefInvalid" + + // ConditionReasonMCPRemoteProxyTelemetryConfigRefFetchError indicates an error occurred fetching the MCPTelemetryConfig + ConditionReasonMCPRemoteProxyTelemetryConfigRefFetchError = "TelemetryConfigRefFetchError" + // ConditionReasonMCPRemoteProxyExternalAuthConfigValid indicates the ExternalAuthConfigRef is valid ConditionReasonMCPRemoteProxyExternalAuthConfigValid = "ExternalAuthConfigValid" diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go index 36b80c89e8..a8e405dec1 100644 --- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -1756,6 +1756,11 @@ func (in *MCPRemoteProxySpec) DeepCopyInto(out *MCPRemoteProxySpec) { *out = new(ToolConfigRef) **out = **in } + if in.TelemetryConfigRef != nil { + in, out := &in.TelemetryConfigRef, &out.TelemetryConfigRef + *out = new(MCPTelemetryConfigReference) + **out = **in + } if in.Telemetry != nil { in, out := &in.Telemetry, &out.Telemetry *out = new(TelemetryConfig) diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_controller.go b/cmd/thv-operator/controllers/mcpremoteproxy_controller.go index 95d6146404..3538797223 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_controller.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_controller.go @@ -48,6 +48,7 @@ type MCPRemoteProxyReconciler struct { // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcptoolconfigs,verbs=get;list;watch // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpexternalauthconfigs,verbs=get;list;watch // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpoidcconfigs,verbs=get;list;watch +// +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcptelemetryconfigs,verbs=get;list;watch // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpoidcconfigs/status,verbs=get;update;patch // +kubebuilder:rbac:groups="",resources=configmaps,verbs=create;delete;get;list;patch;update;watch // +kubebuilder:rbac:groups="",resources=services,verbs=create;delete;get;list;patch;update;watch @@ -128,6 +129,16 @@ func (r *MCPRemoteProxyReconciler) validateAndHandleConfigs(ctx context.Context, return err } + // Handle MCPTelemetryConfig + if err := r.handleTelemetryConfig(ctx, proxy); err != nil { + ctxLogger.Error(err, "Failed to handle MCPTelemetryConfig") + proxy.Status.Phase = mcpv1alpha1.MCPRemoteProxyPhaseFailed + if statusErr := r.Status().Update(ctx, proxy); statusErr != nil { + ctxLogger.Error(statusErr, "Failed to update MCPRemoteProxy status after MCPTelemetryConfig error") + } + return err + } + // Handle MCPExternalAuthConfig if err := r.handleExternalAuthConfig(ctx, proxy); err != nil { ctxLogger.Error(err, "Failed to handle MCPExternalAuthConfig") @@ -641,6 +652,97 @@ func (r *MCPRemoteProxyReconciler) handleToolConfig(ctx context.Context, proxy * return nil } +// handleTelemetryConfig validates and tracks the hash of the referenced MCPTelemetryConfig. +// It updates the MCPRemoteProxy status when the telemetry configuration changes. +func (r *MCPRemoteProxyReconciler) handleTelemetryConfig(ctx context.Context, proxy *mcpv1alpha1.MCPRemoteProxy) error { + ctxLogger := log.FromContext(ctx) + + if proxy.Spec.TelemetryConfigRef == nil { + // No MCPTelemetryConfig referenced, clear any stored hash and condition. + condType := mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated + condRemoved := meta.FindStatusCondition(proxy.Status.Conditions, condType) != nil + meta.RemoveStatusCondition(&proxy.Status.Conditions, condType) + if condRemoved || proxy.Status.TelemetryConfigHash != "" { + proxy.Status.TelemetryConfigHash = "" + if err := r.Status().Update(ctx, proxy); err != nil { + return fmt.Errorf("failed to clear MCPTelemetryConfig hash from status: %w", err) + } + } + return nil + } + + // Get the referenced MCPTelemetryConfig + telemetryConfig, err := ctrlutil.GetTelemetryConfigForMCPRemoteProxy(ctx, r.Client, proxy) + if err != nil { + // Transient API error (not a NotFound) + meta.SetStatusCondition(&proxy.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefFetchError, + Message: err.Error(), + ObservedGeneration: proxy.Generation, + }) + return err + } + + if telemetryConfig == nil { + // Resource genuinely does not exist + meta.SetStatusCondition(&proxy.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefNotFound, + Message: fmt.Sprintf("MCPTelemetryConfig %s not found", proxy.Spec.TelemetryConfigRef.Name), + ObservedGeneration: proxy.Generation, + }) + return fmt.Errorf("MCPTelemetryConfig %s not found", proxy.Spec.TelemetryConfigRef.Name) + } + + // Validate that the MCPTelemetryConfig is valid (has Valid=True condition) + if err := telemetryConfig.Validate(); err != nil { + meta.SetStatusCondition(&proxy.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefInvalid, + Message: fmt.Sprintf("MCPTelemetryConfig %s is invalid: %v", proxy.Spec.TelemetryConfigRef.Name, err), + ObservedGeneration: proxy.Generation, + }) + return fmt.Errorf("MCPTelemetryConfig %s is invalid: %w", proxy.Spec.TelemetryConfigRef.Name, err) + } + + // Detect whether the condition is transitioning to True (e.g. recovering from + // a transient error). Without this check the status update is skipped when the + // hash is unchanged, leaving a stale False condition. + condType := mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated + prevCondition := meta.FindStatusCondition(proxy.Status.Conditions, condType) + needsUpdate := prevCondition == nil || prevCondition.Status != metav1.ConditionTrue + + meta.SetStatusCondition(&proxy.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + Status: metav1.ConditionTrue, + Reason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefValid, + Message: fmt.Sprintf("MCPTelemetryConfig %s is valid", proxy.Spec.TelemetryConfigRef.Name), + ObservedGeneration: proxy.Generation, + }) + + if proxy.Status.TelemetryConfigHash != telemetryConfig.Status.ConfigHash { + ctxLogger.Info("MCPTelemetryConfig has changed, updating MCPRemoteProxy", + "proxy", proxy.Name, + "telemetryConfig", telemetryConfig.Name, + "oldHash", proxy.Status.TelemetryConfigHash, + "newHash", telemetryConfig.Status.ConfigHash) + proxy.Status.TelemetryConfigHash = telemetryConfig.Status.ConfigHash + needsUpdate = true + } + + if needsUpdate { + if err := r.Status().Update(ctx, proxy); err != nil { + return fmt.Errorf("failed to update MCPTelemetryConfig status: %w", err) + } + } + + return nil +} + // handleExternalAuthConfig validates and tracks the hash of the referenced MCPExternalAuthConfig func (r *MCPRemoteProxyReconciler) handleExternalAuthConfig(ctx context.Context, proxy *mcpv1alpha1.MCPRemoteProxy) error { ctxLogger := log.FromContext(ctx) @@ -1390,6 +1492,37 @@ func (r *MCPRemoteProxyReconciler) mapOIDCConfigToMCPRemoteProxy( return requests } +// mapTelemetryConfigToMCPRemoteProxy maps MCPTelemetryConfig changes to MCPRemoteProxy reconciliation requests. +func (r *MCPRemoteProxyReconciler) mapTelemetryConfigToMCPRemoteProxy( + ctx context.Context, obj client.Object, +) []reconcile.Request { + telemetryConfig, ok := obj.(*mcpv1alpha1.MCPTelemetryConfig) + if !ok { + return nil + } + + proxyList := &mcpv1alpha1.MCPRemoteProxyList{} + if err := r.List(ctx, proxyList, client.InNamespace(telemetryConfig.Namespace)); err != nil { + log.FromContext(ctx).Error(err, "Failed to list MCPRemoteProxies for MCPTelemetryConfig watch") + return nil + } + + var requests []reconcile.Request + for _, proxy := range proxyList.Items { + if proxy.Spec.TelemetryConfigRef != nil && + proxy.Spec.TelemetryConfigRef.Name == telemetryConfig.Name { + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: proxy.Name, + Namespace: proxy.Namespace, + }, + }) + } + } + + return requests +} + // SetupWithManager sets up the controller with the Manager func (r *MCPRemoteProxyReconciler) SetupWithManager(mgr ctrl.Manager) error { // Create a handler that maps MCPExternalAuthConfig changes to MCPRemoteProxy reconciliation requests @@ -1470,5 +1603,9 @@ func (r *MCPRemoteProxyReconciler) SetupWithManager(mgr ctrl.Manager) error { &mcpv1alpha1.MCPOIDCConfig{}, handler.EnqueueRequestsFromMapFunc(r.mapOIDCConfigToMCPRemoteProxy), ). + Watches( + &mcpv1alpha1.MCPTelemetryConfig{}, + handler.EnqueueRequestsFromMapFunc(r.mapTelemetryConfigToMCPRemoteProxy), + ). Complete(r) } diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go b/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go index 33749b298f..bca8face85 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go @@ -30,6 +30,7 @@ func (r *MCPRemoteProxyReconciler) deploymentForMCPRemoteProxy( // Build deployment components using helper functions args := r.buildContainerArgs() volumeMounts, volumes := r.buildVolumesForProxy(proxy) + r.addTelemetryCABundleVolumes(ctx, proxy, &volumes, &volumeMounts) env := r.buildEnvVarsForProxy(ctx, proxy) // Add embedded auth server volumes and env vars. AuthServerRef takes precedence; @@ -143,6 +144,29 @@ func (*MCPRemoteProxyReconciler) buildVolumesForProxy( return volumeMounts, volumes } +// addTelemetryCABundleVolumes appends CA bundle volumes for the referenced MCPTelemetryConfig. +// Must be called from deploymentForMCPRemoteProxy where the client is available. +func (r *MCPRemoteProxyReconciler) addTelemetryCABundleVolumes( + ctx context.Context, + proxy *mcpv1alpha1.MCPRemoteProxy, + volumes *[]corev1.Volume, + volumeMounts *[]corev1.VolumeMount, +) { + if proxy.Spec.TelemetryConfigRef == nil { + return + } + telCfg, err := ctrlutil.GetTelemetryConfigForMCPRemoteProxy(ctx, r.Client, proxy) + if err != nil { + log.FromContext(ctx).Error(err, "Failed to fetch MCPTelemetryConfig for CA bundle volume") + return + } + if telCfg != nil { + caVolumes, caMounts := ctrlutil.AddTelemetryCABundleVolumes(telCfg) + *volumes = append(*volumes, caVolumes...) + *volumeMounts = append(*volumeMounts, caMounts...) + } +} + // buildEnvVarsForProxy builds environment variables for the proxy container func (r *MCPRemoteProxyReconciler) buildEnvVarsForProxy( ctx context.Context, proxy *mcpv1alpha1.MCPRemoteProxy, diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_runconfig.go b/cmd/thv-operator/controllers/mcpremoteproxy_runconfig.go index 21655b24f9..92fc193260 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_runconfig.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_runconfig.go @@ -25,7 +25,7 @@ import ( // ensureRunConfigConfigMap ensures the RunConfig ConfigMap exists and is up to date for MCPRemoteProxy func (r *MCPRemoteProxyReconciler) ensureRunConfigConfigMap(ctx context.Context, proxy *mcpv1alpha1.MCPRemoteProxy) error { - runConfig, err := r.createRunConfigFromMCPRemoteProxy(proxy) + runConfig, err := r.createRunConfigFromMCPRemoteProxy(ctx, proxy) if err != nil { return fmt.Errorf("failed to create RunConfig from MCPRemoteProxy: %w", err) } @@ -71,6 +71,7 @@ func (r *MCPRemoteProxyReconciler) ensureRunConfigConfigMap(ctx context.Context, // createRunConfigFromMCPRemoteProxy converts MCPRemoteProxy spec to RunConfig // Key difference from MCPServer: Sets RemoteURL instead of Image, and Deployer remains nil func (r *MCPRemoteProxyReconciler) createRunConfigFromMCPRemoteProxy( + ctx context.Context, proxy *mcpv1alpha1.MCPRemoteProxy, ) (*runner.RunConfig, error) { proxyHost := defaultProxyHost @@ -79,28 +80,9 @@ func (r *MCPRemoteProxyReconciler) createRunConfigFromMCPRemoteProxy( } // Get tool configuration from MCPToolConfig if referenced - var toolsFilter []string - var toolsOverride map[string]runner.ToolOverride - - if proxy.Spec.ToolConfigRef != nil { - toolConfig, err := ctrlutil.GetToolConfigForMCPRemoteProxy(context.Background(), r.Client, proxy) - if err != nil { - return nil, fmt.Errorf("failed to get MCPToolConfig: %w", err) - } - - if toolConfig != nil { - toolsFilter = toolConfig.Spec.ToolsFilter - - if len(toolConfig.Spec.ToolsOverride) > 0 { - toolsOverride = make(map[string]runner.ToolOverride) - for toolName, override := range toolConfig.Spec.ToolsOverride { - toolsOverride[toolName] = runner.ToolOverride{ - Name: override.Name, - Description: override.Description, - } - } - } - } + toolsFilter, toolsOverride, err := r.resolveToolConfig(proxy) + if err != nil { + return nil, err } // Determine transport type (default to streamable-http to match CLI) @@ -127,22 +109,24 @@ func (r *MCPRemoteProxyReconciler) createRunConfigFromMCPRemoteProxy( options = append(options, runner.WithToolsOverride(toolsOverride)) } + // Add telemetry configuration: prefer TelemetryConfigRef over deprecated inline Telemetry + if err := r.addTelemetryOptions(ctx, proxy, &options); err != nil { + return nil, err + } + // Create context for API operations - ctx, cancel := context.WithTimeout(context.Background(), defaultAPITimeout) + apiCtx, cancel := context.WithTimeout(context.Background(), defaultAPITimeout) defer cancel() - // Add telemetry configuration if specified - runconfig.AddTelemetryConfigOptions(ctx, &options, proxy.Spec.Telemetry, proxy.Name) - // Add authorization configuration if specified - if err := ctrlutil.AddAuthzConfigOptions(ctx, r.Client, proxy.Namespace, proxy.Spec.AuthzConfig, &options); err != nil { + if err := ctrlutil.AddAuthzConfigOptions(apiCtx, r.Client, proxy.Namespace, proxy.Spec.AuthzConfig, &options); err != nil { return nil, fmt.Errorf("failed to process AuthzConfig: %w", err) } // Add OIDC configuration (required for proxy mode) // Supports both legacy inline OIDCConfig and new MCPOIDCConfigRef paths - resolvedOIDCConfig, err := r.resolveAndAddOIDCConfig(ctx, proxy, &options) + resolvedOIDCConfig, err := r.resolveAndAddOIDCConfig(apiCtx, proxy, &options) if err != nil { return nil, err } @@ -150,7 +134,7 @@ func (r *MCPRemoteProxyReconciler) createRunConfigFromMCPRemoteProxy( // Add external auth configuration if specified (updated call) // Will fail if embedded auth server is used without OIDC config or resourceUrl if err := ctrlutil.AddExternalAuthConfigOptions( - ctx, r.Client, proxy.Namespace, proxy.Name, proxy.Spec.ExternalAuthConfigRef, + apiCtx, r.Client, proxy.Namespace, proxy.Name, proxy.Spec.ExternalAuthConfigRef, resolvedOIDCConfig, &options, ); err != nil { return nil, fmt.Errorf("failed to process ExternalAuthConfig: %w", err) @@ -158,7 +142,7 @@ func (r *MCPRemoteProxyReconciler) createRunConfigFromMCPRemoteProxy( // Validate authServerRef/externalAuthConfigRef conflict and add authServerRef options if err := ctrlutil.ValidateAndAddAuthServerRefOptions( - ctx, r.Client, proxy.Namespace, proxy.Name, proxy.Spec.AuthServerRef, + apiCtx, r.Client, proxy.Namespace, proxy.Name, proxy.Spec.AuthServerRef, proxy.Spec.ExternalAuthConfigRef, resolvedOIDCConfig, &options, ); err != nil { return nil, fmt.Errorf("failed to process authServerRef: %w", err) @@ -320,3 +304,57 @@ func addHeaderForwardConfigOptions(proxy *mcpv1alpha1.MCPRemoteProxy, options *[ *options = append(*options, runner.WithHeaderForwardSecrets(headerSecrets)) } } + +// resolveToolConfig fetches the MCPToolConfig referenced by the proxy and +// returns the tools filter and override map. +func (r *MCPRemoteProxyReconciler) resolveToolConfig( + proxy *mcpv1alpha1.MCPRemoteProxy, +) ([]string, map[string]runner.ToolOverride, error) { + if proxy.Spec.ToolConfigRef == nil { + return nil, nil, nil + } + + toolConfig, err := ctrlutil.GetToolConfigForMCPRemoteProxy(context.Background(), r.Client, proxy) + if err != nil { + return nil, nil, fmt.Errorf("failed to get MCPToolConfig: %w", err) + } + if toolConfig == nil { + return nil, nil, nil + } + + var toolsOverride map[string]runner.ToolOverride + if len(toolConfig.Spec.ToolsOverride) > 0 { + toolsOverride = make(map[string]runner.ToolOverride) + for toolName, override := range toolConfig.Spec.ToolsOverride { + toolsOverride[toolName] = runner.ToolOverride{ + Name: override.Name, + Description: override.Description, + } + } + } + + return toolConfig.Spec.ToolsFilter, toolsOverride, nil +} + +// addTelemetryOptions resolves telemetry configuration for the RunConfig. +// Prefers TelemetryConfigRef over the deprecated inline Telemetry field. +func (r *MCPRemoteProxyReconciler) addTelemetryOptions( + ctx context.Context, + proxy *mcpv1alpha1.MCPRemoteProxy, + options *[]runner.RunConfigBuilderOption, +) error { + if proxy.Spec.TelemetryConfigRef != nil { + telCfg, err := ctrlutil.GetTelemetryConfigForMCPRemoteProxy(ctx, r.Client, proxy) + if err != nil { + return fmt.Errorf("failed to get MCPTelemetryConfig: %w", err) + } + if telCfg != nil { + caPath := ctrlutil.TelemetryCABundleFilePath(telCfg) + svcName := proxy.Spec.TelemetryConfigRef.ServiceName + runconfig.AddMCPTelemetryConfigRefOptions(options, &telCfg.Spec, svcName, proxy.Name, caPath) + } + return nil + } + runconfig.AddTelemetryConfigOptions(ctx, options, proxy.Spec.Telemetry, proxy.Name) + return nil +} diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_runconfig_test.go b/cmd/thv-operator/controllers/mcpremoteproxy_runconfig_test.go index 8156b5ab8b..8dfb74a490 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_runconfig_test.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_runconfig_test.go @@ -344,7 +344,7 @@ func TestCreateRunConfigFromMCPRemoteProxy(t *testing.T) { Scheme: scheme, } - config, err := reconciler.createRunConfigFromMCPRemoteProxy(tt.proxy) + config, err := reconciler.createRunConfigFromMCPRemoteProxy(t.Context(), tt.proxy) if tt.expectError { assert.Error(t, err) @@ -497,7 +497,7 @@ func TestCreateRunConfigFromMCPRemoteProxy_WithTokenExchange(t *testing.T) { Scheme: scheme, } - runConfig, err := reconciler.createRunConfigFromMCPRemoteProxy(tt.proxy) + runConfig, err := reconciler.createRunConfigFromMCPRemoteProxy(t.Context(), tt.proxy) if tt.expectError { assert.Error(t, err) @@ -730,7 +730,7 @@ func TestCreateRunConfigFromMCPRemoteProxy_WithBearerToken(t *testing.T) { Scheme: scheme, } - runConfig, err := reconciler.createRunConfigFromMCPRemoteProxy(tt.proxy) + runConfig, err := reconciler.createRunConfigFromMCPRemoteProxy(t.Context(), tt.proxy) if tt.expectError { assert.Error(t, err) diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_telemetryconfig_test.go b/cmd/thv-operator/controllers/mcpremoteproxy_telemetryconfig_test.go new file mode 100644 index 0000000000..da4a3ee4ee --- /dev/null +++ b/cmd/thv-operator/controllers/mcpremoteproxy_telemetryconfig_test.go @@ -0,0 +1,302 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" +) + +func TestHandleTelemetryConfig_MCPRemoteProxy(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, mcpv1alpha1.AddToScheme(scheme)) + + tests := []struct { + name string + proxy *mcpv1alpha1.MCPRemoteProxy + telemetryConfig *mcpv1alpha1.MCPTelemetryConfig + expectError bool + expectedHash string + expectedCondType string + expectedCondStatus metav1.ConditionStatus + expectedCondReason string + expectNoCondition bool + expectHashCleared bool + }{ + { + name: "nil ref clears hash and removes condition", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{TelemetryConfigRef: nil}, + Status: mcpv1alpha1.MCPRemoteProxyStatus{ + TelemetryConfigHash: "old-hash", + }, + }, + expectError: false, + expectNoCondition: true, + expectHashCleared: true, + }, + { + name: "valid ref sets condition true and updates hash", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "my-telemetry"}, + }, + }, + telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "my-telemetry", Namespace: "default"}, + Spec: newTelemetrySpec("https://otel-collector:4317", true, false), + Status: mcpv1alpha1.MCPTelemetryConfigStatus{ + ConfigHash: "abc123", + }, + }, + expectError: false, + expectedHash: "abc123", + expectedCondType: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + expectedCondStatus: metav1.ConditionTrue, + expectedCondReason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefValid, + }, + { + name: "not found sets condition false", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "missing"}, + }, + }, + expectError: true, + expectedCondType: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + expectedCondStatus: metav1.ConditionFalse, + expectedCondReason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefNotFound, + }, + { + name: "invalid config sets condition false", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "invalid-telemetry"}, + }, + }, + // Spec with endpoint but no tracing/metrics enabled → Validate() fails + telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "invalid-telemetry", Namespace: "default"}, + Spec: mcpv1alpha1.MCPTelemetryConfigSpec{ + OpenTelemetry: &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: "https://otel-collector:4317", + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: false}, + Metrics: &mcpv1alpha1.OpenTelemetryMetricsConfig{Enabled: false}, + }, + }, + }, + expectError: true, + expectedCondType: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + expectedCondStatus: metav1.ConditionFalse, + expectedCondReason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefInvalid, + }, + { + name: "hash change triggers update", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "my-telemetry"}, + }, + Status: mcpv1alpha1.MCPRemoteProxyStatus{ + TelemetryConfigHash: "old-hash", + }, + }, + telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "my-telemetry", Namespace: "default"}, + Spec: newTelemetrySpec("https://otel-collector:4317", true, false), + Status: mcpv1alpha1.MCPTelemetryConfigStatus{ + ConfigHash: "new-hash", + }, + }, + expectError: false, + expectedHash: "new-hash", + expectedCondType: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + expectedCondStatus: metav1.ConditionTrue, + expectedCondReason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefValid, + }, + { + name: "recovery from False condition persists True", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "my-telemetry"}, + }, + Status: mcpv1alpha1.MCPRemoteProxyStatus{ + TelemetryConfigHash: "abc123", + Conditions: []metav1.Condition{ + { + Type: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefFetchError, + }, + }, + }, + }, + telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "my-telemetry", Namespace: "default"}, + Spec: newTelemetrySpec("https://otel-collector:4317", true, false), + Status: mcpv1alpha1.MCPTelemetryConfigStatus{ + ConfigHash: "abc123", + }, + }, + expectError: false, + expectedHash: "abc123", + expectedCondType: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + expectedCondStatus: metav1.ConditionTrue, + expectedCondReason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefValid, + }, + { + name: "nil ref with stale condition persists removal", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{TelemetryConfigRef: nil}, + Status: mcpv1alpha1.MCPRemoteProxyStatus{ + Conditions: []metav1.Condition{ + { + Type: mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonMCPRemoteProxyTelemetryConfigRefNotFound, + }, + }, + }, + }, + expectError: false, + expectNoCondition: true, + expectHashCleared: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + ctx := t.Context() + + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.telemetryConfig != nil { + builder = builder.WithObjects(tt.telemetryConfig) + } + builder = builder.WithStatusSubresource(&mcpv1alpha1.MCPRemoteProxy{}) + builder = builder.WithObjects(tt.proxy) + fakeClient := builder.Build() + + reconciler := &MCPRemoteProxyReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.handleTelemetryConfig(ctx, tt.proxy) + + if tt.expectError { + require.Error(t, err) + } else { + require.NoError(t, err) + } + + // Re-fetch persisted state from the fake client. + // For success paths, the handler persists via r.Status().Update(). + // For error paths, conditions are set in-memory but the caller + // (validateAndHandleConfigs) is responsible for persisting — so + // we use in-memory state for error-path condition assertions. + persisted := &mcpv1alpha1.MCPRemoteProxy{} + require.NoError(t, fakeClient.Get(ctx, types.NamespacedName{ + Name: tt.proxy.Name, Namespace: tt.proxy.Namespace, + }, persisted)) + + // For success paths, assert on persisted state. + // For error paths, assert conditions on in-memory state (caller persists). + statusToCheck := persisted.Status + if tt.expectError { + statusToCheck = tt.proxy.Status + } + + if tt.expectNoCondition { + for _, c := range persisted.Status.Conditions { + assert.NotEqual(t, mcpv1alpha1.ConditionTypeMCPRemoteProxyTelemetryConfigRefValidated, c.Type, + "condition should have been removed from persisted state") + } + } + + if tt.expectHashCleared { + assert.Empty(t, persisted.Status.TelemetryConfigHash, "hash should be cleared") + } + + if tt.expectedCondType != "" { + var found bool + for _, c := range statusToCheck.Conditions { + if c.Type == tt.expectedCondType { + found = true + assert.Equal(t, tt.expectedCondStatus, c.Status) + assert.Equal(t, tt.expectedCondReason, c.Reason) + break + } + } + assert.True(t, found, "expected condition %s not found", tt.expectedCondType) + } + + if tt.expectedHash != "" { + assert.Equal(t, tt.expectedHash, persisted.Status.TelemetryConfigHash) + } + }) + } +} + +func TestMapTelemetryConfigToMCPRemoteProxy(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, mcpv1alpha1.AddToScheme(scheme)) + + proxy1 := &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "proxy1", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "shared-telemetry"}, + }, + } + proxy2 := &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "proxy2", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "other-telemetry"}, + }, + } + proxy3 := &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "proxy3", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{}, // no ref + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(proxy1, proxy2, proxy3). + Build() + + reconciler := &MCPRemoteProxyReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + ctx := t.Context() + + telemetryConfig := &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "shared-telemetry", Namespace: "default"}, + } + + requests := reconciler.mapTelemetryConfigToMCPRemoteProxy(ctx, telemetryConfig) + + require.Len(t, requests, 1) + assert.Equal(t, types.NamespacedName{Name: "proxy1", Namespace: "default"}, requests[0].NamespacedName) +} diff --git a/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go b/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go index 0dee596253..4b061e7a98 100644 --- a/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go +++ b/cmd/thv-operator/controllers/mcptelemetryconfig_controller.go @@ -193,9 +193,59 @@ func (r *MCPTelemetryConfigReconciler) SetupWithManager(mgr ctrl.Manager) error return ctrl.NewControllerManagedBy(mgr). For(&mcpv1alpha1.MCPTelemetryConfig{}). Watches(&mcpv1alpha1.MCPServer{}, mcpServerHandler). + Watches( + &mcpv1alpha1.MCPRemoteProxy{}, + handler.EnqueueRequestsFromMapFunc(r.mapMCPRemoteProxyToTelemetryConfig), + ). Complete(r) } +// mapMCPRemoteProxyToTelemetryConfig enqueues MCPTelemetryConfig reconcile requests +// when an MCPRemoteProxy changes. Handles both the currently-referenced config and +// any config that still lists this proxy in ReferencingWorkloads (ref-removal case). +func (r *MCPTelemetryConfigReconciler) mapMCPRemoteProxyToTelemetryConfig( + ctx context.Context, obj client.Object, +) []reconcile.Request { + proxy, ok := obj.(*mcpv1alpha1.MCPRemoteProxy) + if !ok { + return nil + } + + seen := make(map[types.NamespacedName]struct{}) + var requests []reconcile.Request + + if proxy.Spec.TelemetryConfigRef != nil { + nn := types.NamespacedName{ + Name: proxy.Spec.TelemetryConfigRef.Name, + Namespace: proxy.Namespace, + } + seen[nn] = struct{}{} + requests = append(requests, reconcile.Request{NamespacedName: nn}) + } + + // Also enqueue any MCPTelemetryConfig that still lists this proxy in + // ReferencingWorkloads — handles ref-removal and proxy-deletion cases. + telemetryConfigList := &mcpv1alpha1.MCPTelemetryConfigList{} + if err := r.List(ctx, telemetryConfigList, client.InNamespace(proxy.Namespace)); err != nil { + log.FromContext(ctx).Error(err, "Failed to list MCPTelemetryConfigs for MCPRemoteProxy watch") + return requests + } + for _, cfg := range telemetryConfigList.Items { + nn := types.NamespacedName{Name: cfg.Name, Namespace: cfg.Namespace} + if _, already := seen[nn]; already { + continue + } + for _, ref := range cfg.Status.ReferencingWorkloads { + if ref.Kind == mcpv1alpha1.WorkloadKindMCPRemoteProxy && ref.Name == proxy.Name { + requests = append(requests, reconcile.Request{NamespacedName: nn}) + break + } + } + } + + return requests +} + // calculateConfigHash calculates a hash of the MCPTelemetryConfig spec using Kubernetes utilities func (*MCPTelemetryConfigReconciler) calculateConfigHash(spec mcpv1alpha1.MCPTelemetryConfigSpec) string { return ctrlutil.CalculateConfigHash(spec) @@ -256,11 +306,33 @@ func (r *MCPTelemetryConfigReconciler) findReferencingWorkloads( ctx context.Context, telemetryConfig *mcpv1alpha1.MCPTelemetryConfig, ) ([]mcpv1alpha1.WorkloadReference, error) { - return ctrlutil.FindWorkloadRefsFromMCPServers(ctx, r.Client, telemetryConfig.Namespace, telemetryConfig.Name, + serverRefs, err := ctrlutil.FindWorkloadRefsFromMCPServers(ctx, r.Client, telemetryConfig.Namespace, telemetryConfig.Name, func(server *mcpv1alpha1.MCPServer) *string { if server.Spec.TelemetryConfigRef != nil { return &server.Spec.TelemetryConfigRef.Name } return nil }) + if err != nil { + return nil, err + } + + proxies, err := ctrlutil.FindReferencingMCPRemoteProxies(ctx, r.Client, telemetryConfig.Namespace, telemetryConfig.Name, + func(proxy *mcpv1alpha1.MCPRemoteProxy) *string { + if proxy.Spec.TelemetryConfigRef != nil { + return &proxy.Spec.TelemetryConfigRef.Name + } + return nil + }) + if err != nil { + return nil, err + } + + refs := make([]mcpv1alpha1.WorkloadReference, 0, len(serverRefs)+len(proxies)) + refs = append(refs, serverRefs...) + for _, proxy := range proxies { + refs = append(refs, mcpv1alpha1.WorkloadReference{Kind: mcpv1alpha1.WorkloadKindMCPRemoteProxy, Name: proxy.Name}) + } + ctrlutil.SortWorkloadRefs(refs) + return refs, nil } diff --git a/cmd/thv-operator/pkg/controllerutil/config.go b/cmd/thv-operator/pkg/controllerutil/config.go index 772cda9be2..a665de020a 100644 --- a/cmd/thv-operator/pkg/controllerutil/config.go +++ b/cmd/thv-operator/pkg/controllerutil/config.go @@ -10,6 +10,7 @@ import ( "slices" "strings" + "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/dump" "sigs.k8s.io/controller-runtime/pkg/client" @@ -191,6 +192,34 @@ func GetExternalAuthConfigForMCPRemoteProxy( return externalAuthConfig, nil } +// GetTelemetryConfigForMCPRemoteProxy fetches the MCPTelemetryConfig referenced by the proxy. +// Returns (nil, nil) when TelemetryConfigRef is nil or the resource is not found. +// Returns (nil, err) only for transient API errors so callers can distinguish +// "config missing" from "API unavailable". +func GetTelemetryConfigForMCPRemoteProxy( + ctx context.Context, + c client.Client, + proxy *mcpv1alpha1.MCPRemoteProxy, +) (*mcpv1alpha1.MCPTelemetryConfig, error) { + if proxy.Spec.TelemetryConfigRef == nil { + return nil, nil + } + + telemetryConfig := &mcpv1alpha1.MCPTelemetryConfig{} + err := c.Get(ctx, types.NamespacedName{ + Name: proxy.Spec.TelemetryConfigRef.Name, + Namespace: proxy.Namespace, + }, telemetryConfig) + if errors.IsNotFound(err) { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("failed to get MCPTelemetryConfig %s: %w", proxy.Spec.TelemetryConfigRef.Name, err) + } + + return telemetryConfig, nil +} + // GetExternalAuthConfigByName is a generic helper for fetching MCPExternalAuthConfig by name func GetExternalAuthConfigByName( ctx context.Context, diff --git a/cmd/thv-operator/pkg/controllerutil/config_test.go b/cmd/thv-operator/pkg/controllerutil/config_test.go index 515b1664fe..eeeb33d68d 100644 --- a/cmd/thv-operator/pkg/controllerutil/config_test.go +++ b/cmd/thv-operator/pkg/controllerutil/config_test.go @@ -459,3 +459,98 @@ func TestFindWorkloadRefsFromMCPServers(t *testing.T) { assert.Empty(t, refs) }) } + +func TestGetTelemetryConfigForMCPRemoteProxy(t *testing.T) { + t.Parallel() + + scheme := runtime.NewScheme() + require.NoError(t, mcpv1alpha1.AddToScheme(scheme)) + + tests := []struct { + name string + proxy *mcpv1alpha1.MCPRemoteProxy + telemetryConfig *mcpv1alpha1.MCPTelemetryConfig + expectNil bool + expectError bool + expectedName string + }{ + { + name: "nil ref returns nil without error", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{TelemetryConfigRef: nil}, + }, + expectNil: true, + expectError: false, + }, + { + name: "fetches referenced config", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "my-telemetry"}, + }, + }, + telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "my-telemetry", Namespace: "default"}, + }, + expectNil: false, + expectError: false, + expectedName: "my-telemetry", + }, + { + name: "not found returns nil without error", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "default"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "missing"}, + }, + }, + expectNil: true, + expectError: false, + }, + { + name: "cross-namespace returns nil (not found)", + proxy: &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{Name: "test-proxy", Namespace: "namespace-b"}, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{Name: "shared-config"}, + }, + }, + telemetryConfig: &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "shared-config", Namespace: "namespace-a"}, + }, + expectNil: true, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + ctx := t.Context() + + builder := fake.NewClientBuilder().WithScheme(scheme) + if tt.telemetryConfig != nil { + builder = builder.WithObjects(tt.telemetryConfig) + } + fakeClient := builder.Build() + + result, err := GetTelemetryConfigForMCPRemoteProxy(ctx, fakeClient, tt.proxy) + + if tt.expectError { + assert.Error(t, err) + assert.Nil(t, result) + return + } + + assert.NoError(t, err) + if tt.expectNil { + assert.Nil(t, result) + } else { + require.NotNil(t, result) + assert.Equal(t, tt.expectedName, result.Name) + } + }) + } +} diff --git a/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go b/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go index 48d4f6920b..60bad92625 100644 --- a/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go +++ b/cmd/thv-operator/test-integration/mcp-telemetry-config/mcptelemetryconfig_controller_integration_test.go @@ -15,9 +15,10 @@ import ( ) const ( - testEndpoint = "https://otel-collector:4317" - timeout = time.Second * 30 - interval = time.Millisecond * 250 + testEndpoint = "https://otel-collector:4317" + telemetryFinalizerName = "mcptelemetryconfig.toolhive.stacklok.dev/finalizer" + timeout = time.Second * 30 + interval = time.Millisecond * 250 ) var _ = Describe("MCPTelemetryConfig Controller", func() { @@ -149,7 +150,7 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { return false } for _, f := range fetched.Finalizers { - if f == "mcptelemetryconfig.toolhive.stacklok.dev/finalizer" { + if f == telemetryFinalizerName { return true } } @@ -257,7 +258,7 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { return false } for _, f := range fetched.Finalizers { - if f == "mcptelemetryconfig.toolhive.stacklok.dev/finalizer" { + if f == telemetryFinalizerName { return true } } @@ -323,4 +324,155 @@ var _ = Describe("MCPTelemetryConfig Controller", func() { return err != nil // Should be NotFound }, timeout, interval).Should(BeTrue(), "Config should be deleted after references are removed") }) + + It("should track MCPRemoteProxy in ReferencingWorkloads", func() { + telemetryConfig := &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-proxy-ref-tracking", + Namespace: "default", + }, + } + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } + + Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) + + // Wait for config to be ready + Eventually(func() bool { + fetched := &mcpv1alpha1.MCPTelemetryConfig{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: telemetryConfig.Name, + Namespace: telemetryConfig.Namespace, + }, fetched) + return err == nil && fetched.Status.ConfigHash != "" + }, timeout, interval).Should(BeTrue()) + + // Create an MCPRemoteProxy that references this config + proxy := &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "proxy-ref-tracking", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + RemoteURL: "https://example.com/mcp", + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{ + Name: "test-proxy-ref-tracking", + }, + }, + } + Expect(k8sClient.Create(ctx, proxy)).To(Succeed()) + + // The MCPRemoteProxy watch should trigger reconciliation of MCPTelemetryConfig. + // Verify ReferencingWorkloads includes the proxy. + Eventually(func() []string { + fetched := &mcpv1alpha1.MCPTelemetryConfig{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: telemetryConfig.Name, + Namespace: telemetryConfig.Namespace, + }, fetched) + if err != nil { + return nil + } + names := make([]string, 0, len(fetched.Status.ReferencingWorkloads)) + for _, ref := range fetched.Status.ReferencingWorkloads { + names = append(names, ref.Kind+"/"+ref.Name) + } + return names + }, timeout, interval).Should(ContainElement("MCPRemoteProxy/proxy-ref-tracking")) + }) + + It("should block deletion when MCPRemoteProxy references the config", func() { + telemetryConfig := &mcpv1alpha1.MCPTelemetryConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-proxy-deletion-protection", + Namespace: "default", + }, + } + telemetryConfig.Spec.OpenTelemetry = &mcpv1alpha1.MCPTelemetryOTelConfig{ + Enabled: true, + Endpoint: testEndpoint, + Tracing: &mcpv1alpha1.OpenTelemetryTracingConfig{Enabled: true}, + } + + Expect(k8sClient.Create(ctx, telemetryConfig)).To(Succeed()) + + // Wait for finalizer + Eventually(func() bool { + fetched := &mcpv1alpha1.MCPTelemetryConfig{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: telemetryConfig.Name, + Namespace: telemetryConfig.Namespace, + }, fetched) + if err != nil { + return false + } + for _, f := range fetched.Finalizers { + if f == telemetryFinalizerName { + return true + } + } + return false + }, timeout, interval).Should(BeTrue()) + + // Create an MCPRemoteProxy that references this config + proxy := &mcpv1alpha1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "proxy-deletion-blocker", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPRemoteProxySpec{ + RemoteURL: "https://example.com/mcp", + TelemetryConfigRef: &mcpv1alpha1.MCPTelemetryConfigReference{ + Name: "test-proxy-deletion-protection", + }, + }, + } + Expect(k8sClient.Create(ctx, proxy)).To(Succeed()) + + // Wait for ReferencingWorkloads to include the proxy + Eventually(func() []string { + fetched := &mcpv1alpha1.MCPTelemetryConfig{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: telemetryConfig.Name, + Namespace: telemetryConfig.Namespace, + }, fetched) + if err != nil { + return nil + } + names := make([]string, 0, len(fetched.Status.ReferencingWorkloads)) + for _, ref := range fetched.Status.ReferencingWorkloads { + names = append(names, ref.Name) + } + return names + }, timeout, interval).Should(ContainElement("proxy-deletion-blocker")) + + // Attempt to delete — finalizer blocks removal + Expect(k8sClient.Delete(ctx, telemetryConfig)).To(Succeed()) + + // Verify object still exists + Consistently(func() bool { + fetched := &mcpv1alpha1.MCPTelemetryConfig{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: telemetryConfig.Name, + Namespace: telemetryConfig.Namespace, + }, fetched) + return err == nil + }, 3*time.Second, interval).Should(BeTrue(), "Config should not be deleted while proxy references it") + + // Remove the referencing proxy + Expect(k8sClient.Delete(ctx, proxy)).To(Succeed()) + + // Config should now be deleted + Eventually(func() bool { + fetched := &mcpv1alpha1.MCPTelemetryConfig{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: telemetryConfig.Name, + Namespace: telemetryConfig.Namespace, + }, fetched) + return err != nil // Should be NotFound + }, timeout, interval).Should(BeTrue(), "Config should be deleted after proxy reference is removed") + }) }) diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpremoteproxies.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpremoteproxies.yaml index 22f478538f..52d0287b9c 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpremoteproxies.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpremoteproxies.yaml @@ -624,8 +624,11 @@ spec: - None type: string telemetry: - description: Telemetry defines observability configuration for the - proxy + description: |- + Telemetry defines inline observability configuration for the proxy. + Deprecated: Use TelemetryConfigRef to reference a shared MCPTelemetryConfig resource instead. + This field will be removed in a future release. Setting both telemetry and telemetryConfigRef + is rejected by CEL validation. properties: openTelemetry: description: OpenTelemetry defines OpenTelemetry configuration @@ -698,6 +701,27 @@ spec: type: boolean type: object type: object + telemetryConfigRef: + description: |- + TelemetryConfigRef references an MCPTelemetryConfig resource for shared telemetry configuration. + The referenced MCPTelemetryConfig must exist in the same namespace as this MCPRemoteProxy. + Cross-namespace references are not supported for security and isolation reasons. + Mutually exclusive with the deprecated inline Telemetry field. + properties: + name: + description: Name is the name of the MCPTelemetryConfig resource + minLength: 1 + type: string + serviceName: + description: |- + ServiceName overrides the telemetry service name for this specific server. + This MUST be unique per server for proper observability (e.g., distinguishing + traces and metrics from different servers sharing the same collector). + If empty, defaults to the server name with "thv-" prefix at runtime. + type: string + required: + - name + type: object toolConfigRef: description: |- ToolConfigRef references a MCPToolConfig resource for tool filtering and renaming. @@ -734,6 +758,9 @@ spec: - message: oidcConfig and oidcConfigRef are mutually exclusive; use oidcConfigRef to reference a shared MCPOIDCConfig rule: '!(has(self.oidcConfig) && has(self.oidcConfigRef))' + - message: telemetry and telemetryConfigRef are mutually exclusive; migrate + to telemetryConfigRef + rule: '!(has(self.telemetry) && has(self.telemetryConfigRef))' status: description: MCPRemoteProxyStatus defines the observed state of MCPRemoteProxy properties: @@ -832,6 +859,10 @@ spec: - Failed - Terminating type: string + telemetryConfigHash: + description: TelemetryConfigHash stores the hash of the referenced + MCPTelemetryConfig for change detection + type: string toolConfigHash: description: ToolConfigHash stores the hash of the referenced ToolConfig for change detection diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpremoteproxies.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpremoteproxies.yaml index e477567068..981ae2d1b8 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpremoteproxies.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpremoteproxies.yaml @@ -627,8 +627,11 @@ spec: - None type: string telemetry: - description: Telemetry defines observability configuration for the - proxy + description: |- + Telemetry defines inline observability configuration for the proxy. + Deprecated: Use TelemetryConfigRef to reference a shared MCPTelemetryConfig resource instead. + This field will be removed in a future release. Setting both telemetry and telemetryConfigRef + is rejected by CEL validation. properties: openTelemetry: description: OpenTelemetry defines OpenTelemetry configuration @@ -701,6 +704,27 @@ spec: type: boolean type: object type: object + telemetryConfigRef: + description: |- + TelemetryConfigRef references an MCPTelemetryConfig resource for shared telemetry configuration. + The referenced MCPTelemetryConfig must exist in the same namespace as this MCPRemoteProxy. + Cross-namespace references are not supported for security and isolation reasons. + Mutually exclusive with the deprecated inline Telemetry field. + properties: + name: + description: Name is the name of the MCPTelemetryConfig resource + minLength: 1 + type: string + serviceName: + description: |- + ServiceName overrides the telemetry service name for this specific server. + This MUST be unique per server for proper observability (e.g., distinguishing + traces and metrics from different servers sharing the same collector). + If empty, defaults to the server name with "thv-" prefix at runtime. + type: string + required: + - name + type: object toolConfigRef: description: |- ToolConfigRef references a MCPToolConfig resource for tool filtering and renaming. @@ -737,6 +761,9 @@ spec: - message: oidcConfig and oidcConfigRef are mutually exclusive; use oidcConfigRef to reference a shared MCPOIDCConfig rule: '!(has(self.oidcConfig) && has(self.oidcConfigRef))' + - message: telemetry and telemetryConfigRef are mutually exclusive; migrate + to telemetryConfigRef + rule: '!(has(self.telemetry) && has(self.telemetryConfigRef))' status: description: MCPRemoteProxyStatus defines the observed state of MCPRemoteProxy properties: @@ -835,6 +862,10 @@ spec: - Failed - Terminating type: string + telemetryConfigHash: + description: TelemetryConfigHash stores the hash of the referenced + MCPTelemetryConfig for change detection + type: string toolConfigHash: description: ToolConfigHash stores the hash of the referenced ToolConfig for change detection diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index eb528d6e4d..e60245ee45 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -2267,7 +2267,8 @@ _Appears in:_ | `authzConfig` _[api.v1alpha1.AuthzConfigRef](#apiv1alpha1authzconfigref)_ | AuthzConfig defines authorization policy configuration for the proxy | | Optional: \{\}
| | `audit` _[api.v1alpha1.AuditConfig](#apiv1alpha1auditconfig)_ | Audit defines audit logging configuration for the proxy | | Optional: \{\}
| | `toolConfigRef` _[api.v1alpha1.ToolConfigRef](#apiv1alpha1toolconfigref)_ | ToolConfigRef references a MCPToolConfig resource for tool filtering and renaming.
The referenced MCPToolConfig must exist in the same namespace as this MCPRemoteProxy.
Cross-namespace references are not supported for security and isolation reasons.
If specified, this allows filtering and overriding tools from the remote MCP server. | | Optional: \{\}
| -| `telemetry` _[api.v1alpha1.TelemetryConfig](#apiv1alpha1telemetryconfig)_ | Telemetry defines observability configuration for the proxy | | Optional: \{\}
| +| `telemetryConfigRef` _[api.v1alpha1.MCPTelemetryConfigReference](#apiv1alpha1mcptelemetryconfigreference)_ | TelemetryConfigRef references an MCPTelemetryConfig resource for shared telemetry configuration.
The referenced MCPTelemetryConfig must exist in the same namespace as this MCPRemoteProxy.
Cross-namespace references are not supported for security and isolation reasons.
Mutually exclusive with the deprecated inline Telemetry field. | | Optional: \{\}
| +| `telemetry` _[api.v1alpha1.TelemetryConfig](#apiv1alpha1telemetryconfig)_ | Telemetry defines inline observability configuration for the proxy.
Deprecated: Use TelemetryConfigRef to reference a shared MCPTelemetryConfig resource instead.
This field will be removed in a future release. Setting both telemetry and telemetryConfigRef
is rejected by CEL validation. | | Optional: \{\}
| | `resources` _[api.v1alpha1.ResourceRequirements](#apiv1alpha1resourcerequirements)_ | Resources defines the resource requirements for the proxy container | | Optional: \{\}
| | `serviceAccount` _string_ | ServiceAccount is the name of an already existing service account to use by the proxy.
If not specified, a ServiceAccount will be created automatically and used by the proxy. | | Optional: \{\}
| | `trustProxyHeaders` _boolean_ | TrustProxyHeaders indicates whether to trust X-Forwarded-* headers from reverse proxies
When enabled, the proxy will use X-Forwarded-Proto, X-Forwarded-Host, X-Forwarded-Port,
and X-Forwarded-Prefix headers to construct endpoint URLs | false | Optional: \{\}
| @@ -2296,6 +2297,7 @@ _Appears in:_ | `observedGeneration` _integer_ | ObservedGeneration reflects the generation of the most recently observed MCPRemoteProxy | | Optional: \{\}
| | `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#condition-v1-meta) array_ | Conditions represent the latest available observations of the MCPRemoteProxy's state | | Optional: \{\}
| | `toolConfigHash` _string_ | ToolConfigHash stores the hash of the referenced ToolConfig for change detection | | Optional: \{\}
| +| `telemetryConfigHash` _string_ | TelemetryConfigHash stores the hash of the referenced MCPTelemetryConfig for change detection | | Optional: \{\}
| | `externalAuthConfigHash` _string_ | ExternalAuthConfigHash is the hash of the referenced MCPExternalAuthConfig spec | | Optional: \{\}
| | `authServerConfigHash` _string_ | AuthServerConfigHash is the hash of the referenced authServerRef spec,
used to detect configuration changes and trigger reconciliation. | | Optional: \{\}
| | `oidcConfigHash` _string_ | OIDCConfigHash is the hash of the referenced MCPOIDCConfig spec for change detection | | Optional: \{\}
| @@ -2597,6 +2599,7 @@ same namespace as the MCPServer. _Appears in:_ +- [api.v1alpha1.MCPRemoteProxySpec](#apiv1alpha1mcpremoteproxyspec) - [api.v1alpha1.MCPServerSpec](#apiv1alpha1mcpserverspec) | Field | Description | Default | Validation |