From b36ed41f5bcc6968a40a878d6b207edd9cbf9a93 Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Wed, 29 Apr 2026 15:35:49 +0300 Subject: [PATCH 1/2] Add operator-level defaultImagePullSecrets plumbing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cluster operators frequently need a registry pull secret applied to every workload the operator spawns (proxy runners, registry API, vMCP servers, embedding servers). Today the chart only exposes imagePullSecrets for the operator's own pod, forcing users to set the secret on every CR or to mutate the namespace-default ServiceAccount. This change introduces a chart value, operator.defaultImagePullSecrets, that the operator picks up at startup via THV_DEFAULT_IMAGE_PULL_SECRETS and applies as a default to every workload it spawns. All five workload-spawning reconcilers consume the shared imagepullsecrets.Defaults value and merge it with the per-CR list at workload-construction time: MCPServer, MCPRemoteProxy, MCPRegistry (via registryapi.manager), VirtualMCPServer, and EmbeddingServer. Precedence rule: per-CR imagePullSecrets take priority on name collisions; chart-level entries are appended additively and deduped by Name. The CR-level slice is never mutated. EmbeddingServer places the chart defaults on the base PodSpec and lets strategic-merge-patch additively union the user's PodTemplateSpec entries (PodSpec.ImagePullSecrets is declared with patchStrategy:"merge",patchMergeKey:"name"). Drift detection on every controller routes through the same merge helper as the construction site so chart defaults do not flag perpetual reconcile loops. The Helm template renders operator.env before chart-managed env vars so a user-supplied entry cannot silently override a reserved name like THV_DEFAULT_IMAGE_PULL_SECRETS — Kubernetes keeps the last entry on a duplicate-named env. The startup parser logs a diagnostic when the env var is set but parses to nothing (typos like " , " or ",,,") so the misconfiguration is visible. Part of #5102 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../controllers/embeddingserver_controller.go | 18 +- ...ingserver_default_imagepullsecrets_test.go | 72 +++++ .../controllers/mcpregistry_controller.go | 14 +- .../controllers/mcpremoteproxy_controller.go | 35 ++- ...moteproxy_default_imagepullsecrets_test.go | 110 ++++++++ .../controllers/mcpremoteproxy_deployment.go | 2 +- .../controllers/mcpserver_controller.go | 53 ++-- ...mcpserver_default_imagepullsecrets_test.go | 168 +++++++++++ .../virtualmcpserver_controller.go | 44 ++- ...mcpserver_default_imagepullsecrets_test.go | 129 +++++++++ .../virtualmcpserver_deployment.go | 33 ++- cmd/thv-operator/main.go | 94 +++++-- .../pkg/imagepullsecrets/defaults.go | 120 ++++++++ .../pkg/imagepullsecrets/defaults_test.go | 264 ++++++++++++++++++ .../pkg/registryapi/deployment.go | 4 +- cmd/thv-operator/pkg/registryapi/manager.go | 18 +- .../pkg/registryapi/manager_test.go | 7 +- cmd/thv-operator/pkg/registryapi/rbac.go | 2 +- .../mcp-registry/suite_test.go | 5 +- deploy/charts/operator/README.md | 3 +- .../charts/operator/templates/deployment.yaml | 15 +- deploy/charts/operator/values.yaml | 9 + 22 files changed, 1116 insertions(+), 103 deletions(-) create mode 100644 cmd/thv-operator/controllers/embeddingserver_default_imagepullsecrets_test.go create mode 100644 cmd/thv-operator/controllers/mcpremoteproxy_default_imagepullsecrets_test.go create mode 100644 cmd/thv-operator/controllers/mcpserver_default_imagepullsecrets_test.go create mode 100644 cmd/thv-operator/controllers/virtualmcpserver_default_imagepullsecrets_test.go create mode 100644 cmd/thv-operator/pkg/imagepullsecrets/defaults.go create mode 100644 cmd/thv-operator/pkg/imagepullsecrets/defaults_test.go diff --git a/cmd/thv-operator/controllers/embeddingserver_controller.go b/cmd/thv-operator/controllers/embeddingserver_controller.go index 70e55668b2..8149ad9d85 100644 --- a/cmd/thv-operator/controllers/embeddingserver_controller.go +++ b/cmd/thv-operator/controllers/embeddingserver_controller.go @@ -29,6 +29,7 @@ import ( mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" ) // EmbeddingServerReconciler reconciles a EmbeddingServer object @@ -37,6 +38,13 @@ type EmbeddingServerReconciler struct { Scheme *runtime.Scheme Recorder events.EventRecorder PlatformDetector *ctrlutil.SharedPlatformDetector + // ImagePullSecretsDefaults are cluster-wide defaults sourced from the + // operator chart, applied to the StatefulSet's PodSpec before the + // user-provided PodTemplateSpec strategic-merge patch runs. The strategic + // merge with the user PTS continues to additively merge the user's + // imagePullSecrets entries on top, with the user's entries winning on + // name collisions per Kubernetes' strategic-merge semantics. + ImagePullSecretsDefaults imagepullsecrets.Defaults } const ( @@ -644,7 +652,12 @@ func (*EmbeddingServerReconciler) applyResourceRequirements(embedding *mcpv1beta // buildPodTemplate builds the pod template for the statefulset. // User-provided PodTemplateSpec customizations are applied later in // statefulSetForEmbedding via strategic merge patch. -func (*EmbeddingServerReconciler) buildPodTemplate( +// +// Cluster-wide chart defaults for imagePullSecrets are placed on the base +// PodSpec here so that a subsequent strategic-merge with the user PTS +// additively unions the lists (Kubernetes treats PodSpec.ImagePullSecrets +// as a merge list keyed on Name; user entries win on name collisions). +func (r *EmbeddingServerReconciler) buildPodTemplate( labels map[string]string, container corev1.Container, ) corev1.PodTemplateSpec { @@ -655,7 +668,8 @@ func (*EmbeddingServerReconciler) buildPodTemplate( Labels: labels, }, Spec: corev1.PodSpec{ - Containers: []corev1.Container{container}, + ImagePullSecrets: r.ImagePullSecretsDefaults.List(), + Containers: []corev1.Container{container}, }, } } diff --git a/cmd/thv-operator/controllers/embeddingserver_default_imagepullsecrets_test.go b/cmd/thv-operator/controllers/embeddingserver_default_imagepullsecrets_test.go new file mode 100644 index 0000000000..1b9654227c --- /dev/null +++ b/cmd/thv-operator/controllers/embeddingserver_default_imagepullsecrets_test.go @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" +) + +// TestEmbeddingServer_DefaultImagePullSecrets verifies that cluster-wide +// chart defaults reach the StatefulSet's PodSpec.ImagePullSecrets. +// +// EmbeddingServer has no per-CR imagePullSecrets field; users add their own +// entries via spec.podTemplateSpec.spec.imagePullSecrets, which is +// strategic-merged on top of this base list. The strategic-merge behavior +// (additive union keyed by Name) is exercised by integration tests against a +// real K8s API; here we only assert the chart defaults reach the base PodSpec. +func TestEmbeddingServer_DefaultImagePullSecrets(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults []string + wantSecrets []corev1.LocalObjectReference + }{ + { + name: "chart defaults reach base PodSpec", + defaults: []string{"chart-default", "second-default"}, + wantSecrets: []corev1.LocalObjectReference{ + {Name: "chart-default"}, + {Name: "second-default"}, + }, + }, + { + name: "no defaults yields nil ImagePullSecrets", + defaults: nil, + wantSecrets: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + embedding := createTestEmbeddingServer( + "default-pullsecrets-embed", + testNamespaceDefault, + "image:latest", + "model", + ) + + scheme := createEmbeddingServerTestScheme() + reconciler := &EmbeddingServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagepullsecrets.NewDefaults(tt.defaults), + } + + sts := reconciler.statefulSetForEmbedding(t.Context(), embedding) + require.NotNil(t, sts) + assert.Equal(t, tt.wantSecrets, sts.Spec.Template.Spec.ImagePullSecrets, + "StatefulSet PodSpec ImagePullSecrets must reflect chart defaults") + }) + } +} diff --git a/cmd/thv-operator/controllers/mcpregistry_controller.go b/cmd/thv-operator/controllers/mcpregistry_controller.go index 6e75ddf520..0a142f47ca 100644 --- a/cmd/thv-operator/controllers/mcpregistry_controller.go +++ b/cmd/thv-operator/controllers/mcpregistry_controller.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/registryapi" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/registryapi/config" ) @@ -47,9 +48,16 @@ type MCPRegistryReconciler struct { registryAPIManager registryapi.Manager } -// NewMCPRegistryReconciler creates a new MCPRegistryReconciler with required dependencies -func NewMCPRegistryReconciler(k8sClient client.Client, scheme *runtime.Scheme) *MCPRegistryReconciler { - registryAPIManager := registryapi.NewManager(k8sClient, scheme) +// NewMCPRegistryReconciler creates a new MCPRegistryReconciler with required +// dependencies. imagePullSecretsDefaults are cluster-wide pull-secret defaults +// from the operator chart that are merged with the per-CR list at registry-api +// workload-construction time. +func NewMCPRegistryReconciler( + k8sClient client.Client, + scheme *runtime.Scheme, + imagePullSecretsDefaults imagepullsecrets.Defaults, +) *MCPRegistryReconciler { + registryAPIManager := registryapi.NewManager(k8sClient, scheme, imagePullSecretsDefaults) return &MCPRegistryReconciler{ Client: k8sClient, Scheme: scheme, diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_controller.go b/cmd/thv-operator/controllers/mcpremoteproxy_controller.go index eb72794ef8..255a26044c 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_controller.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_controller.go @@ -30,6 +30,7 @@ import ( mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/kubernetes/rbac" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/runconfig/configmap/checksum" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation" @@ -41,6 +42,10 @@ type MCPRemoteProxyReconciler struct { Scheme *runtime.Scheme Recorder events.EventRecorder PlatformDetector *ctrlutil.SharedPlatformDetector + // ImagePullSecretsDefaults are cluster-wide defaults sourced from the + // operator chart that are merged with the per-CR imagePullSecrets when + // constructing workloads. The zero value is a usable empty Defaults. + ImagePullSecretsDefaults imagepullsecrets.Defaults } // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=mcpremoteproxies,verbs=get;list;watch;create;update;patch;delete @@ -1092,18 +1097,25 @@ func (r *MCPRemoteProxyReconciler) ensureRBACResources(ctx context.Context, prox Namespace: proxy.Namespace, Rules: remoteProxyRBACRules, Owner: proxy, - ImagePullSecrets: imagePullSecretsForRemoteProxy(proxy), + ImagePullSecrets: r.imagePullSecretsForRemoteProxy(proxy), }) return err } -// imagePullSecretsForRemoteProxy returns the image pull secrets configured via -// spec.resourceOverrides.proxyDeployment.imagePullSecrets, or nil if unset. -func imagePullSecretsForRemoteProxy(proxy *mcpv1beta1.MCPRemoteProxy) []corev1.LocalObjectReference { - if proxy.Spec.ResourceOverrides == nil || proxy.Spec.ResourceOverrides.ProxyDeployment == nil { - return nil +// imagePullSecretsForRemoteProxy returns the image pull secrets the operator +// will set on the workload's PodSpec and ServiceAccount. The list is the merge +// of cluster-wide chart defaults (from r.ImagePullSecretsDefaults) with the +// per-CR list from spec.resourceOverrides.proxyDeployment.imagePullSecrets. +// CR-level entries win on name collisions; chart-level entries are appended +// additively. Returns nil when both inputs are empty. +func (r *MCPRemoteProxyReconciler) imagePullSecretsForRemoteProxy( + proxy *mcpv1beta1.MCPRemoteProxy, +) []corev1.LocalObjectReference { + var crLevel []corev1.LocalObjectReference + if proxy.Spec.ResourceOverrides != nil && proxy.Spec.ResourceOverrides.ProxyDeployment != nil { + crLevel = proxy.Spec.ResourceOverrides.ProxyDeployment.ImagePullSecrets } - return proxy.Spec.ResourceOverrides.ProxyDeployment.ImagePullSecrets + return r.ImagePullSecretsDefaults.Merge(crLevel) } // updateMCPRemoteProxyStatus updates the status of the MCPRemoteProxy @@ -1390,14 +1402,15 @@ func (r *MCPRemoteProxyReconciler) podTemplateMetadataNeedsUpdate( // podSpecNeedsUpdate checks if pod-level fields (not container fields) have drifted. // -// Currently compares ImagePullSecrets sourced from spec.resourceOverrides.proxyDeployment. -// Uses equality.Semantic.DeepEqual so nil and empty slices are treated as equal, +// Currently compares ImagePullSecrets — the merge of cluster-wide chart +// defaults with spec.resourceOverrides.proxyDeployment.imagePullSecrets. Uses +// equality.Semantic.DeepEqual so nil and empty slices are treated as equal, // which matches Kubernetes' own serialization semantics. -func (*MCPRemoteProxyReconciler) podSpecNeedsUpdate( +func (r *MCPRemoteProxyReconciler) podSpecNeedsUpdate( deployment *appsv1.Deployment, proxy *mcpv1beta1.MCPRemoteProxy, ) bool { - expected := imagePullSecretsForRemoteProxy(proxy) + expected := r.imagePullSecretsForRemoteProxy(proxy) current := deployment.Spec.Template.Spec.ImagePullSecrets return !equality.Semantic.DeepEqual(current, expected) } diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_default_imagepullsecrets_test.go b/cmd/thv-operator/controllers/mcpremoteproxy_default_imagepullsecrets_test.go new file mode 100644 index 0000000000..ed48ef7e89 --- /dev/null +++ b/cmd/thv-operator/controllers/mcpremoteproxy_default_imagepullsecrets_test.go @@ -0,0 +1,110 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" +) + +// TestMCPRemoteProxy_DefaultImagePullSecrets verifies that the merge of +// cluster-wide chart defaults with spec.resourceOverrides.proxyDeployment.imagePullSecrets +// reaches both the proxy-runner ServiceAccount and the Deployment PodSpec. +// +// The Merge precedence rule is exhaustively covered in +// imagepullsecrets/defaults_test.go::TestDefaultsMerge; the cases here exist +// only to prove the wiring is correct end-to-end. +func TestMCPRemoteProxy_DefaultImagePullSecrets(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults []string + crSecrets []corev1.LocalObjectReference + wantSecrets []corev1.LocalObjectReference + }{ + { + name: "merged defaults+CR with name collision reach SA and Deployment", + defaults: []string{"shared", "chart-only"}, + crSecrets: []corev1.LocalObjectReference{ + {Name: "shared"}, + }, + wantSecrets: []corev1.LocalObjectReference{ + {Name: "shared"}, + {Name: "chart-only"}, + }, + }, + { + name: "no defaults and no CR yields empty fields", + defaults: nil, + crSecrets: nil, + wantSecrets: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + proxy := &mcpv1beta1.MCPRemoteProxy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default-pullsecrets-proxy", + Namespace: "default", + }, + Spec: mcpv1beta1.MCPRemoteProxySpec{ + RemoteURL: "https://mcp.example.com", + ProxyPort: 8080, + }, + } + if tt.crSecrets != nil { + proxy.Spec.ResourceOverrides = &mcpv1beta1.ResourceOverrides{ + ProxyDeployment: &mcpv1beta1.ProxyDeploymentOverrides{ + ImagePullSecrets: tt.crSecrets, + }, + } + } + + scheme := createRunConfigTestScheme() + _ = rbacv1.AddToScheme(scheme) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(proxy). + Build() + + reconciler := &MCPRemoteProxyReconciler{ + Client: fakeClient, + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagepullsecrets.NewDefaults(tt.defaults), + } + + require.NoError(t, reconciler.ensureRBACResources(t.Context(), proxy)) + + sa := &corev1.ServiceAccount{} + require.NoError(t, fakeClient.Get(t.Context(), types.NamespacedName{ + Name: proxyRunnerServiceAccountNameForRemoteProxy(proxy.Name), + Namespace: proxy.Namespace, + }, sa)) + assert.Equal(t, tt.wantSecrets, sa.ImagePullSecrets, + "proxy runner SA ImagePullSecrets must reflect merged defaults+CR") + + dep := reconciler.deploymentForMCPRemoteProxy(t.Context(), proxy, "test-checksum") + require.NotNil(t, dep) + assert.Equal(t, tt.wantSecrets, dep.Spec.Template.Spec.ImagePullSecrets, + "proxy runner Deployment ImagePullSecrets must reflect merged defaults+CR") + }) + } +} diff --git a/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go b/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go index 2b70ca3645..bff48386a3 100644 --- a/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go +++ b/cmd/thv-operator/controllers/mcpremoteproxy_deployment.go @@ -72,7 +72,7 @@ func (r *MCPRemoteProxyReconciler) deploymentForMCPRemoteProxy( }, Spec: corev1.PodSpec{ ServiceAccountName: serviceAccountNameForRemoteProxy(proxy), - ImagePullSecrets: imagePullSecretsForRemoteProxy(proxy), + ImagePullSecrets: r.imagePullSecretsForRemoteProxy(proxy), Containers: []corev1.Container{{ Image: getToolhiveRunnerImage(), Name: "toolhive", diff --git a/cmd/thv-operator/controllers/mcpserver_controller.go b/cmd/thv-operator/controllers/mcpserver_controller.go index 72f9d636c0..55e9240095 100644 --- a/cmd/thv-operator/controllers/mcpserver_controller.go +++ b/cmd/thv-operator/controllers/mcpserver_controller.go @@ -35,6 +35,7 @@ import ( mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/kubernetes/rbac" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/runconfig/configmap/checksum" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation" @@ -48,6 +49,10 @@ type MCPServerReconciler struct { Scheme *runtime.Scheme Recorder events.EventRecorder PlatformDetector *ctrlutil.SharedPlatformDetector + // ImagePullSecretsDefaults are cluster-wide defaults sourced from the + // operator chart that are merged with the per-CR imagePullSecrets when + // constructing workloads. The zero value is a usable empty Defaults. + ImagePullSecretsDefaults imagepullsecrets.Defaults } // defaultRBACRules are the default RBAC rules that the @@ -893,12 +898,7 @@ func (r *MCPServerReconciler) ensureRBACResources(ctx context.Context, mcpServer rbacClient := rbac.NewClient(r.Client, r.Scheme) proxyRunnerNameForRBAC := ctrlutil.ProxyRunnerServiceAccountName(mcpServer.Name) - // Extract ImagePullSecrets from ResourceOverrides if present - var imagePullSecrets []corev1.LocalObjectReference - if mcpServer.Spec.ResourceOverrides != nil && - mcpServer.Spec.ResourceOverrides.ProxyDeployment != nil { - imagePullSecrets = mcpServer.Spec.ResourceOverrides.ProxyDeployment.ImagePullSecrets - } + imagePullSecrets := r.imagePullSecretsForMCPServer(mcpServer) // Ensure RBAC resources for proxy runner if _, err := rbacClient.EnsureRBACResources(ctx, rbac.EnsureRBACResourcesParams{ @@ -929,6 +929,28 @@ func (r *MCPServerReconciler) ensureRBACResources(ctx context.Context, mcpServer return err } +// imagePullSecretsForMCPServer returns the image pull secrets the operator +// will set on the proxy runner Deployment, the proxy runner ServiceAccount, +// and the auto-created MCP server ServiceAccount. The list is the merge of +// cluster-wide chart defaults (from r.ImagePullSecretsDefaults) with the +// per-CR list from spec.resourceOverrides.proxyDeployment.imagePullSecrets. +// CR-level entries win on name collisions; chart-level entries are appended +// additively. Returns nil when both inputs are empty. +// +// All sites that read or compare ImagePullSecrets — including +// deploymentNeedsUpdate's drift check — must call this helper so the desired +// list is computed identically and reconciliation reaches a fixed point. +func (r *MCPServerReconciler) imagePullSecretsForMCPServer( + mcpServer *mcpv1beta1.MCPServer, +) []corev1.LocalObjectReference { + var crLevel []corev1.LocalObjectReference + if mcpServer.Spec.ResourceOverrides != nil && + mcpServer.Spec.ResourceOverrides.ProxyDeployment != nil { + crLevel = mcpServer.Spec.ResourceOverrides.ProxyDeployment.ImagePullSecrets + } + return r.ImagePullSecretsDefaults.Merge(crLevel) +} + // deploymentForMCPServer returns a MCPServer Deployment object // //nolint:gocyclo @@ -1219,11 +1241,7 @@ func (r *MCPServerReconciler) deploymentForMCPServer( env = ctrlutil.EnsureRequiredEnvVars(ctx, env) - // Extract ImagePullSecrets from ResourceOverrides if present - var imagePullSecrets []corev1.LocalObjectReference - if m.Spec.ResourceOverrides != nil && m.Spec.ResourceOverrides.ProxyDeployment != nil { - imagePullSecrets = m.Spec.ResourceOverrides.ProxyDeployment.ImagePullSecrets - } + imagePullSecrets := r.imagePullSecretsForMCPServer(m) dep := &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ @@ -1724,13 +1742,12 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate( } // Check if image pull secrets have changed. - // Sourced from spec.resourceOverrides.proxyDeployment.imagePullSecrets. - // Uses equality.Semantic.DeepEqual so nil and empty slices are treated as equal. - var expectedPullSecrets []corev1.LocalObjectReference - if mcpServer.Spec.ResourceOverrides != nil && - mcpServer.Spec.ResourceOverrides.ProxyDeployment != nil { - expectedPullSecrets = mcpServer.Spec.ResourceOverrides.ProxyDeployment.ImagePullSecrets - } + // Must mirror the construction site (deploymentForMCPServer) which sets + // the merge of chart-level defaults with the per-CR list. Comparing + // against the CR-only field would flag perpetual drift whenever any + // chart default is configured. Uses equality.Semantic.DeepEqual so + // nil and empty slices are treated as equal. + expectedPullSecrets := r.imagePullSecretsForMCPServer(mcpServer) if !equality.Semantic.DeepEqual(deployment.Spec.Template.Spec.ImagePullSecrets, expectedPullSecrets) { return true } diff --git a/cmd/thv-operator/controllers/mcpserver_default_imagepullsecrets_test.go b/cmd/thv-operator/controllers/mcpserver_default_imagepullsecrets_test.go new file mode 100644 index 0000000000..7188274b57 --- /dev/null +++ b/cmd/thv-operator/controllers/mcpserver_default_imagepullsecrets_test.go @@ -0,0 +1,168 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + + mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" +) + +// TestEnsureRBACResources_DefaultImagePullSecrets verifies that cluster-wide +// chart defaults are merged with per-CR ImagePullSecrets when reconciling +// the proxy-runner ServiceAccount and the MCP server ServiceAccount. +// +// The Merge precedence rule itself is exhaustively covered in +// imagepullsecrets/defaults_test.go::TestDefaultsMerge. The cases here exist +// only to prove that the merged slice actually reaches the constructed +// ServiceAccount fields, so we keep this table to the minimum that exercises +// both ends of the wiring (overlap + empty). +func TestEnsureRBACResources_DefaultImagePullSecrets(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults []string + crSecrets []corev1.LocalObjectReference + wantSecrets []corev1.LocalObjectReference + }{ + { + // Overlap proves merge precedence reaches the SA: shared is + // deduplicated, chart-only is appended after the CR entry. + name: "merged defaults+CR with name collision reach ServiceAccount", + defaults: []string{"shared", "chart-only"}, + crSecrets: []corev1.LocalObjectReference{ + {Name: "shared"}, + }, + wantSecrets: []corev1.LocalObjectReference{ + {Name: "shared"}, + {Name: "chart-only"}, + }, + }, + { + name: "no defaults and no CR yields empty ServiceAccount field", + defaults: nil, + crSecrets: nil, + wantSecrets: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + tc := setupTest("test-server-default-pull-secrets", "default") + tc.reconciler.ImagePullSecretsDefaults = imagepullsecrets.NewDefaults(tt.defaults) + + if tt.crSecrets != nil { + tc.mcpServer.Spec.ResourceOverrides = &mcpv1beta1.ResourceOverrides{ + ProxyDeployment: &mcpv1beta1.ProxyDeploymentOverrides{ + ImagePullSecrets: tt.crSecrets, + }, + } + } + + require.NoError(t, tc.ensureRBACResources()) + + // Proxy-runner ServiceAccount. + sa := &corev1.ServiceAccount{} + require.NoError(t, tc.client.Get(t.Context(), types.NamespacedName{ + Name: tc.proxyRunnerNameForRBAC, + Namespace: tc.mcpServer.Namespace, + }, sa)) + assert.Equal(t, tt.wantSecrets, sa.ImagePullSecrets, + "proxy runner SA ImagePullSecrets must reflect merged defaults+CR") + + // MCP server ServiceAccount (auto-created when CR doesn't supply one). + mcpSA := &corev1.ServiceAccount{} + require.NoError(t, tc.client.Get(t.Context(), types.NamespacedName{ + Name: mcpServerServiceAccountName(tc.mcpServer.Name), + Namespace: tc.mcpServer.Namespace, + }, mcpSA)) + assert.Equal(t, tt.wantSecrets, mcpSA.ImagePullSecrets, + "MCP server SA ImagePullSecrets must reflect merged defaults+CR") + }) + } +} + +// TestDeploymentNeedsUpdate_DefaultImagePullSecrets is a regression test for a +// bug where deploymentNeedsUpdate compared the live Deployment's +// ImagePullSecrets against only the per-CR slice while the construction site +// applied the chart-default-merged slice. With chart defaults configured the +// comparison was always unequal, so every reconcile returned needsUpdate=true +// and the controller looped forever. The fix routes both sites through +// imagePullSecretsForMCPServer. +func TestDeploymentNeedsUpdate_DefaultImagePullSecrets(t *testing.T) { + t.Parallel() + + tc := setupTest("test-server-drift-pull-secrets", "default") + tc.reconciler.ImagePullSecretsDefaults = imagepullsecrets.NewDefaults([]string{"chart-default"}) + + dep := tc.reconciler.deploymentForMCPServer(t.Context(), tc.mcpServer, "test-checksum") + require.NotNil(t, dep) + + assert.False(t, tc.reconciler.deploymentNeedsUpdate(t.Context(), dep, tc.mcpServer, "test-checksum"), + "freshly built Deployment must not be flagged for update by drift detection") +} + +// TestDeploymentForMCPServer_DefaultImagePullSecrets verifies that cluster-wide +// chart defaults are merged with per-CR ImagePullSecrets when constructing the +// proxy-runner Deployment PodSpec. See the comment on +// TestEnsureRBACResources_DefaultImagePullSecrets for why this table is small. +func TestDeploymentForMCPServer_DefaultImagePullSecrets(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults []string + crSecrets []corev1.LocalObjectReference + wantSecrets []corev1.LocalObjectReference + }{ + { + name: "merged defaults+CR reach Deployment PodSpec", + defaults: []string{"chart-default"}, + crSecrets: []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + }, + wantSecrets: []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + {Name: "chart-default"}, + }, + }, + { + name: "no defaults and no CR yields nil PodSpec field", + defaults: nil, + crSecrets: nil, + wantSecrets: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + tc := setupTest("test-server-default-pull-secrets-dep", "default") + tc.reconciler.ImagePullSecretsDefaults = imagepullsecrets.NewDefaults(tt.defaults) + + if tt.crSecrets != nil { + tc.mcpServer.Spec.ResourceOverrides = &mcpv1beta1.ResourceOverrides{ + ProxyDeployment: &mcpv1beta1.ProxyDeploymentOverrides{ + ImagePullSecrets: tt.crSecrets, + }, + } + } + + dep := tc.reconciler.deploymentForMCPServer(t.Context(), tc.mcpServer, "test-checksum") + require.NotNil(t, dep) + assert.Equal(t, tt.wantSecrets, dep.Spec.Template.Spec.ImagePullSecrets, + "proxy runner Deployment ImagePullSecrets must reflect merged defaults+CR") + }) + } +} diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller.go b/cmd/thv-operator/controllers/virtualmcpserver_controller.go index 9a479f41c5..a6b81c034e 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller.go @@ -34,6 +34,7 @@ import ( mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/kubernetes/rbac" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/runconfig/configmap/checksum" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/virtualmcpserverstatus" @@ -103,6 +104,10 @@ type VirtualMCPServerReconciler struct { Scheme *runtime.Scheme Recorder events.EventRecorder PlatformDetector *ctrlutil.SharedPlatformDetector + // ImagePullSecretsDefaults are cluster-wide defaults sourced from the + // operator chart that are merged with vmcp.Spec.ImagePullSecrets when + // constructing workloads. The zero value is a usable empty Defaults. + ImagePullSecretsDefaults imagepullsecrets.Defaults } // +kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=virtualmcpservers,verbs=get;list;watch;create;update;patch;delete @@ -975,11 +980,27 @@ func (r *VirtualMCPServerReconciler) ensureRBACResources( Namespace: vmcp.Namespace, Rules: rules, Owner: vmcp, - ImagePullSecrets: vmcp.Spec.ImagePullSecrets, + ImagePullSecrets: r.imagePullSecretsForVMCP(vmcp), }) return err } +// imagePullSecretsForVMCP returns the image pull secrets the operator will set +// on the workload's PodSpec and ServiceAccount: the merge of cluster-wide +// chart defaults (from r.ImagePullSecretsDefaults) with vmcp.Spec.ImagePullSecrets. +// CR-level entries win on name collisions; chart-level entries are appended +// additively. Returns nil when both inputs are empty. +// +// Note: the live Deployment.Spec.Template.Spec.ImagePullSecrets is the +// strategic-merge union of this list with anything the user supplied under +// spec.podTemplateSpec.spec.imagePullSecrets — see imagePullSecretsNeedsUpdate +// for how drift is detected without comparing the live field directly. +func (r *VirtualMCPServerReconciler) imagePullSecretsForVMCP( + vmcp *mcpv1beta1.VirtualMCPServer, +) []corev1.LocalObjectReference { + return r.ImagePullSecretsDefaults.Merge(vmcp.Spec.ImagePullSecrets) +} + // ensureHMACSecret ensures the HMAC secret exists for session token binding. // This secret is required when Session Management V2 is enabled. // The secret is automatically generated with a cryptographically secure random value. @@ -1546,14 +1567,17 @@ func (*VirtualMCPServerReconciler) podTemplateSpecNeedsUpdate( return deployment.Annotations[podTemplateSpecHashAnnotation] != expectedHash } -// imagePullSecretsNeedsUpdate detects drift on vmcp.Spec.ImagePullSecrets by comparing a -// hash of the desired list against the value stored in imagePullRefsHashAnnotation. -// We cannot compare deployment.Spec.Template.Spec.ImagePullSecrets directly because the -// live list is the strategic-merge union with anything the user supplied under -// spec.podTemplateSpec.spec.imagePullSecrets, so a direct equality check would either -// flag spurious drift or miss real changes depending on PodTemplateSpec content. -// PodTemplateSpec drift is covered separately by podTemplateSpecNeedsUpdate. -func (*VirtualMCPServerReconciler) imagePullSecretsNeedsUpdate( +// imagePullSecretsNeedsUpdate detects drift on the desired imagePullSecrets +// list (chart-level defaults merged with vmcp.Spec.ImagePullSecrets) by +// comparing a hash of the desired list against the value stored in +// imagePullRefsHashAnnotation. We cannot compare +// deployment.Spec.Template.Spec.ImagePullSecrets directly because the live +// list is the strategic-merge union with anything the user supplied under +// spec.podTemplateSpec.spec.imagePullSecrets, so a direct equality check +// would either flag spurious drift or miss real changes depending on +// PodTemplateSpec content. PodTemplateSpec drift is covered separately by +// podTemplateSpecNeedsUpdate. +func (r *VirtualMCPServerReconciler) imagePullSecretsNeedsUpdate( ctx context.Context, deployment *appsv1.Deployment, vmcp *mcpv1beta1.VirtualMCPServer, @@ -1562,7 +1586,7 @@ func (*VirtualMCPServerReconciler) imagePullSecretsNeedsUpdate( return true } - expectedHash, err := imagePullSecretsHash(vmcp.Spec.ImagePullSecrets) + expectedHash, err := imagePullSecretsHash(r.imagePullSecretsForVMCP(vmcp)) if err != nil { log.FromContext(ctx).Error(err, "Failed to hash imagePullSecrets, assuming update needed") return true diff --git a/cmd/thv-operator/controllers/virtualmcpserver_default_imagepullsecrets_test.go b/cmd/thv-operator/controllers/virtualmcpserver_default_imagepullsecrets_test.go new file mode 100644 index 0000000000..d3ff0f6d84 --- /dev/null +++ b/cmd/thv-operator/controllers/virtualmcpserver_default_imagepullsecrets_test.go @@ -0,0 +1,129 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" + "github.com/stacklok/toolhive/pkg/vmcp/workloads" +) + +// TestVirtualMCPServer_DefaultImagePullSecrets verifies that the merge of +// cluster-wide chart defaults with vmcp.Spec.ImagePullSecrets reaches the +// vMCP Deployment PodSpec, the ServiceAccount, and the +// imagePullRefsHashAnnotation that drives drift detection. +// +// The Merge precedence rule itself is exhaustively covered in +// imagepullsecrets/defaults_test.go::TestDefaultsMerge. +func TestVirtualMCPServer_DefaultImagePullSecrets(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults []string + crSecrets []corev1.LocalObjectReference + wantSecrets []corev1.LocalObjectReference + }{ + { + name: "merged defaults+CR with name collision reach Deployment, SA, and hash", + defaults: []string{"shared", "chart-only"}, + crSecrets: []corev1.LocalObjectReference{ + {Name: "shared"}, + }, + wantSecrets: []corev1.LocalObjectReference{ + {Name: "shared"}, + {Name: "chart-only"}, + }, + }, + { + name: "no defaults and no CR yields empty fields and no annotation", + defaults: nil, + crSecrets: nil, + wantSecrets: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1beta1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default-pullsecrets-vmcp", + Namespace: "default", + }, + Spec: mcpv1beta1.VirtualMCPServerSpec{ + GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, + ImagePullSecrets: tt.crSecrets, + }, + } + + scheme := runtime.NewScheme() + require.NoError(t, mcpv1beta1.AddToScheme(scheme)) + require.NoError(t, corev1.AddToScheme(scheme)) + require.NoError(t, rbacv1.AddToScheme(scheme)) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(vmcp). + Build() + + r := &VirtualMCPServerReconciler{ + Client: fakeClient, + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagepullsecrets.NewDefaults(tt.defaults), + } + + // Verify Deployment PodSpec carries the merged list. + dep := r.deploymentForVirtualMCPServer(t.Context(), vmcp, "test-checksum", nil, []workloads.TypedWorkload{}) + require.NotNil(t, dep) + assert.Equal(t, tt.wantSecrets, dep.Spec.Template.Spec.ImagePullSecrets, + "vMCP Deployment ImagePullSecrets must reflect merged defaults+CR") + + // Verify the drift-detection annotation is present iff the + // merged list is non-empty, and matches the hash of the merged list. + expectedHash, err := imagePullSecretsHash(tt.wantSecrets) + require.NoError(t, err) + gotHash, present := dep.Annotations[imagePullRefsHashAnnotation] + if expectedHash == "" { + assert.False(t, present, + "imagePullRefsHashAnnotation must be absent when merged list is empty") + } else { + assert.True(t, present, "imagePullRefsHashAnnotation must be set") + assert.Equal(t, expectedHash, gotHash, + "hash annotation must match hash of the merged list") + } + + // Confirm drift detection treats this freshly-built Deployment as + // up-to-date — i.e. the annotation matches the desired-state hash + // computed from the same merge. Without this, every reconcile + // would loop. + assert.False(t, r.imagePullSecretsNeedsUpdate(t.Context(), dep, vmcp), + "freshly built Deployment must not be flagged as needing update") + + // Verify the ServiceAccount also carries the merged list. + require.NoError(t, r.ensureRBACResources(t.Context(), vmcp)) + sa := &corev1.ServiceAccount{} + require.NoError(t, fakeClient.Get(t.Context(), types.NamespacedName{ + Name: r.serviceAccountNameForVmcp(vmcp), + Namespace: vmcp.Namespace, + }, sa)) + assert.Equal(t, tt.wantSecrets, sa.ImagePullSecrets, + "vMCP SA ImagePullSecrets must reflect merged defaults+CR") + }) + } +} diff --git a/cmd/thv-operator/controllers/virtualmcpserver_deployment.go b/cmd/thv-operator/controllers/virtualmcpserver_deployment.go index 54385bea14..ea476512b2 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_deployment.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_deployment.go @@ -37,12 +37,15 @@ const ( // Used to detect changes without comparing full rendered templates (which include K8s-defaulted fields). podTemplateSpecHashAnnotation = "toolhive.stacklok.io/podtemplatespec-hash" - // imagePullRefsHashAnnotation tracks the SHA256 hash of vmcp.Spec.ImagePullSecrets. - // Mirrors the podTemplateSpecHashAnnotation pattern to detect drift on the explicit - // CRD field without re-running strategic-merge logic during reconciliation. - // Combined with podTemplateSpecHashAnnotation (which covers any imagePullSecrets the - // user added under spec.podTemplateSpec.spec.imagePullSecrets), this is sufficient - // to detect every input that influences the deployed PodSpec.ImagePullSecrets. + // imagePullRefsHashAnnotation tracks the SHA256 hash of the desired + // imagePullSecrets list — chart-level defaults merged with + // vmcp.Spec.ImagePullSecrets — used by buildDeploymentMetadataForVmcp. + // Mirrors the podTemplateSpecHashAnnotation pattern to detect drift on + // these inputs without re-running strategic-merge logic during + // reconciliation. Combined with podTemplateSpecHashAnnotation (which + // covers any imagePullSecrets the user added under + // spec.podTemplateSpec.spec.imagePullSecrets), this is sufficient to + // detect every input that influences the deployed PodSpec.ImagePullSecrets. imagePullRefsHashAnnotation = "toolhive.stacklok.io/imagepullsecrets-hash" // Log level configuration @@ -202,7 +205,7 @@ func (r *VirtualMCPServerReconciler) deploymentForVirtualMCPServer( Spec: corev1.PodSpec{ TerminationGracePeriodSeconds: int64Ptr(vmcpTerminationGracePeriodSeconds), ServiceAccountName: serviceAccountName, - ImagePullSecrets: vmcp.Spec.ImagePullSecrets, + ImagePullSecrets: r.imagePullSecretsForVMCP(vmcp), Containers: []corev1.Container{{ Image: getVmcpImage(), ImagePullPolicy: corev1.PullIfNotPresent, @@ -687,7 +690,7 @@ func (r *VirtualMCPServerReconciler) getExternalAuthConfigSecretEnvVar( } // buildDeploymentMetadataForVmcp builds deployment-level labels and annotations -func (*VirtualMCPServerReconciler) buildDeploymentMetadataForVmcp( +func (r *VirtualMCPServerReconciler) buildDeploymentMetadataForVmcp( baseLabels map[string]string, vmcp *mcpv1beta1.VirtualMCPServer, ) (map[string]string, map[string]string) { @@ -704,12 +707,14 @@ func (*VirtualMCPServerReconciler) buildDeploymentMetadataForVmcp( } } - // Store hash of vmcp.Spec.ImagePullSecrets so deploymentNeedsUpdate can detect - // drift on this field. Without this annotation, edits to spec.imagePullSecrets - // on an existing CR would not propagate to the running Deployment because the - // drift checks compare individual fields and never look at PodSpec.ImagePullSecrets - // directly (the live value is the strategic-merge union with PodTemplateSpec). - if hash, err := imagePullSecretsHash(vmcp.Spec.ImagePullSecrets); err == nil && hash != "" { + // Store hash of the desired imagePullSecrets list — chart-level defaults + // merged with vmcp.Spec.ImagePullSecrets — so deploymentNeedsUpdate can + // detect drift on this field. Without this annotation, edits to either + // the chart default or spec.imagePullSecrets on an existing CR would not + // propagate to the running Deployment because the drift checks compare + // individual fields and never look at PodSpec.ImagePullSecrets directly + // (the live value is the strategic-merge union with PodTemplateSpec). + if hash, err := imagePullSecretsHash(r.imagePullSecretsForVMCP(vmcp)); err == nil && hash != "" { deploymentAnnotations[imagePullRefsHashAnnotation] = hash } diff --git a/cmd/thv-operator/main.go b/cmd/thv-operator/main.go index 85073a385b..1011614e7b 100644 --- a/cmd/thv-operator/main.go +++ b/cmd/thv-operator/main.go @@ -33,6 +33,7 @@ import ( mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" "github.com/stacklok/toolhive/cmd/thv-operator/controllers" ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" "github.com/stacklok/toolhive/pkg/operator/telemetry" ) @@ -98,7 +99,27 @@ func main() { os.Exit(1) } - if err := setupControllersAndWebhooks(mgr); err != nil { + // Parse cluster-wide default imagePullSecrets once at startup. The Defaults + // value is shared (by copy) with every reconciler that constructs workloads. + imagePullSecretsDefaults := imagepullsecrets.LoadDefaultsFromEnv() + if defaults := imagePullSecretsDefaults.List(); len(defaults) > 0 { + names := make([]string, 0, len(defaults)) + for _, ref := range defaults { + names = append(names, ref.Name) + } + setupLog.Info("loaded cluster-wide default imagePullSecrets", "imagePullSecrets", names) + } else if rawValue, set := os.LookupEnv(imagepullsecrets.EnvVar); set && rawValue != "" { + // The env var was set but parsed to nothing — likely a typo such as + // " , " or ",,,". Surface this so the misconfiguration is diagnosable + // instead of being silently ignored. + setupLog.Info( + "THV_DEFAULT_IMAGE_PULL_SECRETS is set but contains no valid secret names; "+ + "chart-level defaults will not be applied", + "imagePullSecrets", rawValue, + ) + } + + if err := setupControllersAndWebhooks(mgr, imagePullSecretsDefaults); err != nil { setupLog.Error(err, "unable to setup controllers and webhooks") os.Exit(1) } @@ -127,8 +148,10 @@ func main() { } } -// setupControllersAndWebhooks sets up all controllers and webhooks with the manager -func setupControllersAndWebhooks(mgr ctrl.Manager) error { +// setupControllersAndWebhooks sets up all controllers and webhooks with the manager. +// The imagePullSecretsDefaults are propagated to controllers that construct +// workloads so that chart-level defaults are applied alongside per-CR overrides. +func setupControllersAndWebhooks(mgr ctrl.Manager, imagePullSecretsDefaults imagepullsecrets.Defaults) error { // Check feature flags enableServer := isFeatureEnabled(featureServer, true) enableRegistry := isFeatureEnabled(featureRegistry, true) @@ -161,7 +184,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { // Set up server-related controllers if enabledFeatures[featureServer] { - if err := setupServerControllers(mgr); err != nil { + if err := setupServerControllers(mgr, imagePullSecretsDefaults); err != nil { return err } } else { @@ -170,7 +193,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { // Set up registry controller if enabledFeatures[featureRegistry] { - if err := setupRegistryController(mgr); err != nil { + if err := setupRegistryController(mgr, imagePullSecretsDefaults); err != nil { return err } } else { @@ -179,7 +202,7 @@ func setupControllersAndWebhooks(mgr ctrl.Manager) error { // Set up Virtual MCP controllers and webhooks if enabledFeatures[featureVMCP] { - if err := setupAggregationControllers(mgr); err != nil { + if err := setupAggregationControllers(mgr, imagePullSecretsDefaults); err != nil { return err } } else { @@ -249,17 +272,20 @@ func setupGroupRefFieldIndexes(mgr ctrl.Manager) error { // setupServerControllers sets up server-related controllers // (MCPServer, MCPExternalAuthConfig, MCPRemoteProxy, MCPServerEntry, ToolConfig). -func setupServerControllers(mgr ctrl.Manager) error { +// imagePullSecretsDefaults are merged with per-CR imagePullSecrets when +// reconcilers construct workloads. +func setupServerControllers(mgr ctrl.Manager, imagePullSecretsDefaults imagepullsecrets.Defaults) error { if err := setupGroupRefFieldIndexes(mgr); err != nil { return err } // Set up MCPServer controller rec := &controllers.MCPServerReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorder("mcpserver-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorder("mcpserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagePullSecretsDefaults, } if err := rec.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller MCPServer: %w", err) @@ -299,20 +325,22 @@ func setupServerControllers(mgr ctrl.Manager) error { // Set up MCPRemoteProxy controller if err := (&controllers.MCPRemoteProxyReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorder("mcpremoteproxy-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorder("mcpremoteproxy-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagePullSecretsDefaults, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller MCPRemoteProxy: %w", err) } // Set up EmbeddingServer controller if err := (&controllers.EmbeddingServerReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorder("embeddingserver-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorder("embeddingserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagePullSecretsDefaults, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller EmbeddingServer: %w", err) } @@ -327,19 +355,24 @@ func setupServerControllers(mgr ctrl.Manager) error { return nil } -// setupRegistryController sets up the MCPRegistry controller -func setupRegistryController(mgr ctrl.Manager) error { - if err := (controllers.NewMCPRegistryReconciler(mgr.GetClient(), mgr.GetScheme())).SetupWithManager(mgr); err != nil { +// setupRegistryController sets up the MCPRegistry controller. +// imagePullSecretsDefaults are merged with mcpRegistry.Spec.ImagePullSecrets +// when the registry-api workload is constructed. +func setupRegistryController(mgr ctrl.Manager, imagePullSecretsDefaults imagepullsecrets.Defaults) error { + rec := controllers.NewMCPRegistryReconciler(mgr.GetClient(), mgr.GetScheme(), imagePullSecretsDefaults) + if err := rec.SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller MCPRegistry: %w", err) } return nil } // setupAggregationControllers sets up Virtual MCP-related controllers and webhooks -// (MCPGroup, VirtualMCPServer, and their webhooks) -// Note: This function assumes server controllers are enabled (enforced by dependency check) -// The field index for MCPServer.Spec.GroupRef is created in setupServerControllers -func setupAggregationControllers(mgr ctrl.Manager) error { +// (MCPGroup, VirtualMCPServer, and their webhooks). +// Note: This function assumes server controllers are enabled (enforced by dependency check). +// The field index for MCPServer.Spec.GroupRef is created in setupServerControllers. +// imagePullSecretsDefaults are merged with vmcp.Spec.ImagePullSecrets when the +// VirtualMCPServer Deployment is constructed. +func setupAggregationControllers(mgr ctrl.Manager, imagePullSecretsDefaults imagepullsecrets.Defaults) error { // Set up MCPGroup controller if err := (&controllers.MCPGroupReconciler{ Client: mgr.GetClient(), @@ -349,10 +382,11 @@ func setupAggregationControllers(mgr ctrl.Manager) error { // Set up VirtualMCPServer controller if err := (&controllers.VirtualMCPServerReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorder("virtualmcpserver-controller"), - PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorder("virtualmcpserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImagePullSecretsDefaults: imagePullSecretsDefaults, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller VirtualMCPServer: %w", err) } diff --git a/cmd/thv-operator/pkg/imagepullsecrets/defaults.go b/cmd/thv-operator/pkg/imagepullsecrets/defaults.go new file mode 100644 index 0000000000..293165b5d2 --- /dev/null +++ b/cmd/thv-operator/pkg/imagepullsecrets/defaults.go @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +// Package imagepullsecrets provides cluster-wide default imagePullSecrets +// that the ToolHive operator applies to every workload it spawns. +// +// The operator parses a comma-separated list of secret names from the +// THV_DEFAULT_IMAGE_PULL_SECRETS environment variable at startup and exposes +// the result as a Defaults value that controllers consume during reconciliation. +// +// Defaults are merged with any per-CR imagePullSecrets at workload-construction +// time. See Defaults.Merge for the precedence rule. +package imagepullsecrets + +import ( + "os" + "slices" + "strings" + + corev1 "k8s.io/api/core/v1" +) + +// EnvVar is the environment variable name that the operator parses at startup +// to populate cluster-wide default imagePullSecrets. +// +// The value is a comma-separated list of secret names, e.g. "regcred,otherscred". +// Whitespace around entries is tolerated; empty entries are skipped. +const EnvVar = "THV_DEFAULT_IMAGE_PULL_SECRETS" + +// Defaults holds the cluster-wide default imagePullSecrets that the operator +// applies to every workload it spawns when the corresponding CR does not +// explicitly override them. +// +// The zero value is a usable empty Defaults: Merge returns the CR-level value +// unchanged. Construct a populated Defaults via LoadDefaultsFromEnv or +// NewDefaults. +type Defaults struct { + // secrets is the parsed list of default imagePullSecrets, in the order + // they were specified in the environment variable. The slice is never + // shared with callers; Merge always returns a fresh slice. + secrets []corev1.LocalObjectReference +} + +// NewDefaults constructs a Defaults from a slice of secret names. Names are +// trimmed of surrounding whitespace; empty names are skipped. +func NewDefaults(names []string) Defaults { + parsed := make([]corev1.LocalObjectReference, 0, len(names)) + for _, raw := range names { + name := strings.TrimSpace(raw) + if name == "" { + continue + } + parsed = append(parsed, corev1.LocalObjectReference{Name: name}) + } + return Defaults{secrets: parsed} +} + +// LoadDefaultsFromEnv parses Defaults from the THV_DEFAULT_IMAGE_PULL_SECRETS +// environment variable. +// +// The variable is a comma-separated list of secret names. An empty or unset +// variable yields an empty Defaults whose Merge is a no-op. +func LoadDefaultsFromEnv() Defaults { + return NewDefaults(strings.Split(os.Getenv(EnvVar), ",")) +} + +// List returns a freshly allocated copy of the configured default +// imagePullSecrets. The caller may freely mutate the returned slice. +// An empty Defaults returns nil (not a zero-length slice) so callers can +// leave a PodSpec or ServiceAccount field unset. +func (d Defaults) List() []corev1.LocalObjectReference { + if len(d.secrets) == 0 { + return nil + } + return slices.Clone(d.secrets) +} + +// Merge combines the cluster-wide defaults with the CR-level imagePullSecrets +// and returns the resulting list. +// +// Precedence rule: chart-level defaults are appended additively to the +// CR-level list, with the CR-level entries taking priority on name conflicts. +// Concretely: +// +// - The CR-level list comes first in the result, preserving its order. +// - Each chart-level default is appended only if its Name does not already +// appear in the CR-level list (deduplication is by Name). +// - The CR-level list is never mutated; callers receive a fresh slice. +// +// If both inputs are empty, Merge returns nil so callers can leave the +// PodSpec/ServiceAccount field unset. +func (d Defaults) Merge(crLevel []corev1.LocalObjectReference) []corev1.LocalObjectReference { + if len(crLevel) == 0 && len(d.secrets) == 0 { + return nil + } + + merged := make([]corev1.LocalObjectReference, 0, len(crLevel)+len(d.secrets)) + seen := make(map[string]struct{}, len(crLevel)+len(d.secrets)) + + for _, ref := range crLevel { + if _, dup := seen[ref.Name]; dup { + continue + } + seen[ref.Name] = struct{}{} + merged = append(merged, ref) + } + + for _, ref := range d.secrets { + if _, dup := seen[ref.Name]; dup { + continue + } + seen[ref.Name] = struct{}{} + merged = append(merged, ref) + } + + if len(merged) == 0 { + return nil + } + return merged +} diff --git a/cmd/thv-operator/pkg/imagepullsecrets/defaults_test.go b/cmd/thv-operator/pkg/imagepullsecrets/defaults_test.go new file mode 100644 index 0000000000..bda7ec08b1 --- /dev/null +++ b/cmd/thv-operator/pkg/imagepullsecrets/defaults_test.go @@ -0,0 +1,264 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package imagepullsecrets + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" +) + +func TestNewDefaults(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input []string + want []corev1.LocalObjectReference + }{ + { + name: "nil slice returns empty defaults", + input: nil, + want: nil, + }, + { + name: "empty slice returns empty defaults", + input: []string{}, + want: nil, + }, + { + name: "single name", + input: []string{"regcred"}, + want: []corev1.LocalObjectReference{{Name: "regcred"}}, + }, + { + name: "multiple names preserve order", + input: []string{"regcred", "otherscred"}, + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + { + name: "whitespace tolerated", + input: []string{" regcred ", "\totherscred\n"}, + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + { + name: "empty entries skipped", + input: []string{"regcred", "", " ", "otherscred"}, + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := NewDefaults(tt.input).List() + assert.Equal(t, tt.want, got) + }) + } +} + +// TestLoadDefaultsFromEnv covers env-var parsing across the values an admin +// could plausibly set. The unset case is functionally redundant with the empty +// case (strings.Split("", ",") -> [""] which NewDefaults filters out), so it is +// not exercised separately. All cases mutate the process environment via +// t.Setenv, so this function cannot use t.Parallel(). +func TestLoadDefaultsFromEnv(t *testing.T) { + tests := []struct { + name string + envVal string + want []corev1.LocalObjectReference + }{ + { + name: "empty env var yields empty defaults", + envVal: "", + want: nil, + }, + { + name: "single secret", + envVal: "regcred", + want: []corev1.LocalObjectReference{{Name: "regcred"}}, + }, + { + name: "comma-separated list", + envVal: "regcred,otherscred", + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + { + name: "whitespace tolerated", + envVal: " regcred , otherscred ", + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + { + name: "empty entries skipped", + envVal: "regcred,,otherscred,", + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + { + name: "only commas yields empty", + envVal: ",,,", + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Cannot run in parallel because we mutate process env. + t.Setenv(EnvVar, tt.envVal) + got := LoadDefaultsFromEnv().List() + assert.Equal(t, tt.want, got) + }) + } +} + +func TestDefaultsMerge(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults []string + crLevel []corev1.LocalObjectReference + want []corev1.LocalObjectReference + }{ + { + name: "both empty returns nil", + defaults: nil, + crLevel: nil, + want: nil, + }, + { + name: "defaults only", + defaults: []string{"regcred", "otherscred"}, + crLevel: nil, + want: []corev1.LocalObjectReference{ + {Name: "regcred"}, + {Name: "otherscred"}, + }, + }, + { + name: "cr-level only", + defaults: nil, + crLevel: []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + }, + want: []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + }, + }, + { + name: "no overlap appends defaults after cr-level", + defaults: []string{"chart-default"}, + crLevel: []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + }, + want: []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + {Name: "chart-default"}, + }, + }, + { + name: "name overlap: cr-level wins", + defaults: []string{"shared", "chart-only"}, + crLevel: []corev1.LocalObjectReference{ + {Name: "cr-only"}, + {Name: "shared"}, + }, + want: []corev1.LocalObjectReference{ + {Name: "cr-only"}, + {Name: "shared"}, + {Name: "chart-only"}, + }, + }, + { + name: "duplicate cr-level entries deduplicated", + defaults: nil, + crLevel: []corev1.LocalObjectReference{ + {Name: "dup"}, + {Name: "dup"}, + }, + want: []corev1.LocalObjectReference{ + {Name: "dup"}, + }, + }, + { + name: "cr-level order preserved", + defaults: []string{"a", "b"}, + crLevel: []corev1.LocalObjectReference{ + {Name: "z"}, + {Name: "y"}, + }, + want: []corev1.LocalObjectReference{ + {Name: "z"}, + {Name: "y"}, + {Name: "a"}, + {Name: "b"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + d := NewDefaults(tt.defaults) + got := d.Merge(tt.crLevel) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestDefaultsMergeDoesNotMutateCRLevel(t *testing.T) { + t.Parallel() + + d := NewDefaults([]string{"chart-default"}) + crLevel := []corev1.LocalObjectReference{ + {Name: "cr-secret"}, + } + originalCR := append([]corev1.LocalObjectReference(nil), crLevel...) + + got := d.Merge(crLevel) + + assert.Equal(t, originalCR, crLevel, "Merge must not mutate the caller's slice") + assert.NotSame(t, &crLevel[0], &got[0], "Merge must return a fresh slice") +} + +func TestDefaultsListReturnsCopy(t *testing.T) { + t.Parallel() + + d := NewDefaults([]string{"regcred", "otherscred"}) + first := d.List() + first[0] = corev1.LocalObjectReference{Name: "mutated"} + + second := d.List() + assert.Equal(t, "regcred", second[0].Name, "List must return a fresh slice each call") +} + +func TestZeroValueDefaults(t *testing.T) { + t.Parallel() + + var d Defaults + assert.Nil(t, d.List()) + assert.Nil(t, d.Merge(nil)) + + cr := []corev1.LocalObjectReference{{Name: "cr"}} + got := d.Merge(cr) + assert.Equal(t, cr, got) +} diff --git a/cmd/thv-operator/pkg/registryapi/deployment.go b/cmd/thv-operator/pkg/registryapi/deployment.go index aa7c97f9cb..ca9ac8fbba 100644 --- a/cmd/thv-operator/pkg/registryapi/deployment.go +++ b/cmd/thv-operator/pkg/registryapi/deployment.go @@ -172,7 +172,7 @@ func (m *manager) ensureDeployment( // buildRegistryAPIDeployment creates a Deployment for the registry API. It mounts a ConfigMap // created from the raw ConfigYAML string and supports user-provided Volumes, VolumeMounts, // and PGPassSecretRef. -func (*manager) buildRegistryAPIDeployment( +func (m *manager) buildRegistryAPIDeployment( ctx context.Context, mcpRegistry *mcpv1beta1.MCPRegistry, configMapName string, @@ -208,7 +208,7 @@ func (*manager) buildRegistryAPIDeployment( WithServiceAccountName(GetServiceAccountName(mcpRegistry)), WithContainer(BuildRegistryAPIContainer(getRegistryAPIImage())), WithRegistryServerConfigMount(RegistryAPIContainerName, configMapName), - WithImagePullSecrets(mcpRegistry.Spec.ImagePullSecrets), + WithImagePullSecrets(m.imagePullSecretsDefaults.Merge(mcpRegistry.Spec.ImagePullSecrets)), } // Add user-provided volumes (deserialized from raw JSON) diff --git a/cmd/thv-operator/pkg/registryapi/manager.go b/cmd/thv-operator/pkg/registryapi/manager.go index b3a92e924e..720285377f 100644 --- a/cmd/thv-operator/pkg/registryapi/manager.go +++ b/cmd/thv-operator/pkg/registryapi/manager.go @@ -13,6 +13,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/kubernetes" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/kubernetes/configmaps" "github.com/stacklok/toolhive/cmd/thv-operator/pkg/registryapi/config" @@ -23,17 +24,26 @@ type manager struct { client client.Client scheme *runtime.Scheme kubeHelper *kubernetes.Client + // imagePullSecretsDefaults are cluster-wide defaults sourced from the + // operator chart that are merged with the per-CR imagePullSecrets when + // constructing the registry-api workload. The zero value is a usable + // empty Defaults. + imagePullSecretsDefaults imagepullsecrets.Defaults } -// NewManager creates a new registry API manager +// NewManager creates a new registry API manager. imagePullSecretsDefaults are +// cluster-wide pull-secret defaults from the operator chart; passing the zero +// value disables the merge and the registry-api uses only the per-CR list. func NewManager( k8sClient client.Client, scheme *runtime.Scheme, + imagePullSecretsDefaults imagepullsecrets.Defaults, ) Manager { return &manager{ - client: k8sClient, - scheme: scheme, - kubeHelper: kubernetes.NewClient(k8sClient, scheme), + client: k8sClient, + scheme: scheme, + kubeHelper: kubernetes.NewClient(k8sClient, scheme), + imagePullSecretsDefaults: imagePullSecretsDefaults, } } diff --git a/cmd/thv-operator/pkg/registryapi/manager_test.go b/cmd/thv-operator/pkg/registryapi/manager_test.go index 8ac86b51a9..f4589286b0 100644 --- a/cmd/thv-operator/pkg/registryapi/manager_test.go +++ b/cmd/thv-operator/pkg/registryapi/manager_test.go @@ -22,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/interceptor" mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" ) func TestNewManager(t *testing.T) { @@ -46,7 +47,7 @@ func TestNewManager(t *testing.T) { scheme := runtime.NewScheme() // Create manager - manager := NewManager(nil, scheme) + manager := NewManager(nil, scheme, imagepullsecrets.Defaults{}) // Verify manager is created assert.NotNil(t, manager) @@ -88,7 +89,7 @@ func TestReconcileAPIService(t *testing.T) { } // Create manager - manager := NewManager(fakeClient, scheme) + manager := NewManager(fakeClient, scheme, imagepullsecrets.Defaults{}) // Execute result := manager.ReconcileAPIService(context.Background(), mcpRegistry) @@ -159,7 +160,7 @@ func TestReconcileAPIService(t *testing.T) { } // Create manager - manager := NewManager(fakeClient, scheme) + manager := NewManager(fakeClient, scheme, imagepullsecrets.Defaults{}) // Execute result := manager.ReconcileAPIService(context.Background(), mcpRegistry) diff --git a/cmd/thv-operator/pkg/registryapi/rbac.go b/cmd/thv-operator/pkg/registryapi/rbac.go index 1dc7321a0c..03de5d9635 100644 --- a/cmd/thv-operator/pkg/registryapi/rbac.go +++ b/cmd/thv-operator/pkg/registryapi/rbac.go @@ -83,7 +83,7 @@ func (m *manager) ensureRBACResources( Rules: registryAPIRBACRules, Owner: mcpRegistry, Labels: labels, - ImagePullSecrets: mcpRegistry.Spec.ImagePullSecrets, + ImagePullSecrets: m.imagePullSecretsDefaults.Merge(mcpRegistry.Spec.ImagePullSecrets), }); err != nil { return err } diff --git a/cmd/thv-operator/test-integration/mcp-registry/suite_test.go b/cmd/thv-operator/test-integration/mcp-registry/suite_test.go index 52ae4e0c5f..0caca7cef4 100644 --- a/cmd/thv-operator/test-integration/mcp-registry/suite_test.go +++ b/cmd/thv-operator/test-integration/mcp-registry/suite_test.go @@ -24,6 +24,7 @@ import ( mcpv1beta1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1beta1" "github.com/stacklok/toolhive/cmd/thv-operator/controllers" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/imagepullsecrets" ) var ( @@ -114,7 +115,9 @@ var _ = BeforeSuite(func() { // Set up MCPRegistry controller By("setting up MCPRegistry controller") - err = controllers.NewMCPRegistryReconciler(testMgr.GetClient(), testMgr.GetScheme()).SetupWithManager(testMgr) + err = controllers.NewMCPRegistryReconciler( + testMgr.GetClient(), testMgr.GetScheme(), imagepullsecrets.Defaults{}, + ).SetupWithManager(testMgr) Expect(err).NotTo(HaveOccurred()) // Start the manager in the background diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md index 7bd6baa9af..1ae1254d89 100644 --- a/deploy/charts/operator/README.md +++ b/deploy/charts/operator/README.md @@ -46,7 +46,7 @@ The command removes all the Kubernetes components associated with the chart and |-----|------|---------|-------------| | fullnameOverride | string | `"toolhive-operator"` | Provide a fully-qualified name override for resources | | nameOverride | string | `""` | Override the name of the chart | -| operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"env":[],"features":{"experimental":false,"registry":true,"server":true,"virtualMCP":true},"gc":{"gogc":75,"gomemlimit":"110MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.25.0","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":[""],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.25.0","vmcpImage":"ghcr.io/stacklok/toolhive/vmcp:v0.25.0","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources | +| operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"defaultImagePullSecrets":[],"env":[],"features":{"experimental":false,"registry":true,"server":true,"virtualMCP":true},"gc":{"gogc":75,"gomemlimit":"110MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.25.0","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":[""],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.25.0","vmcpImage":"ghcr.io/stacklok/toolhive/vmcp:v0.25.0","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources | | operator.affinity | object | `{}` | Affinity settings for the operator pod | | operator.autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}` | Configuration for horizontal pod autoscaling | | operator.autoscaling.enabled | bool | `false` | Enable autoscaling for the operator | @@ -54,6 +54,7 @@ The command removes all the Kubernetes components associated with the chart and | operator.autoscaling.minReplicas | int | `1` | Minimum number of replicas | | operator.autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization percentage for autoscaling | | operator.containerSecurityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}}` | Container security context settings for the operator | +| operator.defaultImagePullSecrets | list | `[]` | List of image pull secrets that the operator applies as defaults to every workload it spawns (proxy runners, vMCP servers, registry API, etc.). Per-CR `imagePullSecrets` take precedence on name collisions; chart-level entries are appended additively. Each entry is a string (the Secret name); the operator parses these once at startup from the THV_DEFAULT_IMAGE_PULL_SECRETS environment variable. The Secrets must exist in the namespace where each workload is created. | | operator.env | list | `[]` | Environment variables to set in the operator container | | operator.features.experimental | bool | `false` | Enable experimental features | | operator.features.registry | bool | `true` | Enable registry controller (MCPRegistry). This automatically sets ENABLE_REGISTRY environment variable. | diff --git a/deploy/charts/operator/templates/deployment.yaml b/deploy/charts/operator/templates/deployment.yaml index 12170c58b8..93dd46acfe 100644 --- a/deploy/charts/operator/templates/deployment.yaml +++ b/deploy/charts/operator/templates/deployment.yaml @@ -43,6 +43,16 @@ spec: ports: {{- toYaml .Values.operator.ports | nindent 12 }} env: + {{- /* + User-supplied env entries are rendered first so that chart-managed + env vars below win on name collision: Kubernetes keeps the last + entry when a name appears more than once on the container. This + prevents an accidental `operator.env` override of reserved names + like THV_DEFAULT_IMAGE_PULL_SECRETS or TOOLHIVE_RUNNER_IMAGE. + */}} + {{- with .Values.operator.env }} + {{- toYaml . | nindent 10 }} + {{- end }} - name: GOMEMLIMIT value: {{ .Values.operator.gc.gomemlimit | quote }} - name: GOGC @@ -76,8 +86,9 @@ spec: value: "{{ .Values.operator.proxyHost }}" - name: TOOLHIVE_REGISTRY_API_IMAGE value: "{{ .Values.registryAPI.image }}" - {{- if .Values.operator.env }} - {{- toYaml .Values.operator.env | nindent 10 }} + {{- with .Values.operator.defaultImagePullSecrets }} + - name: THV_DEFAULT_IMAGE_PULL_SECRETS + value: "{{ join "," . }}" {{- end }} livenessProbe: {{- toYaml .Values.operator.livenessProbe | nindent 12 }} diff --git a/deploy/charts/operator/values.yaml b/deploy/charts/operator/values.yaml index 1850f90ec8..524456416a 100644 --- a/deploy/charts/operator/values.yaml +++ b/deploy/charts/operator/values.yaml @@ -25,6 +25,15 @@ operator: # -- List of image pull secrets to use imagePullSecrets: [] + + # -- List of image pull secrets that the operator applies as defaults to every + # workload it spawns (proxy runners, vMCP servers, registry API, etc.). + # Per-CR `imagePullSecrets` take precedence on name collisions; chart-level + # entries are appended additively. Each entry is a string (the Secret name); + # the operator parses these once at startup from the + # THV_DEFAULT_IMAGE_PULL_SECRETS environment variable. The Secrets must exist + # in the namespace where each workload is created. + defaultImagePullSecrets: [] # -- Container image for the operator image: ghcr.io/stacklok/toolhive/operator:v0.25.0 # -- Image pull policy for the operator container From b4d4248d7067faa95b1f1dfdf3d4b2d0892d2b00 Mon Sep 17 00:00:00 2001 From: Juan Antonio Osorio Date: Wed, 29 Apr 2026 16:45:21 +0300 Subject: [PATCH 2/2] Address review: TOOLHIVE_ env var prefix and accept both shapes Rename THV_DEFAULT_IMAGE_PULL_SECRETS to TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS to match the operator container's TOOLHIVE_* convention before the env var ships and a rename costs a deprecation cycle. Accept both shapes in operator.defaultImagePullSecrets to mirror operator.imagePullSecrets above: - plain strings: ["regcred", "otherscred"] - object with `name`: [{name: regcred}, {name: otherscred}] Users who copy the [{name: foo}] pattern from operator.imagePullSecrets no longer get a silent THV_DEFAULT_IMAGE_PULL_SECRETS=map[name:foo] env var and ImagePullBackOff at runtime. Malformed entries (numbers, nested lists, objects without a `name` field) now fail the helm template render with a clear message instead of producing a bogus env var. --- cmd/thv-operator/main.go | 2 +- .../pkg/imagepullsecrets/defaults.go | 11 +++---- deploy/charts/operator/README.md | 2 +- .../charts/operator/templates/deployment.yaml | 29 +++++++++++++++++-- deploy/charts/operator/values.yaml | 15 +++++++--- 5 files changed, 45 insertions(+), 14 deletions(-) diff --git a/cmd/thv-operator/main.go b/cmd/thv-operator/main.go index 1011614e7b..54b5ddfa75 100644 --- a/cmd/thv-operator/main.go +++ b/cmd/thv-operator/main.go @@ -113,7 +113,7 @@ func main() { // " , " or ",,,". Surface this so the misconfiguration is diagnosable // instead of being silently ignored. setupLog.Info( - "THV_DEFAULT_IMAGE_PULL_SECRETS is set but contains no valid secret names; "+ + "TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS is set but contains no valid secret names; "+ "chart-level defaults will not be applied", "imagePullSecrets", rawValue, ) diff --git a/cmd/thv-operator/pkg/imagepullsecrets/defaults.go b/cmd/thv-operator/pkg/imagepullsecrets/defaults.go index 293165b5d2..efdc285733 100644 --- a/cmd/thv-operator/pkg/imagepullsecrets/defaults.go +++ b/cmd/thv-operator/pkg/imagepullsecrets/defaults.go @@ -5,8 +5,9 @@ // that the ToolHive operator applies to every workload it spawns. // // The operator parses a comma-separated list of secret names from the -// THV_DEFAULT_IMAGE_PULL_SECRETS environment variable at startup and exposes -// the result as a Defaults value that controllers consume during reconciliation. +// TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS environment variable at startup and +// exposes the result as a Defaults value that controllers consume during +// reconciliation. // // Defaults are merged with any per-CR imagePullSecrets at workload-construction // time. See Defaults.Merge for the precedence rule. @@ -25,7 +26,7 @@ import ( // // The value is a comma-separated list of secret names, e.g. "regcred,otherscred". // Whitespace around entries is tolerated; empty entries are skipped. -const EnvVar = "THV_DEFAULT_IMAGE_PULL_SECRETS" +const EnvVar = "TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS" // Defaults holds the cluster-wide default imagePullSecrets that the operator // applies to every workload it spawns when the corresponding CR does not @@ -55,8 +56,8 @@ func NewDefaults(names []string) Defaults { return Defaults{secrets: parsed} } -// LoadDefaultsFromEnv parses Defaults from the THV_DEFAULT_IMAGE_PULL_SECRETS -// environment variable. +// LoadDefaultsFromEnv parses Defaults from the +// TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS environment variable. // // The variable is a comma-separated list of secret names. An empty or unset // variable yields an empty Defaults whose Merge is a no-op. diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md index 1ae1254d89..b82e5bc8a5 100644 --- a/deploy/charts/operator/README.md +++ b/deploy/charts/operator/README.md @@ -54,7 +54,7 @@ The command removes all the Kubernetes components associated with the chart and | operator.autoscaling.minReplicas | int | `1` | Minimum number of replicas | | operator.autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization percentage for autoscaling | | operator.containerSecurityContext | object | `{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}}` | Container security context settings for the operator | -| operator.defaultImagePullSecrets | list | `[]` | List of image pull secrets that the operator applies as defaults to every workload it spawns (proxy runners, vMCP servers, registry API, etc.). Per-CR `imagePullSecrets` take precedence on name collisions; chart-level entries are appended additively. Each entry is a string (the Secret name); the operator parses these once at startup from the THV_DEFAULT_IMAGE_PULL_SECRETS environment variable. The Secrets must exist in the namespace where each workload is created. | +| operator.defaultImagePullSecrets | list | `[]` | List of image pull secrets that the operator applies as defaults to every workload it spawns (proxy runners, vMCP servers, registry API, etc.). Per-CR `imagePullSecrets` take precedence on name collisions; chart-level entries are appended additively. The operator parses these once at startup from the TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS environment variable. The Secrets must exist in the namespace where each workload is created. Each entry may be either a plain string (the Secret name) or an object with a `name` field, e.g.: defaultImagePullSecrets: - regcred - name: otherscred The two shapes are equivalent; the object form matches `operator.imagePullSecrets` above for convenience. | | operator.env | list | `[]` | Environment variables to set in the operator container | | operator.features.experimental | bool | `false` | Enable experimental features | | operator.features.registry | bool | `true` | Enable registry controller (MCPRegistry). This automatically sets ENABLE_REGISTRY environment variable. | diff --git a/deploy/charts/operator/templates/deployment.yaml b/deploy/charts/operator/templates/deployment.yaml index 93dd46acfe..bda8770126 100644 --- a/deploy/charts/operator/templates/deployment.yaml +++ b/deploy/charts/operator/templates/deployment.yaml @@ -48,7 +48,7 @@ spec: env vars below win on name collision: Kubernetes keeps the last entry when a name appears more than once on the container. This prevents an accidental `operator.env` override of reserved names - like THV_DEFAULT_IMAGE_PULL_SECRETS or TOOLHIVE_RUNNER_IMAGE. + like TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS or TOOLHIVE_RUNNER_IMAGE. */}} {{- with .Values.operator.env }} {{- toYaml . | nindent 10 }} @@ -87,8 +87,31 @@ spec: - name: TOOLHIVE_REGISTRY_API_IMAGE value: "{{ .Values.registryAPI.image }}" {{- with .Values.operator.defaultImagePullSecrets }} - - name: THV_DEFAULT_IMAGE_PULL_SECRETS - value: "{{ join "," . }}" + {{- /* + Accept both shapes per values.yaml documentation: + - plain strings: ["regcred", "otherscred"] + - objects with a `name` field: [{name: regcred}, {name: otherscred}] + The object form mirrors `operator.imagePullSecrets` above so users + can copy that pattern without silent breakage. Anything else (numbers, + nested lists, objects without `name`) fails the template render with + a clear message instead of producing an env var like + `TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS=map[name:foo]`. + */}} + {{- $names := list }} + {{- range $i, $entry := . }} + {{- if kindIs "string" $entry }} + {{- $names = append $names $entry }} + {{- else if kindIs "map" $entry }} + {{- if not $entry.name }} + {{- fail (printf "operator.defaultImagePullSecrets[%d]: object entry must have a non-empty `name` field" $i) }} + {{- end }} + {{- $names = append $names $entry.name }} + {{- else }} + {{- fail (printf "operator.defaultImagePullSecrets[%d]: entry must be a string or an object with a `name` field, got %s" $i (kindOf $entry)) }} + {{- end }} + {{- end }} + - name: TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS + value: {{ join "," $names | quote }} {{- end }} livenessProbe: {{- toYaml .Values.operator.livenessProbe | nindent 12 }} diff --git a/deploy/charts/operator/values.yaml b/deploy/charts/operator/values.yaml index 524456416a..4ff7072e6c 100644 --- a/deploy/charts/operator/values.yaml +++ b/deploy/charts/operator/values.yaml @@ -29,10 +29,17 @@ operator: # -- List of image pull secrets that the operator applies as defaults to every # workload it spawns (proxy runners, vMCP servers, registry API, etc.). # Per-CR `imagePullSecrets` take precedence on name collisions; chart-level - # entries are appended additively. Each entry is a string (the Secret name); - # the operator parses these once at startup from the - # THV_DEFAULT_IMAGE_PULL_SECRETS environment variable. The Secrets must exist - # in the namespace where each workload is created. + # entries are appended additively. The operator parses these once at startup + # from the TOOLHIVE_DEFAULT_IMAGE_PULL_SECRETS environment variable. The + # Secrets must exist in the namespace where each workload is created. + # + # Each entry may be either a plain string (the Secret name) or an object + # with a `name` field, e.g.: + # defaultImagePullSecrets: + # - regcred + # - name: otherscred + # The two shapes are equivalent; the object form matches `operator.imagePullSecrets` + # above for convenience. defaultImagePullSecrets: [] # -- Container image for the operator image: ghcr.io/stacklok/toolhive/operator:v0.25.0